diff --git a/.gitignore b/.gitignore index e4e47aa4b705c9035394f5ea15c0d5e653859206..3b236ea5bdd793ed34603010c520fa1f1a43e34b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,10 @@ buildGCC _skbuild/ dist/ *.egg-info/ -__pycache__/ +**/__pycache__/ .venv/ +pythonbindings/pyfluids/bindings* +pythonbindings/pymuparser/bindings* # IDE .vscode/ @@ -38,4 +40,7 @@ stl/ .DS_Store # Settings -.gitconfig \ No newline at end of file +.gitconfig + +# User Settings +CMakeUserPresets.json \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b34c5a8f66c1340670b6acd80ea6a9901b2760d1..e171e2e7fbe1984588355f5a833a21160024da32 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ ############################################################################### ## VirtualFluids CI Pipeline ## ############################################################################### -image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.3 +image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4 stages: - build @@ -49,7 +49,7 @@ stages: - cd $CI_PROJECT_DIR/$BUILD_FOLDER - rm -r -f ./* - cmake .. -LAH - --preset=all_make + --preset=make_all -DBUILD_WARNINGS_AS_ERRORS=ON -DCMAKE_CUDA_ARCHITECTURES=60 - make -j4 @@ -75,7 +75,7 @@ clang_10: - export CXX=clang++ ############################################################################### -msvc_16: +msvc_17: stage: build tags: @@ -92,14 +92,14 @@ msvc_16: - git --version - $env:Path += ";C:\Program Files\CMake\bin\" - cmake --version - - $env:Path += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin" + - $env:Path += ";C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin" - MSBuild.exe -version script: - cd $CI_PROJECT_DIR - md -force $env:BUILD_FOLDER - cd $env:BUILD_FOLDER - - cmake .. --preset=all_msvc -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON + - cmake .. 
--preset=msvc_all -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON - MSBuild.exe VirtualFluids.sln /property:Configuration=$env:BUILD_CONFIGURATION /verbosity:minimal /maxcpucount:4 artifacts: @@ -126,33 +126,44 @@ gcc_9_python: paths: - build/ - dist/ + - _skbuild/ before_script: - export CCACHE_BASEDIR=$CI_PROJECT_DIR - export CCACHE_DIR=$CI_PROJECT_DIR/cache script: - - python3 setup.py bdist_wheel build_ext --build-temp=build + - python3 setup.py bdist_wheel build_ext --build-temp=_skbuild -- -DBUILD_VF_CPU=ON -DBUILD_VF_DOUBLE_ACCURACY=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache ############################################################################### ## Container Upload ## ############################################################################### -build_singularity_image: +build_poiseuille_test_container: + image: + name: quay.io/singularity/singularity:v3.10.2 + entrypoint: [""] + stage: container_upload - needs: - - gcc_9_python + rules: + - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule" + when: always + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + when: never + - when: manual + allow_failure: true tags: - linux - privileged - rules: - - if: $CI_COMMIT_TAG + artifacts: + expire_in: 1 hrs + paths: + - Containers/PoiseuilleTestContainer.sif script: - - singularity build Containers/VirtualFluidsPython.sif Containers/VirtualFluidsPython.def - - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Containers/VirtualFluidsPython.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG" + - singularity build "Containers/PoiseuilleTestContainer.sif" "Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def" ############################################################################### ## Tests ## @@ -169,14 +180,14 @@ gcc_9_unit_tests: - ctest 
############################################################################### -msvc_16_unit_tests: +msvc_17_unit_tests: stage: test tags: - win - gpu - needs: ["msvc_16"] + needs: ["msvc_17"] before_script: - $env:Path += ";C:\Program Files\CMake\bin\" @@ -202,6 +213,52 @@ gcc_9_python_bindings_test: - python3 -m unittest discover -s Python -v +############################################################################### +gcc_9_python_hpc_test: + image: python:latest + stage: test + + needs: ["build_poiseuille_test_container"] + + rules: + - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule" + when: always + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + when: never + - when: manual + allow_failure: true + + before_script: + - pip install hpc-rocket + + script: + - hpc-rocket launch --watch Python/SlurmTests/poiseuille/rocket.yml + +############################################################################### +multigpu_hpc_test: + image: python:latest + stage: test + + rules: + - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule" + when: always + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + when: never + - when: manual + allow_failure: true + + before_script: + - pip install hpc-rocket + - pip install "fieldcompare[all]" + + script: + - hpc-rocket launch --watch regression-tests/multigpu_test/rocket.yml + - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data + - cd test_data + - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels + - cd .. + - fieldcompare dir output/results test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "*.vtu" + ############################################################################### ## Benchmark ## ############################################################################### @@ -248,7 +305,7 @@ gpu_numerical_tests: - cd $CI_PROJECT_DIR/build - rm -r -f ./* - cmake .. 
- --preset=gpu_numerical_tests_make + --preset=make_numerical_tests_gpu -DCMAKE_CUDA_ARCHITECTURES=60 -DPATH_NUMERICAL_TESTS=/tmp/test_data/numerical_tests_gpu - make -j4 @@ -319,8 +376,7 @@ clang_build_analyzer_clang_10: - mkdir -p $CI_PROJECT_DIR/build - cd $CI_PROJECT_DIR/build - cmake .. - -DBUILD_VF_CPU=ON - -DBUILD_VF_GPU=ON + --preset=make_all -DCMAKE_CUDA_ARCHITECTURES=60 -DCMAKE_CXX_FLAGS=-ftime-trace - ClangBuildAnalyzer --start . @@ -352,8 +408,7 @@ include_what_you_use_clang_10: - mkdir -p $CI_PROJECT_DIR/build - cd $CI_PROJECT_DIR/build - cmake .. - -DBUILD_VF_CPU=ON - -DBUILD_VF_GPU=ON + --preset=make_all -DCMAKE_CUDA_ARCHITECTURES=60 -DBUILD_VF_INCLUDE_WHAT_YOU_USE=ON - make @@ -430,7 +485,7 @@ gcov_gcc_9: - mkdir -p $CI_PROJECT_DIR/build - cd $CI_PROJECT_DIR/build - cmake .. - --preset=all_make + --preset=make_all -DCMAKE_CUDA_ARCHITECTURES=60 -DBUILD_VF_COVERAGE=ON - make -j4 @@ -473,6 +528,7 @@ clang-tidy: - cd $CI_PROJECT_DIR/build - cmake .. -DBUILD_VF_CPU=ON + -DBUILD_VF_DOUBLE_ACCURACY=ON -DBUILD_VF_GPU=OFF - python3 ../utilities/filterCompileCommands.py compile_commands.json - run-clang-tidy -quiet > clangtidy.txt @@ -604,26 +660,3 @@ sonar-scanner: script: - cd $CI_PROJECT_DIR - sonar-scanner -X -Dsonar.verbose=true -Dsonar.login=$SONAR_SECURITY_TOKEN - -############################################################################### -## Release ## -############################################################################### -create_release: - stage: release - - image: registry.gitlab.com/gitlab-org/release-cli:latest - - needs: ["build_singularity_image"] - - rules: - - if: $CI_COMMIT_TAG - - script: - - echo "Creating release with tag $CI_COMMIT_TAG" - - release-cli create --name "VirtualFluids $CI_COMMIT_TAG" \ - --description "VirtualFluids CFD Simulator" \ - --tag-name "$CI_COMMIT_TAG" \ - --ref "$CI_COMMIT_SHA" \ - --job-token "$CI_JOB_TOKEN" \ - 
--assets-link="{'name':'VirtualFluidsSingularityImage_OpenMPI','url':'','type':'other','filepath':'Containers/VirtualFluidsOpenMPI.sif'}" - - build/bin/basicsTests diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000000000000000000000000000000000..50d4989d5c269521392644515d716fa93b3cf6e3 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,40 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +type: software +authors: + - family-names: Kutscher + given-names: Konstantin + orcid: https://orcid.org/0000-0002-1099-1608 + - family-names: Schönherr + given-names: Martin + orcid: https://orcid.org/0000-0002-4774-1776 + - family-names: Geier + given-names: Martin + orcid: https://orcid.org/0000-0002-8367-9412 + - family-names: Krafczyk + given-names: Manfred + orcid: https://orcid.org/0000-0002-8509-0871 + - family-names: Alihussein + given-names: Hussein + orcid: https://orcid.org/0000-0003-3656-7028 + - family-names: Linxweiler + given-names: Jan + orcid: https://orcid.org/0000-0002-2755-5087 + - family-names: Peters + given-names: Sören + orcid: https://orcid.org/0000-0001-5236-3776 + - family-names: Wellmann + given-names: Anna + orcid: https://orcid.org/0000-0002-8825-2995 + - family-names: Safari + given-names: Hesameddin + orcid: https://orcid.org/0000-0002-2755-5087 + - family-names: Marcus + given-names: Sven + orcid: https://orcid.org/0000-0003-3689-2162 +title: "VirtualFluids" +version: 0.1.0 +license: GPL-3.0-or-later +repository-code: "https://git.rz.tu-bs.de/irmb/VirtualFluids" +date-released: "XXXXXXX" + diff --git a/CMake/FileUtilities.cmake b/CMake/FileUtilities.cmake index 151000a681795923d4e31ed8c5f06dfd1e7af7fd..13057ef832b5aa2d7ce303fe55e95a91284f5f56 100644 --- a/CMake/FileUtilities.cmake +++ b/CMake/FileUtilities.cmake @@ -5,7 +5,7 @@ ## After function call the files are stored in: MY_SRCS ################################################################################# -macro(includeAllFiles 
targetName file_path) +macro(includeAllFiles folderName targetName file_path) if(NOT DEFINED collectTestFiles) set(collectTestFiles ON) endif() @@ -14,11 +14,11 @@ macro(includeAllFiles targetName file_path) set(collectProductionFiles ON) endif() - includeFiles(${targetName} "${file_path}") + includeFiles(${folderName} ${targetName} "${file_path}") endmacro(includeAllFiles) -macro(includeProductionFiles targetName file_path) +macro(includeProductionFiles folderName targetName file_path) if(NOT DEFINED collectTestFiles) set(collectTestFiles OFF) endif() @@ -27,12 +27,12 @@ macro(includeProductionFiles targetName file_path) set(collectProductionFiles ON) endif() - includeFiles(${targetName} "${file_path}") + includeFiles(${folderName} ${targetName} "${file_path}") endmacro(includeProductionFiles) -macro(includeTestFiles targetName file_paths) +macro(includeTestFiles folderName file_paths) if(NOT DEFINED collectTestFiles) set(collectTestFiles ON) endif() @@ -41,13 +41,13 @@ macro(includeTestFiles targetName file_paths) set(collectProductionFiles OFF) endif() - includeFiles(${targetName} "${file_paths}") + includeFiles(${folderName} ${folderName} "${file_paths}") endmacro(includeTestFiles) -macro(includeFiles targetName file_paths) +macro(includeFiles folderName targetName file_paths) foreach(file ${file_paths}) @@ -57,7 +57,7 @@ macro(includeFiles targetName file_paths) collectFilesFrom(${file}) if (package_dir) - setSourceGroupForFilesIn(${file} ${package_dir} ${targetName}) + setSourceGroupForFilesIn(${file} ${package_dir} ${targetName} ${folderName}) endif() endforeach() @@ -90,9 +90,9 @@ endmacro() -macro(setSourceGroupForFilesIn file package_dir targetName) +macro(setSourceGroupForFilesIn file package_dir targetName folderName) #input: target_name PACKAGE_SRCS - buildSourceGroup(${targetName} ${package_dir}) + buildSourceGroup(${folderName} ${package_dir}) if(isAllTestSuite) source_group(${targetName}\\${SOURCE_GROUP} FILES ${file}) @@ -105,20 +105,20 @@ 
endmacro(setSourceGroupForFilesIn) -macro(buildSourceGroup targetName path) -#input: targetName (e.g. lib name, exe name) +macro(buildSourceGroup folderName path) +#input: folderName (e.g. name of folder after src/) unset(SOURCE_GROUP) string(REPLACE "/" ";" folderListFromPath ${path}) - set(findTargetName 0) + set(findFolderName 0) foreach(folder ${folderListFromPath}) - if(findTargetName) + if(findFolderName) set(SOURCE_GROUP ${SOURCE_GROUP}\\${folder}) endif() - if(${folder} STREQUAL ${targetName}) - SET(findTargetName 1) + if(${folder} STREQUAL ${folderName}) + SET(findFolderName 1) endif() endforeach() diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake index 63503f5f14221bb8cec7670dbdda6aa92497d327..4fd163b2cc1b53fe461ef482d906f4cb1255a76c 100644 --- a/CMake/VirtualFluidsMacros.cmake +++ b/CMake/VirtualFluidsMacros.cmake @@ -105,14 +105,15 @@ function(vf_add_library) set( options ) set( oneValueArgs NAME BUILDTYPE) - set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE) + set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE MODULEFOLDER) cmake_parse_arguments( ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - if(DEFINED ARG_NAME) + if(DEFINED ARG_NAME) set(library_name ${ARG_NAME}) else() vf_get_library_name (library_name) endif() + vf_get_library_name (folder_name) # folder_name is not equal to library_name when ARG_NAME was set if(NOT DEFINED ARG_BUILDTYPE) if(BUILD_SHARED_LIBS) @@ -122,12 +123,16 @@ function(vf_add_library) endif() endif() + if(DEFINED ARG_MODULEFOLDER) + set(folder_name ${ARG_MODULEFOLDER}) + endif() + status("Configuring the target: ${library_name} (type=${ARG_BUILDTYPE})...") collectFiles(sourceFiles "${ARG_FILES}" "${ARG_FOLDER}" "${ARG_EXCLUDE}") - includeProductionFiles (${library_name} "${sourceFiles}") + includeProductionFiles (${folder_name} ${library_name} "${sourceFiles}") ################################################################# ### ADD TARGET ### @@ 
-325,4 +330,4 @@ function(vf_load_user_apps) foreach(app IN LISTS USER_APPS) add_subdirectory(${app}) endforeach() -endfunction() \ No newline at end of file +endfunction() diff --git a/CMake/cmake_config_files/MOLLOK.config.cmake b/CMake/cmake_config_files/MOLLOK.config.cmake index f700f3cd7a4b5669ef6ffee9436a1528e50e9dc9..72470da1bc52a242cb8e3c341e0e7f87bb06ab26 100644 --- a/CMake/cmake_config_files/MOLLOK.config.cmake +++ b/CMake/cmake_config_files/MOLLOK.config.cmake @@ -12,4 +12,5 @@ set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/") list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}") # add invidual apps here -list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB") \ No newline at end of file +list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB") +list(APPEND USER_APPS "apps/gpu/LBM/TGV_3D_GridRef") diff --git a/CMake/cmake_config_files/MULE.config.cmake b/CMake/cmake_config_files/MULE.config.cmake index 02f61b7988c5b3af9cd58bc52e46b1b2edfe8aae..2afbce6cc257fa0b8ff4dd7de580cb50c01369f1 100644 --- a/CMake/cmake_config_files/MULE.config.cmake +++ b/CMake/cmake_config_files/MULE.config.cmake @@ -1 +1,4 @@ -SET(CMAKE_CUDA_ARCHITECTURES "75") \ No newline at end of file +SET(CMAKE_CUDA_ARCHITECTURES "75") + +list(APPEND USER_APPS "apps/gpu/LBM/ActuatorLine") +list(APPEND USER_APPS "apps/gpu/LBM/SphereScaling") diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake index d31d8684a53a769e48408ad5febe7d2c6b22c623..5ca4d9821d918f66745fc27363975811dc278440 100644 --- a/CMake/cmake_config_files/PHOENIX.config.cmake +++ b/CMake/cmake_config_files/PHOENIX.config.cmake @@ -28,7 +28,7 @@ set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100 set(GPU_APP "apps/gpu/LBM/") list(APPEND USER_APPS - # "${GPU_APP}DrivenCavityMultiGPU" + "${GPU_APP}DrivenCavityMultiGPU" # "${GPU_APP}SphereScaling" # "${GPU_APP}MusselOyster" ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
b3af407acd66ec3223f55de7753df879786ce561..c6498bf19bb021f3ae19d69c4131aa56476149be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ cmake_minimum_required(VERSION 3.15..3.20 FATAL_ERROR) project(VirtualFluids - VERSION 1.0.0 + VERSION 0.1.0 DESCRIPTION "CFD code based on the Lattice Boltzmann Method" HOMEPAGE_URL "https://www.tu-braunschweig.de/irmb/forschung/virtualfluids" LANGUAGES CXX) diff --git a/CMakePresets.json b/CMakePresets.json index 0f360fd303cdcad923b01d56df5c6d48ad62ca2c..6e2658d148bddf55950e5849adcf10709a8b8caf 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,34 +1,47 @@ { - "version": 2, + "version": 3, "cmakeMinimumRequired": { "major": 3, - "minor": 20, + "minor": 21, "patch": 0 }, "configurePresets": [ { "name": "default", + "binaryDir": "build", + "hidden": true + }, + { + "name": "msvc", "hidden": true, - "binaryDir": "${sourceDir}/build/", - "cacheVariables": { - "BUILD_VF_UNIT_TESTS": "ON" - } + "generator": "Visual Studio 17 2022", + "architecture": "x64", + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } }, { - "name": "default_make", - "inherits": "default", + "name": "make", "hidden": true, - "generator": "Unix Makefiles" + "generator": "Unix Makefiles", + "condition": { + "type": "notEquals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } }, { - "name": "default_msvc", - "inherits": "default", + "name": "unit_tests", "hidden": true, - "generator": "Visual Studio 16 2019", - "architecture": "x64" + "cacheVariables": { + "BUILD_VF_UNIT_TESTS": "ON" + } }, { - "name": "default_cpu", + "name": "cpu", + "inherits": "default", "hidden": true, "description": "CPU build of VirtualFluids", "cacheVariables": { @@ -37,7 +50,8 @@ } }, { - "name": "default_gpu", + "name": "gpu", + "inherits": "default", "hidden": true, "description": "GPU build of VirtualFluids", "cacheVariables": { @@ -46,9 +60,10 @@ } }, { - "name": "default_gpu_numerical_tests", + "name": 
"gpu_numerical_tests", "inherits": [ - "default_gpu" + "gpu", + "unit_tests" ], "hidden": true, "description": "GPU numerical tests of VirtualFluids", @@ -58,78 +73,74 @@ } }, { - "name": "default_all", - "hidden": true, - "description": "All build of VirtualFluids", + "name": "make_all", "inherits": [ - "default_cpu", - "default_gpu" + "cpu", + "gpu", + "unit_tests", + "make" ], - "cacheVariables": { - "BUILD_VF_DOUBLE_ACCURACY": "ON" - } + "displayName": "all make configuration" }, { - "name": "cpu_make", + "name": "make_cpu", "inherits": [ - "default_make", - "default_cpu" + "cpu", + "unit_tests", + "make" ], "displayName": "cpu make configuration" }, { - "name": "cpu_msvc", - "inherits": [ - "default_msvc", - "default_cpu" - ], - "displayName": "cpu msvc configuration" - }, - { - "name": "gpu_make", + "name": "make_gpu", "inherits": [ - "default_make", - "default_gpu" + "gpu", + "unit_tests", + "make" ], "displayName": "gpu make configuration" }, { - "name": "gpu_msvc", + "name": "msvc_all", "inherits": [ - "default_msvc", - "default_gpu" + "cpu", + "gpu", + "unit_tests", + "msvc" ], - "displayName": "gpu msvc configuration" + "displayName": "all msvc configuration" }, { - "name": "all_make", + "name": "msvc_cpu", "inherits": [ - "default_make", - "default_all" + "cpu", + "unit_tests", + "msvc" ], - "displayName": "all make configuration" + "displayName": "cpu msvc configuration" }, { - "name": "all_msvc", + "name": "msvc_gpu", "inherits": [ - "default_msvc", - "default_all" + "gpu", + "unit_tests", + "msvc" ], - "displayName": "all msvc configuration" + "displayName": "gpu msvc configuration" }, { - "name": "gpu_numerical_tests_make", + "name": "make_numerical_tests_gpu", "inherits": [ - "default_make", - "default_gpu_numerical_tests" + "gpu_numerical_tests", + "make" ], "displayName": "gpu numerical tests make configuration" }, { - "name": "gpu_numerical_tests_msvc", + "name": "msvc_numerical_tests_gpu", "inherits": [ - "default_msvc", - 
"default_gpu_numerical_tests" + "msvc", + "gpu_numerical_tests" ], "displayName": "gpu numerical tests msvc configuration" } diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..adafcf99560acd9da79aa060194df8263b6e77e0 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include pythonbindings/*/bindings* \ No newline at end of file diff --git a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def index a3836e7906b9be66ec79f68bf53ccc079db9d9ef..d31a7b82a4e9e988f815139fb46318d231d450f8 100644 --- a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def +++ b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def @@ -1,11 +1,13 @@ BootStrap: docker From: ubuntu:20.04 +Stage: build %files 3rdParty 3rdParty apps apps CMake CMake Python Python + pythonbindings pythonbindings src src CMakeLists.txt CMakeLists.txt cpu.cmake cpu.cmake @@ -19,7 +21,8 @@ From: ubuntu:20.04 apt-get update && \ apt-get install -y \ build-essential \ - cmake=3.16.3-1ubuntu1 \ + ccache \ + git \ python3 \ python3-dev \ python3-pip \ @@ -27,10 +30,32 @@ From: ubuntu:20.04 libomp-dev \ libgl1 - pip3 install setuptools wheel numpy scipy pyvista + pip3 install setuptools wheel cmake numpy scipy pyvista scikit-build export PYTHONPATH=Python - python3 /setup.py install + python3 /setup.py bdist_wheel build_ext --build-temp=_skbuild -- -DBUILD_VF_CPU=ON -DBUILD_VF_DOUBLE_ACCURACY=ON + + pip3 install $(find dist/*.whl) + + +BootStrap: docker +From: ubuntu:20.04 +Stage: runtime + +%files from build + Python Python + dist dist + +%post + export DEBIAN_FRONTEND=noninteractive + apt-get update && \ + apt-get install -y \ + python3 \ + python3-pip \ + mpich \ + libgl1 + + pip3 install $(find dist/*.whl) %environment export PYTHONPATH=/Python diff --git a/Python/SlurmTests/poiseuille/rocket.yml b/Python/SlurmTests/poiseuille/rocket.yml new file mode 100644 index 
0000000000000000000000000000000000000000..b186469a4d3fd4b8edfafa4fc3f6dcd64e311d70 --- /dev/null +++ b/Python/SlurmTests/poiseuille/rocket.yml @@ -0,0 +1,23 @@ +host: $REMOTE_HOST +user: $REMOTE_USER +private_keyfile: $PRIVATE_KEY + +copy: + - from: Python/SlurmTests/poiseuille/slurm.job + to: poiseuille_test/slurm.job + overwrite: true + + - from: Containers/PoiseuilleTestContainer.sif + to: poiseuille_test/PoiseuilleTestContainer.sif + overwrite: true + +collect: + - from: poiseuille_test/POISEUILLE_TEST.out + to: POISEUILLE_TEST.out + overwrite: true + +#clean: +# - poiseuille_test/PoiseuilleTestContainer.sif + +sbatch: poiseuille_test/slurm.job +continue_if_job_fails: true diff --git a/Python/SlurmTests/poiseuille/settings.py b/Python/SlurmTests/poiseuille/settings.py index 4b4a1e4e9cc7f6118a60c22a40c70b027e3ac4e2..a3cdc5dc8b627612c2d57a58db36c9fbaa72efac 100644 --- a/Python/SlurmTests/poiseuille/settings.py +++ b/Python/SlurmTests/poiseuille/settings.py @@ -1,25 +1,58 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. 
VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file settings.py +! \ingroup Poiseuille +! \author Sven Marcus, Henry Korb +======================================================================================= +""" import os from acousticscaling import OneDirectionalAcousticScaling -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters +from pyfluids import cpu -grid_params = GridParameters() +grid_params = cpu.parameters.GridParameters() grid_params.node_distance = 1 grid_params.number_of_nodes_per_direction = [1, 1, 16] grid_params.blocks_per_direction = [1, 1, 4] grid_params.periodic_boundary_in_x1 = True grid_params.periodic_boundary_in_x2 = True -physical_params = PhysicalParameters() +physical_params = cpu.parameters.PhysicalParameters() physical_params.lattice_viscosity = 1e-4 -runtime_params = RuntimeParameters() +runtime_params = cpu.parameters.RuntimeParameters() runtime_params.number_of_threads = int(os.environ["PYFLUIDS_NUM_THREADS"]) runtime_params.number_of_timesteps = 4_000_000 runtime_params.timestep_log_interval = 1_000_000 -kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) +kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) kernel.use_forcing = True kernel.forcing_in_x1 = 5e-10 diff --git 
a/Python/SlurmTests/poiseuille/simulation_runner.py b/Python/SlurmTests/poiseuille/simulation_runner.py index 03fb24be7ea1a6468ae25ec3aa40ab59962ef91e..d54a35e72b298562f8ccec82677089f3898eec9b 100644 --- a/Python/SlurmTests/poiseuille/simulation_runner.py +++ b/Python/SlurmTests/poiseuille/simulation_runner.py @@ -5,7 +5,7 @@ from poiseuille.simulation import run_simulation from pyfluids.cpu.writer import Writer, OutputFormat -scale_level = int(os.environ["PYFLUIDS_SCALE_LEVEL"]) +scale_level = int(os.environ.get("PYFLUIDS_SCALE_LEVEL", 1)) grid_params, physical_params, runtime_params, kernel = Scaling.configuration_for_scale_level(scale_level) writer = Writer() diff --git a/Python/SlurmTests/poiseuille/slurm.job b/Python/SlurmTests/poiseuille/slurm.job index 488fc9a42f261d69a8212cff389721fdfb9cbf6e..b4e4da271920479ade008b28d4d2e6ce6343c3d3 100644 --- a/Python/SlurmTests/poiseuille/slurm.job +++ b/Python/SlurmTests/poiseuille/slurm.job @@ -1,5 +1,6 @@ #!/bin/bash #SBATCH -J PyFluidsTest +#SBATCH -o poiseuille_test/POISEUILLE_TEST.out #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=20 @@ -9,6 +10,9 @@ #SBATCH --partition=standard source $HOME/.bashrc +module load singularity/3.9.9 + +cd poiseuille_test echo "PyFluids Poiseuille Test Case" echo "Number of tasks: ${SLURM_NTASKS}" diff --git a/Python/acousticscaling.py b/Python/acousticscaling.py index a664b8e924d648b680562b9aef11bee87b3562b1..7e71fed9fdd9f86415261ef4e22797021581f60c 100644 --- a/Python/acousticscaling.py +++ b/Python/acousticscaling.py @@ -1,22 +1,55 @@ -from pyfluids.cpu.kernel import LBMKernel -from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | 
\__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file acousticscaling.py +! \ingroup tests +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" +from pyfluids import cpu class OneDirectionalAcousticScaling: - def __init__(self, grid_parameters: GridParameters, - physical_parameters: PhysicalParameters, - runtime_parameters: RuntimeParameters, - kernel: LBMKernel): + def __init__(self, grid_parameters: cpu.parameters.GridParameters, + physical_parameters: cpu.parameters.PhysicalParameters, + runtime_parameters: cpu.parameters.RuntimeParameters, + kernel: cpu.kernel.LBMKernel): self._grid_params = grid_parameters self._physical_params = physical_parameters self._runtime_params = runtime_parameters self._kernel = kernel - def configuration_for_scale_level(self, level: int = 1) -> tuple[GridParameters, - PhysicalParameters, - RuntimeParameters, - LBMKernel]: + def configuration_for_scale_level(self, level: int = 1) -> tuple[cpu.parameters.GridParameters, + cpu.parameters.PhysicalParameters, + cpu.parameters.RuntimeParameters, + cpu.kernel.LBMKernel]: if level < 0: raise ValueError("level must be >= 0") @@ -27,8 +60,8 @@ class OneDirectionalAcousticScaling: return grid_params, physical_params, runtime_params, kernel - def clone_grid_params_for_level(self, level) -> GridParameters: - grid_params = GridParameters() + def clone_grid_params_for_level(self, level) -> cpu.parameters.GridParameters: + grid_params = cpu.parameters.GridParameters() grid_params.reference_direction_index = self._grid_params.reference_direction_index grid_params.periodic_boundary_in_x1 = self._grid_params.periodic_boundary_in_x1 grid_params.periodic_boundary_in_x2 = self._grid_params.periodic_boundary_in_x2 @@ -51,7 +84,7 @@ class OneDirectionalAcousticScaling: return grid_params def clone_physical_parameters(self, level): - physical_params = PhysicalParameters() + physical_params = cpu.parameters.PhysicalParameters() physical_params.lattice_viscosity = self._physical_params.lattice_viscosity if level > 0: @@ -60,7 
+93,7 @@ class OneDirectionalAcousticScaling: return physical_params def clone_runtime_params_for_level(self, level): - runtime_params = RuntimeParameters() + runtime_params = cpu.parameters.RuntimeParameters() runtime_params.number_of_timesteps = self._runtime_params.number_of_timesteps runtime_params.number_of_threads = self._runtime_params.number_of_threads runtime_params.timestep_log_interval = self._runtime_params.timestep_log_interval @@ -71,7 +104,7 @@ class OneDirectionalAcousticScaling: return runtime_params def clone_kernel_for_level(self, level): - kernel = LBMKernel(self._kernel.type) + kernel = cpu.kernel.LBMKernel(self._kernel.type) kernel.use_forcing = self._kernel.use_forcing kernel.forcing_in_x1 = self._kernel.forcing_in_x1 kernel.forcing_in_x2 = self._kernel.forcing_in_x2 diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py index 6e3c8608617df1267535984d53307dea9184c6ab..721af737ff6ef3340c3c2f6204aa6a7824cd1d2f 100644 --- a/Python/actuator_line/actuator_line.py +++ b/Python/actuator_line/actuator_line.py @@ -1,23 +1,48 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. 
VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file actuator_line.py +! \ingroup actuator_line +! \author Henry Korb, Henrik Asmuth +======================================================================================= +""" #%% import numpy as np from pathlib import Path from mpi4py import MPI -from pyfluids import basics, gpu, logger +from pyfluids.bindings import basics, gpu, logger #%% -reference_diameter = 126 - -length = np.array([29,6,6])*reference_diameter -viscosity = 1.56e-5 -velocity = 9 -mach = 0.1 -nodes_per_diameter = 32 - -sim_name = "ActuatorLine" -config_file = Path(__file__).parent/Path("config.txt") +sim_name = "ABL" +config_file = Path(__file__).parent/"configActuatorLine.txt" output_path = Path(__file__).parent/Path("output") output_path.mkdir(exist_ok=True) -t_out = 100. -t_end = 500. 
+ #%% logger.Logger.initialize_logger() @@ -25,87 +50,175 @@ basics.logger.Logger.add_stdout() basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW) basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE) basics.logger.Logger.enable_printed_rank_numbers(True) -# %% -comm = gpu.Communicator.get_instance() #%% grid_factory = gpu.grid_generator.GridFactory.make() grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory) +communicator = gpu.Communicator.get_instance() -#%% -dx = reference_diameter/nodes_per_diameter - -grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) -grid_builder.set_periodic_boundary_condition(False, False, False) -grid_builder.build_grids(basics.LbmOrGks.LBM, False) -#%% config = basics.ConfigurationFile() config.load(str(config_file)) + +para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config) +bc_factory = gpu.BoundaryConditionFactory() + #%% -para = gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid()) +turbine_diameter = config.get_float_value("turbineDiameter", 126) +boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000) +z0 = config.get_float_value("z0", 0.1) +u_star = config.get_float_value("u_star", 0.4) + +kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant + +viscosity = config.get_float_value("viscosity", 1.56e-5) + +velocity = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s + +mach = config.get_float_value("Ma", 0.1) +nodes_per_height = config.get_uint_value("nz", 64) + + +turb_pos = np.array([3,3,3])*turbine_diameter +epsilon = config.get_float_value("SmearingWidth", 5) +density = config.get_float_value("Density", 1.225) +level = 0 +n_blades = 3 +n_blade_nodes = config.get_int_value("NumberOfNodesPerAL", 32) + +read_precursor = config.get_bool_value("readPrecursor", False) + +if read_precursor: + nTReadPrecursor = 
config.get_int_value("nTimestepsReadPrecursor") + use_distributions = config.get_bool_value("useDistributions", False) + precursor_directory = config.get_string_value("precursorDirectory") + +# all in s +t_start_out = config.get_float_value("tStartOut") +t_out = config.get_float_value("tOut") +t_end = config.get_float_value("tEnd") # total time of simulation +t_start_averaging = config.get_float_value("tStartAveraging") +t_start_tmp_averaging = config.get_float_value("tStartTmpAveraging") +t_averaging = config.get_float_value("tAveraging") +t_start_out_probe = config.get_float_value("tStartOutProbe") +t_out_probe = config.get_float_value("tOutProbe") + +#%% +length = np.array([6,4,1])*boundary_layer_height +dx = boundary_layer_height/nodes_per_height dt = dx * mach / (np.sqrt(3) * velocity) -velocity_lb = velocity * dt / dx # LB units -viscosity_lb = viscosity * dt / (dx * dx) # LB units +velocity_ratio = dx/dt +velocity_LB = velocity / velocity_ratio # LB units +viscosity_LB = viscosity / (velocity_ratio * dx) # LB units +pressure_gradient = u_star * u_star / boundary_layer_height +pressure_gradient_LB = pressure_gradient * (dt*dt)/dx + +logger.vf_log_info(f"velocity [dx/dt] = {velocity_LB}") +logger.vf_log_info(f"dt = {dt}") +logger.vf_log_info(f"dx = {dx}") +logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}") +logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}") +logger.vf_log_info(f"dpdx = {pressure_gradient}") +logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}") + #%% -para.set_devices([0]) para.set_output_prefix(sim_name) -para.set_output_path(str(output_path)) -para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix()) para.set_print_files(True) -para.set_max_level(1) -#%% -para.set_velocity(velocity_lb) -para.set_viscosity(viscosity_lb) + +para.set_forcing(pressure_gradient_LB, 0, 0) +para.set_velocity_LB(velocity_LB) +para.set_viscosity_LB(viscosity_LB) para.set_velocity_ratio(dx/dt) 
para.set_viscosity_ratio(dx*dx/dt) -para.set_main_kernel("TurbulentViscosityCumulantK17CompChim") -para.set_use_AMD(True) -para.set_SGS_constant(0.083) +para.set_density_ratio(1.0) -def init_func(coord_x, coord_y, coord_z): - return [0.0, velocity_lb, 0.0, 0.0] +para.set_main_kernel("CumulantK17") -para.set_initial_condition(init_func) -para.set_t_out(int(t_out/dt)) -para.set_t_end(int(t_end/dt)) +para.set_timestep_start_out(int(t_start_out/dt)) +para.set_timestep_out(int(t_out/dt)) +para.set_timestep_end(int(t_end/dt)) para.set_is_body_force(True) - #%% -grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_lb, 0.0, 0.0) - -grid_builder.set_velocity_boundary_condition(gpu.SideType.MY, velocity_lb, 0.0, 0.0) -grid_builder.set_velocity_boundary_condition(gpu.SideType.PY, velocity_lb, 0.0, 0.0) - -grid_builder.set_velocity_boundary_condition(gpu.SideType.MZ, velocity_lb, 0.0, 0.0) -grid_builder.set_velocity_boundary_condition(gpu.SideType.PZ, velocity_lb, 0.0, 0.0) +tm_factory = gpu.TurbulenceModelFactory(para) +tm_factory.read_config_file(config) +#%% +grid_scaling_factory = gpu.GridScalingFactory() +grid_scaling_factory.set_scaling_factory(gpu.GridScaling.ScaleCompressible) -grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0.0) +grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) +grid_builder.set_periodic_boundary_condition(not read_precursor, True, False) +grid_builder.build_grids(basics.LbmOrGks.LBM, False) +sampling_offset = 2 +if read_precursor: + precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK) + grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0) + +grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0, dx) +para.set_has_wall_model_monitor(True) +grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1) + +if read_precursor: + grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 
0) +bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack) +bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) +bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective) +if read_precursor: + bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor) +para.set_outflow_pressure_correction_factor(0.0); #%% -cuda_memory_manager = gpu.CudaMemoryManager(para) -grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm) +# don't use python init functions, they are very slow! Just kept as an example. +# Define lambda in bindings and set it here. +# def init_func(coord_x, coord_y, coord_z): +# return [ +# 0.0, +# (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1)) * dt / dx, +# 2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.) * dt / dx, +# 8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) 
* dt / dx] +# para.set_initial_condition(init_func) +para.set_initial_condition_perturbed_log_law(u_star, z0, length[0], length[2], boundary_layer_height, velocity_ratio) + #%% -turb_pos = np.array([3,3,3])*reference_diameter -epsilon = 5 +turb_pos = np.array([3,3,3])*turbine_diameter +epsilon = 1.5*dx density = 1.225 level = 0 n_blades = 3 n_blade_nodes = 32 -alm = gpu.ActuatorLine(n_blades, density, n_blade_nodes, epsilon, *turb_pos, reference_diameter, level, dt, dx) +omega = 1 +blade_radii = np.arange(n_blade_nodes, dtype=np.float32)/(0.5*turbine_diameter) +alm = gpu.ActuatorFarm(n_blades, density, n_blade_nodes, epsilon, level, dt, dx, True) +alm.add_turbine(turb_pos[0],turb_pos[1],turb_pos[2], turbine_diameter, omega, 0, 0, blade_radii) para.add_actuator(alm) #%% -point_probe = gpu.probes.PointProbe("pointProbe", str(output_path), 100, 1, 500, 100) -point_probe.add_probe_points_from_list(np.array([1,2,5])*reference_diameter, np.array([3,3,3])*reference_diameter, np.array([3,3,3])*reference_diameter) -point_probe.add_statistic(gpu.probes.Statistic.Means) - -para.add_probe(point_probe) - -plane_probe = gpu.probes.PlaneProbe("planeProbe", str(output_path), 100, 1, 500, 100) -plane_probe.set_probe_plane(5*reference_diameter, 0, 0, dx, length[1], length[2]) -para.add_probe(plane_probe) +planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z') +planar_average_probe.add_all_available_statistics() +planar_average_probe.set_file_name_to_n_out() +para.add_probe(planar_average_probe) #%% -sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator) +wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) +wall_model_probe.add_all_available_statistics() 
+wall_model_probe.set_file_name_to_n_out() +wall_model_probe.set_force_output_to_stress(True) +if para.get_is_body_force(): + wall_model_probe.set_evaluate_pressure_gradient(True) +para.add_probe(wall_model_probe) + +plane_locs = [100,] +if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0]) + +for n_probe, probe_pos in enumerate(plane_locs): + plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt)) + plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2]) + plane_probe.add_all_available_statistics() + para.add_probe(plane_probe) +#%% +cuda_memory_manager = gpu.CudaMemoryManager(para) +grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator) +#%% +#%% +sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory, grid_scaling_factory) #%% sim.run() -MPI.Finalize() \ No newline at end of file +MPI.Finalize() + diff --git a/Python/actuator_line/config.txt b/Python/actuator_line/config.txt deleted file mode 100644 index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000 --- a/Python/actuator_line/config.txt +++ /dev/null @@ -1,2 +0,0 @@ -Path = . -GridPath = . diff --git a/Python/actuator_line/configActuatorLine.txt b/Python/actuator_line/configActuatorLine.txt new file mode 100644 index 0000000000000000000000000000000000000000..c45d170f039274ab355f3fe1dc044536f1f29e6f --- /dev/null +++ b/Python/actuator_line/configActuatorLine.txt @@ -0,0 +1,39 @@ +################################################## +#informations for Writing +################################################## +Path = . +################################################## +#informations for reading +################################################## +GridPath = . 
+################################################## +Devices = 0 +################################################## +tStartOut = 0 +tOut = 100000 +tEnd = 300000 +################################################## +tStartAveraging = 0 +tStartTmpAveraging = 100000 +tAveraging = 200 +tStartOutProbe = 0 +tOutProbe = 1000 +################################################## +Ma = 0.1 +nz = 96 + +bodyForce = true +SGSconstant = 0.333 +TurbulenceModel = QR + +QuadricLimiterP = 100000.0 +QuadricLimiterM = 100000.0 +QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 1 +precursorFile = precursor/Precursor + +################################################## +turbineDiameter = 126.0 diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py index 1c01f50946b49bc0ddab7e50065a24aab4ae869f..6f6c64bc072d3afbb8aa5febbec209c26af2deee 100644 --- a/Python/boundary_layer/boundary_layer.py +++ b/Python/boundary_layer/boundary_layer.py @@ -1,37 +1,48 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. 
VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file boundary_layer.py +! \ingroup boundary_layer +! \author Henry Korb, Henrik Asmuth +======================================================================================= +""" #%% import numpy as np from pathlib import Path from mpi4py import MPI from pyfluids import basics, gpu, logger #%% -reference_height = 1000 # boundary layer height in m - -length = np.array([6,4,1])*reference_height -viscosity = 1.56e-5 -mach = 0.1 -nodes_per_height = 32 - -z_0 = 0.1 -u_star = 0.4 -kappa = 0.4 - -velocity = 0.5*u_star/kappa*np.log(length[2]/z_0+1) -flow_through_time = length[0]/velocity -use_AMD = True - - -sim_name = "BoundaryLayer" -config_file = Path(__file__).parent/Path("config.txt") +sim_name = "ABL" +config_file = Path(__file__).parent/"configBoundaryLayer.txt" output_path = Path(__file__).parent/Path("output") output_path.mkdir(exist_ok=True) -t_out = 1000. -t_end = 5000. 
-t_start_averaging = 0 -t_start_tmp_averaging = 100_000 -t_averaging = 200 -t_start_out_probe = 0 -t_out_probe = 1000 #%% logger.Logger.initialize_logger() @@ -39,95 +50,161 @@ basics.logger.Logger.add_stdout() basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW) basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE) basics.logger.Logger.enable_printed_rank_numbers(True) -# %% -comm = gpu.Communicator.get_instance() #%% grid_factory = gpu.grid_generator.GridFactory.make() grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory) +communicator = gpu.Communicator.get_instance() + +config = basics.ConfigurationFile() +config.load(str(config_file)) + +para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config) +bc_factory = gpu.BoundaryConditionFactory() #%% -dx = reference_height/nodes_per_height -dt = dx * mach / (np.sqrt(3) * velocity) -velocity_lb = velocity * dt / dx # LB units -viscosity_lb = viscosity * dt / (dx * dx) # LB units +boundary_layer_height = config.get_float_value("boundaryLayerHeight", 1000) +z0 = config.get_float_value("z0", 0.1) +u_star = config.get_float_value("u_star", 0.4) -pressure_gradient = u_star**2 / reference_height -pressure_gradient_lb = pressure_gradient * dt**2 / dx +kappa = config.get_float_value("vonKarmanConstant", 0.4) # von Karman constant -logger.vf_log_info(f"velocity = {velocity_lb:1.6} dx/dt") -logger.vf_log_info(f"dt = {dt:1.6}") -logger.vf_log_info(f"dx = {dx:1.6}") -logger.vf_log_info(f"u* = {u_star:1.6}") -logger.vf_log_info(f"dpdx = {pressure_gradient:1.6}") -logger.vf_log_info(f"dpdx = {pressure_gradient_lb:1.6} dx/dt^2") -logger.vf_log_info(f"viscosity = {viscosity_lb:1.6} dx^2/dt") +viscosity = config.get_float_value("viscosity", 1.56e-5) +velocity = 0.5*u_star/kappa*np.log(boundary_layer_height/z0+1) #0.5 times max mean velocity at the top in m/s -#%% -config = basics.ConfigurationFile() -config.load(str(config_file)) -#%% -para = 
gpu.Parameter(config, comm.get_number_of_process(), comm.get_pid()) +mach = config.get_float_value("Ma", 0.1) +nodes_per_height = config.get_uint_value("nz", 64) + + + +write_precursor = config.get_bool_value("_p", False) +read_precursor = config.get_bool_value("readPrecursor", False) + +if write_precursor: + nTWritePrecursor = config.get_int_value("nTimestepsWritePrecursor") + t_start_precursor = config.get_float_value("tStartPrecursor") + pos_x_precursor = config.get_float_value("posXPrecursor") +if read_precursor: + nTReadPrecursor = config.get_int_value("nTimestepsReadPrecursor") +if write_precursor or read_precursor: + use_distributions = config.get_bool_value("useDistributions", False) + precursor_directory = config.get_string_value("precursorDirectory") + +# all in s +t_start_out = config.get_float_value("tStartOut") +t_out = config.get_float_value("tOut") +t_end = config.get_float_value("tEnd") # total time of simulation + +t_start_averaging = config.get_float_value("tStartAveraging") +t_start_tmp_averaging = config.get_float_value("tStartTmpAveraging") +t_averaging = config.get_float_value("tAveraging") +t_start_out_probe = config.get_float_value("tStartOutProbe") +t_out_probe = config.get_float_value("tOutProbe") + +#%% +length = np.array([6,4,1])*boundary_layer_height +dx = boundary_layer_height/nodes_per_height +dt = dx * mach / (np.sqrt(3) * velocity) +velocity_LB = velocity * dt / dx # LB units +viscosity_LB = viscosity * dt / (dx * dx) # LB units +pressure_gradient = u_star * u_star / boundary_layer_height +pressure_gradient_LB = pressure_gradient * (dt*dt)/dx + +logger.vf_log_info(f"velocity [dx/dt] = {velocity_LB}") +logger.vf_log_info(f"dt = {dt}") +logger.vf_log_info(f"dx = {dx}") +logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}") +logger.vf_log_info(f"u* /(dx/dt) = {u_star*dt/dx}") +logger.vf_log_info(f"dpdx = {pressure_gradient}") +logger.vf_log_info(f"dpdx /(dx/dt^2) = {pressure_gradient_LB}") + +#%% #%% 
-para.set_devices([0]) para.set_output_prefix(sim_name) -para.set_output_path(str(output_path)) -para.set_f_name(para.get_output_path() + "/" + para.get_output_prefix()) para.set_print_files(True) -para.set_max_level(1) -#%% -para.set_velocity(velocity_lb) -para.set_viscosity(viscosity_lb) + +para.set_forcing(pressure_gradient_LB, 0, 0) +para.set_velocity_LB(velocity_LB) +para.set_viscosity_LB(viscosity_LB) para.set_velocity_ratio(dx/dt) para.set_viscosity_ratio(dx*dx/dt) -para.set_use_AMD(use_AMD) +para.set_density_ratio(1.0) + +para.set_main_kernel("CumulantK17") -para.set_main_kernel("TurbulentViscosityCumulantK17CompChim" if para.get_use_AMD() else "CummulantK17CompChim") +para.set_timestep_start_out(int(t_start_out/dt)) +para.set_timestep_out(int(t_out/dt)) +para.set_timestep_end(int(t_end/dt)) +para.set_is_body_force(config.get_bool_value("bodyForce")) +#%% +tm_factory = gpu.TurbulenceModelFactory(para) +tm_factory.read_config_file(config) +#%% +grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) +grid_builder.set_periodic_boundary_condition(not read_precursor, True, False) +grid_builder.build_grids(basics.LbmOrGks.LBM, False) -para.set_SGS_constant(0.083) +sampling_offset = 2 +if read_precursor: + precursor = gpu.create_file_collection(precursor_directory + "/precursor", gpu.FileType.VTK) + grid_builder.set_precursor_boundary_condition(gpu.SideType.MX, precursor, nTReadPrecursor, 0, 0, 0) +grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0, 0, 1, sampling_offset, z0/dx) +para.set_has_wall_model_monitor(True) +grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0, 0, -1) + +if read_precursor: + grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0) +bc_factory.set_stress_boundary_condition(gpu.StressBC.StressPressureBounceBack) +bc_factory.set_slip_boundary_condition(gpu.SlipBC.SlipBounceBack) +bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective) 
+bc_factory.set_precursor_boundary_condition(gpu.PrecursorBC.DistributionsPrecursor if use_distributions else gpu.PrecursorBC.VelocityPrecursor) +para.set_outflow_pressure_correction_factor(0.0); +#%% def init_func(coord_x, coord_y, coord_z): return [ 0.0, - (u_star/kappa*np.log(max(coord_z/z_0,0)+1) + 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2]))/((coord_z/reference_height)**2+0.1)*dt/dx, - 2*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/length[2])/((coord_z/reference_height)**2+0.1)*dt/dx, - 8*u_star/kappa*(np.sin(np.pi*8*coord_y/reference_height)*np.sin(np.pi*8*coord_z/reference_height)+np.sin(np.pi*8*coord_x/length[0]))/((length[2]/2-coord_z)**2+0.1)*dt/dx - ] - + (u_star/0.4 * np.log(np.maximum(coord_z,z0)/z0) + 2.0*np.sin(np.pi*16*coord_x/length[0])*np.sin(np.pi*8*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1)) * dt / dx, + 2.0*np.sin(np.pi*16.*coord_x/length[0])*np.sin(np.pi*8.*coord_z/boundary_layer_height)/(np.square(coord_z/boundary_layer_height)+1.) * dt / dx, + 8.0*u_star/0.4*(np.sin(np.pi*8.0*coord_y/boundary_layer_height)*np.sin(np.pi*8.0*coord_z/boundary_layer_height)+np.sin(np.pi*8.0*coord_x/length[0]))/(np.square(length[2]/2.0-coord_z)+1.) 
* dt / dx] para.set_initial_condition(init_func) -para.set_t_out(int(t_out/dt)) -para.set_t_end(int(t_end/dt)) -para.set_is_body_force(True) -para.set_has_wall_model_monitor(True) - -grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx) -grid_builder.set_periodic_boundary_condition(True, True, False) -grid_builder.build_grids(basics.LbmOrGks.LBM, False) #%% -sampling_offset = 2 -grid_builder.set_stress_boundary_condition(gpu.SideType.MZ, 0.0, 0.0, 1.0, sampling_offset, z_0/dx) -grid_builder.set_slip_boundary_condition(gpu.SideType.PZ, 0.0, 0.0, 0.0) +planar_average_probe = gpu.probes.PlanarAverageProbe("horizontalPlanes", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt) , int(t_start_out_probe/dt), int(t_out_probe/dt), 'z') +planar_average_probe.add_all_available_statistics() +planar_average_probe.set_file_name_to_n_out() +para.add_probe(planar_average_probe) +#%% +wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), 0, int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) +wall_model_probe.add_all_available_statistics() +wall_model_probe.set_file_name_to_n_out() +wall_model_probe.set_force_output_to_stress(True) +if para.get_is_body_force(): + wall_model_probe.set_evaluate_pressure_gradient(True) +para.add_probe(wall_model_probe) + +plane_locs = [100,] +if read_precursor: plane_locs.extend([1000, 1500, 2000, 2500, 0]) + +for n_probe, probe_pos in enumerate(plane_locs): + plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), 10, int(t_start_out_probe/dt), int(t_out_probe/dt)) + plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2]) + plane_probe.add_all_available_statistics() + para.add_probe(plane_probe) + +if write_precursor: + precursor_writer = gpu.PrecursorWriter("precursor", para.get_output_path() + precursor_directory, pos_x_precursor, 0,length[1], 0, length[2], 
t_start_precursor/dt, nTWritePrecursor, gpu.OutputVariable.Distributions if use_distributions else gpu.OutputVariable.Velocities) + para.add_probe(precursor_writer) #%% cuda_memory_manager = gpu.CudaMemoryManager(para) -grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, comm) - +grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator) #%% -wall_probe = gpu.probes.WallModelProbe("wallModelProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt/4), int(t_start_out_probe/dt), int(t_out_probe/dt)) -wall_probe.add_all_available_statistics() -wall_probe.set_file_name_to_n_out() -wall_probe.set_force_output_to_stress(True) -if para.get_is_body_force(): - wall_probe.set_evaluate_pressure_gradient(True) -planar_probe = gpu.probes.PlanarAverageProbe("planarAverageProbe", str(output_path), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt), "z") -para.add_probe(wall_probe) - #%% -sim = gpu.Simulation(para, cuda_memory_manager, comm, grid_generator) +sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory) #%% sim.run() MPI.Finalize() \ No newline at end of file diff --git a/Python/boundary_layer/config.txt b/Python/boundary_layer/config.txt deleted file mode 100644 index e4c778c4cc048f54c0a32310e6bf4a7343a263fa..0000000000000000000000000000000000000000 --- a/Python/boundary_layer/config.txt +++ /dev/null @@ -1,2 +0,0 @@ -Path = . -GridPath = . 
diff --git a/Python/boundary_layer/configBoundaryLayer.txt b/Python/boundary_layer/configBoundaryLayer.txt new file mode 100644 index 0000000000000000000000000000000000000000..83e7861a5fb85ea800d187699f1c6c1409422f0a --- /dev/null +++ b/Python/boundary_layer/configBoundaryLayer.txt @@ -0,0 +1,42 @@ +################################################## +#informations for Writing +################################################## +Path = . +################################################## +#informations for reading +################################################## +GridPath = . +################################################## +Devices = 0 +################################################## +tStartOut = 0 +tOut = 100000 +tEnd = 300000 +################################################## +tStartAveraging = 0 +tStartTmpAveraging = 100000 +tAveraging = 200 +tStartOutProbe = 0 +tOutProbe = 1000 +################################################## +Ma = 0.1 +nz = 96 + +bodyForce = true +UseAMD = true +SGSconstant = 0.2 +QuadricLimiterP = 100000.0 +QuadricLimiterM = 100000.0 +QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 10 +precursorFile = precursor/Precursor + +################################################## +writePrecursor = false +nTimestepsWritePrecursor = 10 + +tStartPrecursor = 100 +posXPrecursor = 3000 \ No newline at end of file diff --git a/Python/cubeflow/simulation.py b/Python/cubeflow/simulation.py index 9e77e8d747c072188d8d81150afa8e2ccb76a792..deb0411963aec65522af45cc48d7367f103232c6 100644 --- a/Python/cubeflow/simulation.py +++ b/Python/cubeflow/simulation.py @@ -1,13 +1,42 @@ -from pyfluids.cpu import Simulation -from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition, DensityBoundaryCondition -from pyfluids.cpu.geometry import GbCuboid3D -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters 
import PhysicalParameters, RuntimeParameters, GridParameters -from pyfluids.cpu.writer import Writer, OutputFormat -from pymuparser import Parser - +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file simulation.py +! \ingroup cubeflow +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" import os +from pyfluids import cpu +from pymuparser import Parser + def get_max_length(number_of_nodes_per_direction, delta_x): return (number_of_nodes_per_direction[0] * delta_x, @@ -15,10 +44,10 @@ def get_max_length(number_of_nodes_per_direction, delta_x): number_of_nodes_per_direction[2] * delta_x) -physical_params = PhysicalParameters() +physical_params = cpu.parameters.PhysicalParameters() physical_params.lattice_viscosity = 0.005 -grid_params = GridParameters() +grid_params = cpu.parameters.GridParameters() grid_params.number_of_nodes_per_direction = [200, 120, 120] grid_params.blocks_per_direction = [2, 2, 2] grid_params.node_distance = 0.125 @@ -26,7 +55,7 @@ grid_params.periodic_boundary_in_x1 = False grid_params.periodic_boundary_in_x2 = True grid_params.periodic_boundary_in_x3 = True -runtime_params = RuntimeParameters() +runtime_params = cpu.parameters.RuntimeParameters() runtime_params.timestep_log_interval = 1000 runtime_params.number_of_timesteps = 50000 runtime_params.number_of_threads = int(os.environ.get("OMP_NUM_THREADS", 4)) @@ -39,46 +68,46 @@ def run_simulation(physical_parameters=physical_params, grid_parameters=grid_par min_x, min_y, min_z = 0, 0, 0 max_x, max_y, max_z = get_max_length(grid_parameters.number_of_nodes_per_direction, grid_parameters.node_distance) - bottom_wall = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z, max_x + wall_thickness, + bottom_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z, max_x + wall_thickness, max_y + wall_thickness, min_z - wall_thickness) - top_wall = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, max_z, max_x + wall_thickness, + top_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, max_z, max_x + wall_thickness, max_y + wall_thickness, max_z + wall_thickness) - left_wall = 
GbCuboid3D(min_x - wall_thickness, min_y, min_z - wall_thickness, max_x + wall_thickness, + left_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y, min_z - wall_thickness, max_x + wall_thickness, min_y - wall_thickness, max_z + wall_thickness) - right_wall = GbCuboid3D(min_x - wall_thickness, max_y, min_z - wall_thickness, max_x + wall_thickness, + right_wall = cpu.geometry.GbCuboid3D(min_x - wall_thickness, max_y, min_z - wall_thickness, max_x + wall_thickness, max_y + wall_thickness, max_z + wall_thickness) - obstacle = GbCuboid3D(7, 7, 7, 8, 8, 8) + obstacle = cpu.geometry.GbCuboid3D(7, 7, 7, 8, 8, 8) - velocity_boundary = GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z - wall_thickness, min_x, + velocity_boundary = cpu.geometry.GbCuboid3D(min_x - wall_thickness, min_y - wall_thickness, min_z - wall_thickness, min_x, max_y + wall_thickness, max_z + wall_thickness) - outflow_boundary = GbCuboid3D(max_x, min_y - wall_thickness, min_z - wall_thickness, max_x + wall_thickness, + outflow_boundary = cpu.geometry.GbCuboid3D(max_x, min_y - wall_thickness, min_z - wall_thickness, max_x + wall_thickness, max_y + wall_thickness, max_z + wall_thickness) - no_slip_bc = NoSlipBoundaryCondition() + no_slip_bc = cpu.boundaryconditions.NoSlipBoundaryCondition() - outflow_bc = DensityBoundaryCondition() + outflow_bc = cpu.boundaryconditions.DensityBoundaryCondition() velocity_function = Parser() velocity_function.define_constant("u", 0.07) velocity_function.expression = "u" - velocity_bc = VelocityBoundaryCondition(True, False, False, velocity_function, 0, -10) + velocity_bc = cpu.boundaryconditions.VelocityBoundaryCondition(True, False, False, velocity_function, 0, -10) - kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) + kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) # kernel.use_forcing = True # kernel.forcing_in_x1 = 3e-6 - writer = Writer() + writer = cpu.writer.Writer() 
writer.output_path = "./output" - writer.output_format = OutputFormat.BINARY + writer.output_format = cpu.writer.OutputFormat.BINARY - simulation = Simulation() + simulation = cpu.Simulation() simulation.set_writer(writer) simulation.set_physical_parameters(physical_parameters) diff --git a/Python/liddrivencavity/simulation.py b/Python/liddrivencavity/simulation.py index 155fad2f6f8aade0368c8a7006b88f7985f8822c..3c247b87a102e3c5a720f20748acc9f9f50bb178 100644 --- a/Python/liddrivencavity/simulation.py +++ b/Python/liddrivencavity/simulation.py @@ -1,32 +1,61 @@ -from pyfluids.cpu import Simulation -from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition, VelocityBoundaryCondition -from pyfluids.cpu.geometry import GbCuboid3D -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters -from pyfluids.cpu.writer import Writer, OutputFormat +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. 
VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file simulation.py +! \ingroup liddrivencavity +! \author Sven Marcus, Henry Korb +======================================================================================= +""" +from pyfluids import cpu from pymuparser import Parser -runtime_params = RuntimeParameters() +runtime_params = cpu.parameters.RuntimeParameters() runtime_params.number_of_threads = 4 runtime_params.number_of_timesteps = 10000 runtime_params.timestep_log_interval = 1000 -physical_params = PhysicalParameters() +physical_params = cpu.parameters.PhysicalParameters() physical_params.lattice_viscosity = 0.005 -grid_params = GridParameters() +grid_params = cpu.parameters.GridParameters() grid_params.number_of_nodes_per_direction = [64, 64, 64] grid_params.blocks_per_direction = [2, 2, 2] grid_params.node_distance = 1 / 10 def run_simulation(physical_params=physical_params, grid_params=grid_params, runtime_params=runtime_params): - simulation = Simulation() - kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) + simulation = cpu.Simulation() + kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) - writer = Writer() + writer = cpu.writer.Writer() writer.output_path = "./output" - writer.output_format = OutputFormat.BINARY + writer.output_format = cpu.writer.OutputFormat.BINARY 
simulation.set_grid_parameters(grid_params) simulation.set_physical_parameters(physical_params) @@ -34,12 +63,12 @@ def run_simulation(physical_params=physical_params, grid_params=grid_params, run simulation.set_kernel_config(kernel) simulation.set_writer(writer) - no_slip_bc_adapter = NoSlipBoundaryCondition() + no_slip_bc_adapter = cpu.boundaryconditions.NoSlipBoundaryCondition() fct = Parser() fct.expression = "u" fct.define_constant("u", 0.005) - velocity_bc_adapter = VelocityBoundaryCondition(True, True, False, fct, 0, -10.0) + velocity_bc_adapter = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, False, fct, 0, -10.0) g_min_x1, g_min_x2, g_min_x3 = 0, 0, 0 g_max_x1 = grid_params.number_of_nodes_per_direction[0] * grid_params.node_distance @@ -48,12 +77,12 @@ def run_simulation(physical_params=physical_params, grid_params=grid_params, run dx = grid_params.node_distance - wall_x_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_min_x1, g_max_x2 + dx, g_max_x3) - wall_x_max = GbCuboid3D(g_max_x1, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3) - wall_y_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_min_x2, g_max_x3) - wall_y_max = GbCuboid3D(g_min_x1 - dx, g_max_x2, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3) - wall_z_min = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_min_x3) - wall_z_max = GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_max_x3, g_max_x1 + dx, g_max_x2 + dx, g_max_x3 + dx) + wall_x_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_min_x1, g_max_x2 + dx, g_max_x3) + wall_x_max = cpu.geometry.GbCuboid3D(g_max_x1, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_max_x3) + wall_y_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_min_x2, g_max_x3) + wall_y_max = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_max_x2, g_min_x3 - dx, g_max_x1 + dx, 
g_max_x2 + dx, g_max_x3) + wall_z_min = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_min_x3 - dx, g_max_x1 + dx, g_max_x2 + dx, g_min_x3) + wall_z_max = cpu.geometry.GbCuboid3D(g_min_x1 - dx, g_min_x2 - dx, g_max_x3, g_max_x1 + dx, g_max_x2 + dx, g_max_x3 + dx) simulation.add_object(wall_x_min, no_slip_bc_adapter, 1, "/geo/wallXmin") simulation.add_object(wall_x_max, no_slip_bc_adapter, 1, "/geo/wallXmax") diff --git a/Python/poiseuille/poiseuille_hpc.py b/Python/poiseuille/poiseuille_hpc.py index f5f5a1387c9fe234abae0c6f979cc7d5b283d1a4..b108f34445a71a686c4e22f685e26e10204113b3 100644 --- a/Python/poiseuille/poiseuille_hpc.py +++ b/Python/poiseuille/poiseuille_hpc.py @@ -1,15 +1,49 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file poiseuille_hpc.py +! \ingroup poiseuille +! \author Sven Marcus, Henry Korb +======================================================================================= +""" from poiseuille.simulation import run_simulation -from pyfluids.cpu.parameters import * +from pyfluids import cpu -grid_parameters = GridParameters() +grid_parameters = cpu.parameters.GridParameters() grid_parameters.number_of_nodes_per_direction = [64, 64, 512] grid_parameters.node_distance = 1 grid_parameters.blocks_per_direction = [1, 2, 2] -physical_parameters = PhysicalParameters() +physical_parameters = cpu.parameters.PhysicalParameters() physical_parameters.lattice_viscosity = 0.0005 -runtime_parameters = RuntimeParameters() +runtime_parameters = cpu.parameters.RuntimeParameters() runtime_parameters.number_of_threads = 4 runtime_parameters.number_of_timesteps = 1000 runtime_parameters.timestep_log_interval = 100 diff --git a/Python/poiseuille/simulation.py b/Python/poiseuille/simulation.py index d107801fa84cfe16d1d7e91d31dc3ff4b8671f02..a6f12e59fbd0a0ccad9a4db9ccde69b828cf90bf 100644 --- a/Python/poiseuille/simulation.py +++ b/Python/poiseuille/simulation.py @@ -1,35 +1,65 @@ -from pyfluids.cpu import Simulation -from pyfluids.cpu.boundaryconditions import NoSlipBoundaryCondition -from pyfluids.cpu.geometry import GbCuboid3D, State -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters import RuntimeParameters, GridParameters, PhysicalParameters -from pyfluids.cpu.writer import Writer, OutputFormat - -default_grid_params = GridParameters() +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | 
| | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file simulation.py +! \ingroup poiseuille +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" +from pyfluids import cpu + + +default_grid_params = cpu.parameters.GridParameters() default_grid_params.node_distance = 10 / 32 default_grid_params.number_of_nodes_per_direction = [8, 8, 32] default_grid_params.blocks_per_direction = [1, 1, 4] default_grid_params.periodic_boundary_in_x1 = True default_grid_params.periodic_boundary_in_x2 = True -default_physical_params = PhysicalParameters() +default_physical_params = cpu.parameters.PhysicalParameters() default_physical_params.lattice_viscosity = 0.005 -default_runtime_params = RuntimeParameters() +default_runtime_params = cpu.parameters.RuntimeParameters() default_runtime_params.number_of_threads = 4 default_runtime_params.number_of_timesteps = 10000 default_runtime_params.timestep_log_interval = 1000 -default_kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) +default_kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) default_kernel.use_forcing = True default_kernel.forcing_in_x1 = 1e-8 -default_writer = Writer() +default_writer = cpu.writer.Writer() default_writer.output_path = "./output" -default_writer.output_format = OutputFormat.BINARY +default_writer.output_format = cpu.writer.OutputFormat.BINARY -default_kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) +default_kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) default_kernel.use_forcing = True default_kernel.forcing_in_x1 = 1e-8 @@ -39,7 +69,7 @@ def run_simulation(physical_params=default_physical_params, runtime_params=default_runtime_params, kernel=default_kernel, writer=default_writer): - simulation = Simulation() + simulation = cpu.Simulation() simulation.set_kernel_config(kernel) simulation.set_physical_parameters(physical_params) @@ -47,11 +77,11 @@ def 
run_simulation(physical_params=default_physical_params, simulation.set_runtime_parameters(runtime_params) simulation.set_writer(writer) - no_slip_bc = NoSlipBoundaryCondition() + no_slip_bc = cpu.boundaryconditions.NoSlipBoundaryCondition() block_thickness = 3 * grid_params.node_distance simulation.add_object( - GbCuboid3D( + cpu.geometry.GbCuboid3D( grid_params.bounding_box.min_x1 - block_thickness, grid_params.bounding_box.min_x2 - block_thickness, grid_params.bounding_box.min_x3 - block_thickness, @@ -59,10 +89,10 @@ def run_simulation(physical_params=default_physical_params, grid_params.bounding_box.max_x2 + block_thickness, grid_params.bounding_box.min_x3), no_slip_bc, - State.SOLID, "/geo/addWallZMin") + cpu.geometry.State.SOLID, "/geo/addWallZMin") simulation.add_object( - GbCuboid3D( + cpu.geometry.GbCuboid3D( grid_params.bounding_box.min_x1 - block_thickness, grid_params.bounding_box.min_x2 - block_thickness, grid_params.bounding_box.max_x3, @@ -70,7 +100,7 @@ def run_simulation(physical_params=default_physical_params, grid_params.bounding_box.max_x2 + block_thickness, grid_params.bounding_box.max_x3 + block_thickness), no_slip_bc, - State.SOLID, "/geo/addWallZMax") + cpu.geometry.State.SOLID, "/geo/addWallZMax") simulation.run_simulation() diff --git a/Python/poiseuille/test_poiseuille_l2.py b/Python/poiseuille/test_poiseuille_l2.py index 93aa2600d5260dea7e72f3aa98db7334fe5285c6..818cba40e115945c60e4fa2ac96b3b6b5ab0bba8 100644 --- a/Python/poiseuille/test_poiseuille_l2.py +++ b/Python/poiseuille/test_poiseuille_l2.py @@ -1,3 +1,37 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | 
________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file test_poiseuille_l2.py +! \ingroup poiseuille +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" import os import shutil import unittest @@ -5,8 +39,7 @@ import unittest import matplotlib.pyplot as plt import numpy as np import pyvista as pv -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters +from pyfluids import cpu from scipy import stats from errors import normalized_l2_error @@ -33,13 +66,13 @@ class TestPoiseuilleFlow(unittest.TestCase): self.skipTest("This test is not implemented correctly yet") plt.ion() - physical_params = PhysicalParameters() + physical_params = cpu.parameters.PhysicalParameters() - runtime_params = RuntimeParameters() + runtime_params = cpu.parameters.RuntimeParameters() runtime_params.number_of_threads = os.cpu_count() runtime_params.timestep_log_interval = 10000 - kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) + kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) kernel.use_forcing = True normalized_l2_errors = [] @@ -140,7 +173,7 @@ def get_heights_from_indices(mesh, indices): def create_grid_params_with_nodes_in_column(nodes_in_column, delta_x): - grid_params = GridParameters() + grid_params = cpu.parameters.GridParameters() grid_params.node_distance = delta_x grid_params.number_of_nodes_per_direction = [1, 1, nodes_in_column] grid_params.blocks_per_direction = [1, 1, 8] diff --git a/Python/tests/test_acousticscaling.py b/Python/tests/test_acousticscaling.py index 6413123a80db8c5882fcf1dbe6f72a1f5438736c..02454b935e3a147e045f45c273392646aeca6b8c 100644 --- a/Python/tests/test_acousticscaling.py +++ b/Python/tests/test_acousticscaling.py @@ -1,9 +1,41 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | 
| | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file test_acousticscaling.py +! \ingroup tests +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" import unittest from typing import List -from pyfluids.cpu.kernel import LBMKernel, KernelType -from pyfluids.cpu.parameters import GridParameters, PhysicalParameters, RuntimeParameters - +from pyfluids import cpu from acousticscaling import OneDirectionalAcousticScaling @@ -58,18 +90,18 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase): self.assertEqual(self.grid_params.periodic_boundary_in_x2, actual_grid_params.periodic_boundary_in_x2) self.assertEqual(self.grid_params.periodic_boundary_in_x3, actual_grid_params.periodic_boundary_in_x3) - def assert_physical_params_scaled_by_factor(self, actual_params: PhysicalParameters, factor: int): + def assert_physical_params_scaled_by_factor(self, actual_params: cpu.parameters.PhysicalParameters, factor: int): self.assertEqual(self.physical_params.lattice_viscosity * factor, actual_params.lattice_viscosity) self.assertEqual(self.physical_params.bulk_viscosity_factor, actual_params.bulk_viscosity_factor) - def assert_runtime_params_scaled_by_factor(self, actual_params: RuntimeParameters, factor: int): + def assert_runtime_params_scaled_by_factor(self, actual_params: cpu.parameters.RuntimeParameters, factor: int): self.assertEqual(self.runtime_params.number_of_timesteps * factor, actual_params.number_of_timesteps) self.assertEqual(self.runtime_params.number_of_threads, actual_params.number_of_threads) self.assertEqual(self.runtime_params.timestep_log_interval, actual_params.timestep_log_interval) - def assert_kernel_forcing_scaled_by_factor(self, actual_kernel: LBMKernel, factor: int): + def assert_kernel_forcing_scaled_by_factor(self, actual_kernel: cpu.kernel.LBMKernel, factor: int): self.assertEqual(self.kernel.type, actual_kernel.type) - self.assertEqual(self.kernel.use_forcing, actual_kernel.use_forcing) + self.assertEqual(self.kernel.use_forcing, 
actual_kernel.use_forcing) self.assertAlmostEqual(self.kernel.forcing_in_x1 / factor, actual_kernel.forcing_in_x1) self.assertAlmostEqual(self.kernel.forcing_in_x2, actual_kernel.forcing_in_x2) self.assertAlmostEqual(self.kernel.forcing_in_x3, actual_kernel.forcing_in_x3) @@ -80,14 +112,14 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase): @staticmethod def make_kernel(): - kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity) + kernel = cpu.kernel.LBMKernel(cpu.kernel.KernelType.CompressibleCumulantFourthOrderViscosity) kernel.use_forcing = True kernel.forcing_in_x1 = 5e-10 return kernel @staticmethod def make_runtime_params(): - runtime_params = RuntimeParameters() + runtime_params = cpu.parameters.RuntimeParameters() runtime_params.number_of_threads = 4 runtime_params.number_of_timesteps = 4_000_000 runtime_params.timestep_log_interval = 1_000_000 @@ -95,13 +127,13 @@ class OneDirectionalAcousticScalingTest(unittest.TestCase): @staticmethod def make_physical_params(): - physical_params = PhysicalParameters() + physical_params = cpu.parameters.PhysicalParameters() physical_params.lattice_viscosity = 1e-4 return physical_params @staticmethod def make_grid_params(): - grid_params = GridParameters() + grid_params = cpu.parameters.GridParameters() grid_params.node_distance = 1 grid_params.number_of_nodes_per_direction = [1, 1, 16] grid_params.blocks_per_direction = [1, 1, 16] diff --git a/Python/tests/test_boundaryconditions.py b/Python/tests/test_boundaryconditions.py index e004ddfa21c78ea3d63a89f5dbc3bd7438a18ff1..d914c50cad2051188331b2efe604907091fa731e 100644 --- a/Python/tests/test_boundaryconditions.py +++ b/Python/tests/test_boundaryconditions.py @@ -1,5 +1,39 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | 
| / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file test_boundaryconditions.py +! \ingroup tests +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" import unittest -from pyfluids.cpu.boundaryconditions import * +from pyfluids import cpu class BoundaryConditionsTest(unittest.TestCase): @@ -8,13 +42,13 @@ class BoundaryConditionsTest(unittest.TestCase): """ Should be able to create NoSlipBoundaryCondition """ - sut = NoSlipBoundaryCondition() + sut = cpu.boundaryconditions.NoSlipBoundaryCondition() def test__can_create_velocity_bc(self): """ Should be able to create VelocityBoundaryCondition """ - sut = VelocityBoundaryCondition() + sut = cpu.boundaryconditions.VelocityBoundaryCondition() def test__can_create_velocity_bc_with_directions_function_and_time(self): """ @@ -24,7 +58,7 @@ class BoundaryConditionsTest(unittest.TestCase): parser = Parser() parser.expression = "1" - sut = VelocityBoundaryCondition(True, True, True, parser, 0, 1) + sut = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, True, parser, 0, 1) def test__can_create_velocity_bc_with_directions__function_per_direction__and__time(self): """ @@ -40,7 +74,7 @@ class BoundaryConditionsTest(unittest.TestCase): f3 = Parser() f3.expression = "1" - sut = VelocityBoundaryCondition(True, True, True, f1, f2, f3, 0, 1) + sut = cpu.boundaryconditions.VelocityBoundaryCondition(True, True, True, f1, f2, f3, 0, 1) def test__can_create_velocity_bc_with_speeds_and_times_per_direction(self): """ @@ -51,11 +85,11 @@ class BoundaryConditionsTest(unittest.TestCase): start2, end2 = 1, 2 start3, end3 = 2, 3 - sut = VelocityBoundaryCondition(vx1, start1, end1, vx2, start2, end2, vx3, start3, end3) + sut = cpu.boundaryconditions.VelocityBoundaryCondition(vx1, start1, end1, vx2, start2, end2, vx3, start3, end3) def test__can_create_non_reflecting_outflow(self): """ Should be able to create NonReflectingOutflow """ - sut = NonReflectingOutflow() + sut = cpu.boundaryconditions.NonReflectingOutflow() diff --git 
a/Python/tests/test_geometry.py b/Python/tests/test_geometry.py index 5bb89eb245b6055653b78fde381da050d402b0cc..3d297f5c176cd99f7969adf37333588d86b77627 100644 --- a/Python/tests/test_geometry.py +++ b/Python/tests/test_geometry.py @@ -1,6 +1,40 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file test_geometry.py +! \ingroup tests +! 
\author Sven Marcus, Henry Korb +======================================================================================= +""" import unittest -from pyfluids.cpu.geometry import * +from pyfluids import cpu class TestGeometry(unittest.TestCase): @@ -9,7 +43,7 @@ class TestGeometry(unittest.TestCase): """ WHEN setting point coordinates in constructor THEN point should have coordinates """ - sut = GbPoint3D(4, 8, 3) + sut = cpu.geometry.GbPoint3D(4, 8, 3) self.assertEqual(sut.x1, 4) self.assertEqual(sut.x2, 8) @@ -19,7 +53,7 @@ class TestGeometry(unittest.TestCase): """ WHEN setting point coordinates THEN point should have coordinates """ - sut = GbPoint3D() + sut = cpu.geometry.GbPoint3D() sut.x1 = 4 sut.x2 = 8 @@ -33,10 +67,10 @@ class TestGeometry(unittest.TestCase): """ WHEN setting line points THEN line should have points """ - sut = GbLine3D() + sut = cpu.geometry.GbLine3D() - point1 = GbPoint3D() - point2 = GbPoint3D() + point1 = cpu.geometry.GbPoint3D() + point2 = cpu.geometry.GbPoint3D() sut.point1 = point1 sut.point2 = point2 diff --git a/Python/tests/test_kernel.py b/Python/tests/test_kernel.py index 8f58a1c869f9e292856268d43245a75f1dcfe213..e0159bec6802cb08d73214038b177091879fee46 100644 --- a/Python/tests/test_kernel.py +++ b/Python/tests/test_kernel.py @@ -1,12 +1,46 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | 
| | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file test_kernel.py +! \ingroup tests +! \author Sven Marcus, Henry Korb +======================================================================================= +""" import unittest -from pyfluids.cpu.kernel import LBMKernel, KernelType +from pyfluids import cpu class TestLBMKernel(unittest.TestCase): def setUp(self) -> None: - self.sut = LBMKernel(KernelType.BGK) + self.sut = cpu.kernel.LBMKernel(cpu.kernel.KernelType.BGK) def test_lbm_kernel__when_use_forcing_set_to_true__use_forcing_should_be_true(self) -> None: """ @@ -57,4 +91,4 @@ class TestLBMKernel(unittest.TestCase): """ actual = self.sut.type - self.assertEqual(KernelType.BGK, actual) + self.assertEqual(cpu.kernel.KernelType.BGK, actual) diff --git a/apps/cpu/ConvectionOfVortex/CMakeLists.txt b/apps/cpu/ConvectionOfVortex/CMakeLists.txt index de3034c04bb2f2f16edd9b4bf48db81c83d15b3e..33d60676c7e0dfdde411c3c5b92a2534ea54fbfe 100644 --- a/apps/cpu/ConvectionOfVortex/CMakeLists.txt +++ b/apps/cpu/ConvectionOfVortex/CMakeLists.txt @@ -1,3 +1,6 @@ +######################################################## +## C++ PROJECT ### +######################################################## PROJECT(ConvectionOfVortex) vf_add_library(BUILDTYPE binary 
PRIVATE_LINK VirtualFluidsCore basics ${MPI_CXX_LIBRARIES} FILES cov.cpp ) diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp index 58e5aede18b9c4197b4d21b129c6347023b9390e..9d982ebac0059b4512041194100f6e1fdfa61924 100644 --- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp +++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp @@ -1,4 +1,35 @@ - +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ActuatorLine.cpp +//! \ingroup ActuatorLine +//! 
\author Henry Korb, Henrik Asmuth +//======================================================================================= #define _USE_MATH_DEFINES #include <math.h> #include <string> @@ -28,12 +59,14 @@ #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" #include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" + #include "GridGenerator/grid/GridFactory.h" #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" -#include "GridGenerator/io/STLReaderWriter/STLReader.h" -#include "GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/TransientBCSetter/TransientBCSetter.h" + ////////////////////////////////////////////////////////////////////////// @@ -44,10 +77,12 @@ #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" #include "VirtualFluids_GPU/Parameter/Parameter.h" #include "VirtualFluids_GPU/Output/FileWriter.h" -#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h" #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" @@ -63,26 +98,10 @@ LbmOrGks lbmOrGks = LBM; -const real reference_diameter = 126.0; // diameter in m - -const real L_x = 10*reference_diameter; -const real L_y = 6*reference_diameter; -const real L_z = 6*reference_diameter; - -const real viscosity = 1.56e-5; - -const real velocity = 9.0; - -const real mach = 0.1; - -const uint nodes_per_diameter = 16; - std::string 
path("."); std::string simulationName("ActuatorLine"); -const float tOut = 100; -const float tEnd = 280; // total time of simulation in s //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -98,30 +117,59 @@ void multipleLevel(const std::string& configPath) vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance(); auto gridFactory = GridFactory::make(); - gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT); auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + vf::basics::ConfigurationFile config; + config.load(configPath); + + const real reference_diameter = config.getValue<real>("ReferenceDiameter"); + const uint nodes_per_diameter = config.getValue<uint>("NodesPerDiameter"); + const real velocity = config.getValue<real>("Velocity"); + + + const real L_x = 24*reference_diameter; + const real L_y = 6*reference_diameter; + const real L_z = 6*reference_diameter; + + const real viscosity = 1.56e-5; + + const real mach = 0.1; + + + const float tStartOut = config.getValue<real>("tStartOut"); + const float tOut = config.getValue<real>("tOut"); + const float tEnd = config.getValue<real>("tEnd"); // total time of simulation + + const float tStartAveraging = config.getValue<real>("tStartAveraging"); + 
const float tStartTmpAveraging = config.getValue<real>("tStartTmpAveraging"); + const float tAveraging = config.getValue<real>("tAveraging"); + const float tStartOutProbe = config.getValue<real>("tStartOutProbe"); + const float tOutProbe = config.getValue<real>("tOutProbe"); + + SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config); + BoundaryConditionFactory bcFactory = BoundaryConditionFactory(); + GridScalingFactory scalingFactory = GridScalingFactory(); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const real dx = reference_diameter/real(nodes_per_diameter); + real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter}; + gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, L_x, L_y, L_z, dx); + gridBuilder->setNumberOfLayers(4,0); + gridBuilder->addGrid( new Cuboid( turbPos[0]-1.5*reference_diameter, turbPos[1]-1.5*reference_diameter, turbPos[2]-1.5*reference_diameter, + turbPos[0]+10.0*reference_diameter, turbPos[1]+1.5*reference_diameter, turbPos[2]+1.5*reference_diameter) , 1 ); + para->setMaxLevel(2); + scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); + gridBuilder->setPeriodicBoundaryCondition(false, false, false); gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!! 
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - vf::basics::ConfigurationFile config; - config.load(configPath); - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^ - SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config); - BoundaryConditionFactory bcFactory = BoundaryConditionFactory(); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const real dt = dx * mach / (sqrt(3) * velocity); @@ -140,14 +188,11 @@ void multipleLevel(const std::string& configPath) para->setPrintFiles(true); - para->setMaxLevel(1); - - para->setVelocityLB(velocityLB); para->setViscosityLB(viscosityLB); para->setVelocityRatio( dx / dt ); para->setViscosityRatio( dx*dx/dt ); - para->setMainKernel("CumulantK17CompChim"); + para->setMainKernel("CumulantK17"); para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { rho = (real)0.0; @@ -156,13 +201,15 @@ void multipleLevel(const std::string& configPath) vz = (real)0.0; }); + para->setTimestepStartOut( uint(tStartOut/dt) ); para->setTimestepOut( uint(tOut/dt) ); para->setTimestepEnd( uint(tEnd/dt) ); para->setIsBodyForce( true ); - + para->setUseStreams( true ); ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::MY, velocityLB, 0.0, 0.0); @@ -172,42 +219,52 @@ void 
multipleLevel(const std::string& configPath) gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible); - bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible); + bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective); + + SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para); + tmFactory->readConfigFile(config); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter}; - real epsilon = 5.f; // width of gaussian smearing - real density = 1.225f; - int level = 0; - uint nBlades = 3; - uint nBladeNodes = 32; + int level = 1; // grid level at which the turbine samples velocities and distributes forces + const real epsilon = dx*exp2(-level)*1.5; // width of gaussian smearing + const real density = 1.225f; + const uint nBlades = 3; + const uint nBladeNodes = 32; + const real tipspeed_ratio = 7.5f; // tipspeed ratio = angular vel * radius / inflow vel + const real omega = 2*tipspeed_ratio*velocity/reference_diameter; + + + SPtr<ActuatorFarm> actuator_farm = std::make_shared<ActuatorFarm>(nBlades, density, nBladeNodes, epsilon, level, dt, dx, true); + std::vector<real> bladeRadii; + real dr = reference_diameter/(nBladeNodes*2); + for(uint node=0; node<nBladeNodes; node++){ bladeRadii.emplace_back(dr*(node+1)); } + actuator_farm->addTurbine(turbPos[0], turbPos[1], turbPos[2], reference_diameter, omega, 0, 0, bladeRadii); + para->addActuator( actuator_farm ); - SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) ); - 
para->addActuator( actuator_line ); - SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", para->getOutputPath(), 100, 1, 500, 100) ); - std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter}; - std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; - std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; - pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ); - // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx); + // SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>("pointProbe", para->getOutputPath(), 100, 1, 500, 100); + // std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter}; + // std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; + // std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter}; + // pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ); + // // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx); - pointProbe->addStatistic(Statistic::Means); - pointProbe->addStatistic(Statistic::Variances); - para->addProbe( pointProbe ); + // pointProbe->addStatistic(Statistic::Means); + // pointProbe->addStatistic(Statistic::Variances); + // para->addProbe( pointProbe ); - SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", para->getOutputPath(), 100, 500, 100, 100) ); - planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z); - planeProbe->addStatistic(Statistic::Means); - para->addProbe( planeProbe ); + // SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), 100, 500, 100, 100); + // planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z); + // 
planeProbe->addStatistic(Statistic::Means); + // para->addProbe( planeProbe ); auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para); auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator); - Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory); + Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory, &scalingFactory); sim.run(); } diff --git a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt index 233994f0d32a48190d84f7044500e24b06b926a9..5799f24716777295b2f835ab00561ff767ba87b9 100644 --- a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt +++ b/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt @@ -6,3 +6,29 @@ Path = . #informations for reading ################################################## GridPath=. +################################################## +ReferenceDiameter=126 +NodesPerDiameter=32 +Velocity=9 +################################################## +tStartOut=100 +tOut=100 +tEnd=1000 +################################################## + +tStartTmpAveraging=100 +tStartAveraging=100 +tAveraging=100 +tTmpAveraging=100 +tStartOutProbe=100 +tOutProbe=100 + +################################################## +#TurbulenceModel = QR +#SGSconstant = 0.3333333 +# +#QuadricLimiterP = 100000.0 +#QuadricLimiterM = 100000.0 +#QuadricLimiterD = 100000.0 +################################################## + diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp index 991025b649d69305c030fe2f1dd1763a2137af9b..5fc31904433bfe2df0722ab1c63f574d3fcb9a35 100644 --- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp +++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp @@ -1,4 +1,35 @@ - +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ 
___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file BoundaryLayer.cpp +//! \ingroup BoundaryLayer +//! 
\author Henry Korb, Henrik Asmuth +//======================================================================================= #define _USE_MATH_DEFINES #include <math.h> #include <string> @@ -8,6 +39,7 @@ #include <fstream> #include <exception> #include <memory> +#include <numeric> ////////////////////////////////////////////////////////////////////////// @@ -19,6 +51,7 @@ #include "Core/VectorTypes.h" #include <basics/config/ConfigurationFile.h> +#include "lbm/constants/NumericConstants.h" #include <logger/Logger.h> @@ -28,12 +61,16 @@ #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" #include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" + #include "GridGenerator/grid/GridFactory.h" +#include "geometries/Cuboid/Cuboid.h" +#include "geometries/TriangularMesh/TriangularMesh.h" + #include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" #include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" -#include "GridGenerator/io/STLReaderWriter/STLReader.h" -#include "GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/TransientBCSetter/TransientBCSetter.h" ////////////////////////////////////////////////////////////////////////// @@ -44,24 +81,28 @@ #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" #include "VirtualFluids_GPU/Parameter/Parameter.h" #include "VirtualFluids_GPU/Output/FileWriter.h" -#include "VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h" #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h" +#include "VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h" #include 
"VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" +#include "utilities/communication.h" + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// std::string path("."); -std::string simulationName("BoundayLayer"); +std::string simulationName("BoundaryLayer"); +using namespace vf::lbm::constant; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -87,8 +128,16 @@ void multipleLevel(const std::string& configPath) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^ SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config); BoundaryConditionFactory bcFactory = BoundaryConditionFactory(); - + GridScalingFactory scalingFactory = GridScalingFactory(); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const int nProcs = communicator.getNummberOfProcess(); + 
const uint procID = vf::gpu::Communicator::getInstance().getPID(); + std::vector<uint> devices(10); + std::iota(devices.begin(), devices.end(), 0); + para->setDevices(devices); + para->setMaxDev(nProcs); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // @@ -100,23 +149,45 @@ void multipleLevel(const std::string& configPath) LbmOrGks lbmOrGks = LBM; - const real H = 1000.0; // boundary layer height in m + const real H = config.getValue("boundaryLayerHeight", 1000.0); // boundary layer height in m const real L_x = 6*H; const real L_y = 4*H; - const real L_z = 1*H; + const real L_z = H; + + const real z0 = config.getValue("z0", 0.1f); // roughness length in m + const real u_star = config.getValue("u_star", 0.4f); //friction velocity in m/s + const real kappa = config.getValue("vonKarmanConstant", 0.4f); // von Karman constant - const real z0 = 0.1; // roughness length in m - const real u_star = 0.4; //friction velocity in m/s - const real kappa = 0.4; // von Karman constant + const real viscosity = config.getValue("viscosity", 1.56e-5f); - const real viscosity = 1.56e-5; + const real velocity = 0.5f*u_star/kappa*log(H/z0+1.f); //0.5 times max mean velocity at the top in m/s - const real velocity = 0.5*u_star/kappa*log(L_z/z0); //0.5 times max mean velocity at the top in m/s + const real mach = config.getValue<real>("Ma", 0.1); - const real mach = config.contains("Ma")? config.getValue<real>("Ma"): 0.1; + const uint nodes_per_H = config.getValue<uint>("nz", 64); - const uint nodes_per_H = config.contains("nz")? 
config.getValue<uint>("nz"): 64; + const bool writePrecursor = config.getValue("writePrecursor", false); + bool useDistributions; + std::string precursorDirectory; + int nTWritePrecursor; real tStartPrecursor, posXPrecursor; + if(writePrecursor) + { + nTWritePrecursor = config.getValue<int>("nTimestepsWritePrecursor"); + tStartPrecursor = config.getValue<real>("tStartPrecursor"); + posXPrecursor = config.getValue<real>("posXPrecursor"); + useDistributions = config.getValue<bool>("useDistributions", false); + precursorDirectory = config.getValue<std::string>("precursorDirectory"); + } + + const bool readPrecursor = config.getValue("readPrecursor", false); + int timestepsBetweenReadsPrecursor; + if(readPrecursor) + { + timestepsBetweenReadsPrecursor = config.getValue<int>("nTimestepsReadPrecursor"); + precursorDirectory = config.getValue<std::string>("precursorDirectory"); + useDistributions = config.getValue<bool>("useDistributions", false); + } // all in s const float tStartOut = config.getValue<real>("tStartOut"); @@ -130,7 +201,7 @@ void multipleLevel(const std::string& configPath) const float tOutProbe = config.getValue<real>("tOutProbe"); - const real dx = L_z/real(nodes_per_H); + const real dx = H/real(nodes_per_H); const real dt = dx * mach / (sqrt(3) * velocity); @@ -155,15 +226,17 @@ void multipleLevel(const std::string& configPath) para->setPrintFiles(true); - para->setForcing(pressureGradientLB, 0, 0); + if(!readPrecursor) para->setForcing(pressureGradientLB, 0, 0); para->setVelocityLB(velocityLB); para->setViscosityLB(viscosityLB); para->setVelocityRatio( dx / dt ); para->setViscosityRatio( dx*dx/dt ); para->setDensityRatio( 1.0 ); - para->setMainKernel("TurbulentViscosityCumulantK17CompChim"); - + bool useStreams = (nProcs > 1 ? 
true: false); + // useStreams=false; + para->setUseStreams(useStreams); + para->setMainKernel("CumulantK17"); para->setIsBodyForce( config.getValue<bool>("bodyForce") ); para->setTimestepStartOut(uint(tStartOut/dt) ); @@ -172,64 +245,206 @@ void multipleLevel(const std::string& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - SPtr<TurbulenceModelFactory> tmFactory = SPtr<TurbulenceModelFactory>( new TurbulenceModelFactory(para) ); + SPtr<TurbulenceModelFactory> tmFactory = std::make_shared<TurbulenceModelFactory>(para); tmFactory->readConfigFile( config ); + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const real xSplit = L_x/nProcs; + const real overlap = 8.0*dx; + + real xMin = procID * xSplit; + real xMax = (procID+1) * xSplit; + real xGridMin = procID * xSplit; + real xGridMax = (procID+1) * xSplit; + + real yMin = 0.0; + real yMax = L_y; + real zMin = 0.0; + real zMax = L_z; + + bool isFirstSubDomain = (procID == 0 && nProcs > 1)? true: false; + bool isLastSubDomain = (procID == nProcs-1 && nProcs > 1)? true: false; + bool isMidSubDomain = (!isFirstSubDomain && !isLastSubDomain && nProcs > 1)? 
true: false; - // tmFactory->setTurbulenceModel(TurbulenceModel::AMD); - // tmFactory->setModelConstant(config.getValue<real>("SGSconstant")); + if(isFirstSubDomain) + { + xGridMax += overlap; + if(!readPrecursor) xGridMin -= overlap; + } + if(isLastSubDomain) + { + xGridMin -= overlap; + if(!readPrecursor) xGridMax += overlap; + } + if(isMidSubDomain) + { + xGridMax += overlap; + xGridMin -= overlap; + } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + gridBuilder->addCoarseGrid( xGridMin, 0.0, 0.0, + xGridMax, L_y, L_z, dx); + if(true)// Add refinement + { + gridBuilder->setNumberOfLayers(4,0); + real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax; //Stop refinement some distance before outlet if domain ist not periodic + gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xMaxRefinement, L_y, 0.5*L_z) , 1 ); + para->setMaxLevel(2); + scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); + } + + if(nProcs > 1) + { + gridBuilder->setSubDomainBox( + std::make_shared<BoundingBox>(xMin, xMax, yMin, yMax, zMin, zMax)); + gridBuilder->setPeriodicBoundaryCondition(false, true, false); + } + else + { + gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false); + } - gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, - L_x, L_y, L_z, dx); - // gridBuilder->setNumberOfLayers(12, 8); + gridBuilder->buildGrids(lbmOrGks, true); // buildGrids() has to be called before setting the BCs!!!! 
- // gridBuilder->addGrid( new Cuboid( 0.0, 0.0, 0.0, L_x, L_y, 0.3*L_z) , 1 ); - // para->setMaxLevel(2); + std::cout << "nProcs: "<< nProcs << "Proc: " << procID << " isFirstSubDomain: " << isFirstSubDomain << " isLastSubDomain: " << isLastSubDomain << " isMidSubDomain: " << isMidSubDomain << std::endl; + + if(nProcs > 1){ + if (isFirstSubDomain || isMidSubDomain) { + gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks); + gridBuilder->setCommunicationProcess(CommunicationDirections::PX, procID+1); + } - gridBuilder->setPeriodicBoundaryCondition(true, true, false); + if (isLastSubDomain || isMidSubDomain) { + gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks); + gridBuilder->setCommunicationProcess(CommunicationDirections::MX, procID-1); + } - gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!! + if (isFirstSubDomain && !readPrecursor) { + gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks); + gridBuilder->setCommunicationProcess(CommunicationDirections::MX, nProcs-1); + } + if (isLastSubDomain && !readPrecursor) { + gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks); + gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 0); + } + } uint samplingOffset = 2; - // gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); + + std::cout << " precursorDirectory " << precursorDirectory << std::endl; + + if(readPrecursor) + { + if(isFirstSubDomain || nProcs == 1) + { + auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK); + gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, timestepsBetweenReadsPrecursor); + // gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0); + } + + if(isLastSubDomain || nProcs == 1) + { + gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.f); + } + } + 
gridBuilder->setStressBoundaryCondition(SideType::MZ, 0.0, 0.0, 1.0, // wall normals - samplingOffset, z0/dx); // wall model settinng - para->setHasWallModelMonitor(true); - bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack); + samplingOffset, z0, dx); // wall model settinng + para->setHasWallModelMonitor(true); + gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0f, 0.0f, -1.0f); - gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0); + bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible); + bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack); bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); - + bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective); + bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor); + para->setOutflowPressureCorrectionFactor(0.0); - real cPi = 3.1415926535897932384626433832795; - para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { + if(readPrecursor) + { + para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { rho = (real)0.0; - vx = (u_star/0.4 * log(coordZ/z0) + 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt / dx; - vy = 2.0*sin(cPi*16.0f*coordX/L_x)*sin(cPi*8.0f*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt / dx; - vz = 8.0*u_star/0.4*(sin(cPi*8.0*coordY/H)*sin(cPi*8.0*coordZ/H)+sin(cPi*8.0*coordX/L_x))/(pow(L_z/2.0-coordZ, c2o1)+c1o1) * dt / dx; - }); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + vx = rho = c0o1; + vx = 
u_star/c4o10*(u_star/c4o10 * log(coordZ/z0+c1o1)) * dt/dx; + vy = c0o1; + vz = c0o1; + }); + } + else + { + para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { + rho = (real)0.0; + vx = rho = c0o1; + vx = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt/dx; + vy = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt/dx; + vz = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) * dt/dx; + }); + } + + - SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') ); - planarAverageProbe->addAllAvailableStatistics(); - planarAverageProbe->setFileNameToNOut(); - para->addProbe( planarAverageProbe ); + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + if(!readPrecursor && (isFirstSubDomain || nProcs == 1)) + { + SPtr<PlanarAverageProbe> planarAverageProbe = SPtr<PlanarAverageProbe>( new PlanarAverageProbe("planeProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt , tStartOutProbe/dt, tOutProbe/dt, 'z') ); + planarAverageProbe->addAllAvailableStatistics(); + planarAverageProbe->setFileNameToNOut(); + para->addProbe( planarAverageProbe ); + + para->setHasWallModelMonitor(true); + SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) ); + wallModelProbe->addAllAvailableStatistics(); + wallModelProbe->setFileNameToNOut(); + wallModelProbe->setForceOutputToStress(true); + 
if(para->getIsBodyForce()) + wallModelProbe->setEvaluatePressureGradient(true); + para->addProbe( wallModelProbe ); + } + + SPtr<PlaneProbe> planeProbe1 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_1", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe1->setProbePlane(100.0, 0.0, 0, dx, L_y, L_z); + planeProbe1->addAllAvailableStatistics(); + para->addProbe( planeProbe1 ); + + if(readPrecursor) + { + SPtr<PlaneProbe> planeProbe2 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_2", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe2->setProbePlane(1000.0, 0.0, 0, dx, L_y, L_z); + planeProbe2->addAllAvailableStatistics(); + para->addProbe( planeProbe2 ); + + SPtr<PlaneProbe> planeProbe3 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_3", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe3->setProbePlane(1500.0, 0.0, 0, dx, L_y, L_z); + planeProbe3->addAllAvailableStatistics(); + para->addProbe( planeProbe3 ); + + SPtr<PlaneProbe> planeProbe4 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_4", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe4->setProbePlane(2000.0, 0.0, 0, dx, L_y, L_z); + planeProbe4->addAllAvailableStatistics(); + para->addProbe( planeProbe4 ); + + SPtr<PlaneProbe> planeProbe5 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_5", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe5->setProbePlane(2500.0, 0.0, 0, dx, L_y, L_z); + planeProbe5->addAllAvailableStatistics(); + para->addProbe( planeProbe5 ); + + SPtr<PlaneProbe> planeProbe6 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_6", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) ); + planeProbe6->setProbePlane(0.0, L_y/2.0, 0, L_x, dx, L_z); + planeProbe6->addAllAvailableStatistics(); + para->addProbe( planeProbe6 ); + } - 
para->setHasWallModelMonitor(true); - SPtr<WallModelProbe> wallModelProbe = SPtr<WallModelProbe>( new WallModelProbe("wallModelProbe", para->getOutputPath(), tStartAveraging/dt, tStartTmpAveraging/dt, tAveraging/dt/4.0 , tStartOutProbe/dt, tOutProbe/dt) ); - wallModelProbe->addAllAvailableStatistics(); - wallModelProbe->setFileNameToNOut(); - wallModelProbe->setForceOutputToStress(true); - if(para->getIsBodyForce()) - wallModelProbe->setEvaluatePressureGradient(true); - para->addProbe( wallModelProbe ); + if(writePrecursor) + { + SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities, 1000); + para->addProbe(precursorWriter); + } auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para); auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator); - Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory); + Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory, &scalingFactory); sim.run(); } diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt index a489f0ab89738a193b16fee41c212a5943f6525d..83e7861a5fb85ea800d187699f1c6c1409422f0a 100644 --- a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt +++ b/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt @@ -7,7 +7,7 @@ Path = . ################################################## GridPath = . 
################################################## -Devices = 1 +Devices = 0 ################################################## tStartOut = 0 tOut = 100000 @@ -28,3 +28,15 @@ SGSconstant = 0.2 QuadricLimiterP = 100000.0 QuadricLimiterM = 100000.0 QuadricLimiterD = 100000.0 + +################################################## +readPrecursor = false +nTimestepsReadPrecursor = 10 +precursorFile = precursor/Precursor + +################################################## +writePrecursor = false +nTimestepsWritePrecursor = 10 + +tStartPrecursor = 100 +posXPrecursor = 3000 \ No newline at end of file diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp index 69ecb3d8cbd45a8a7419437e934a57bd20b0bc9f..5e1cab7f48f7fb672c85f0decee4bcc2d4ac158f 100644 --- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp +++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp @@ -85,7 +85,7 @@ int main() const real L = 1.0; const real Re = 1000.0; const real velocity = 1.0; - const real dt = (real)0.5e-3; + const real velocityLB = 0.05; // LB units const uint nx = 64; const uint timeStepOut = 1000; @@ -109,10 +109,20 @@ int main() auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); ////////////////////////////////////////////////////////////////////////// - // create grid + // compute parameters in lattice units ////////////////////////////////////////////////////////////////////////// - real dx = L / real(nx); + const real dx = L / real(nx); + const real dt = velocityLB / velocity * dx; + + const real vxLB = velocityLB / sqrt(2.0); // LB units + const real vyLB = velocityLB / sqrt(2.0); // LB units + + const real viscosityLB = nx * velocityLB / Re; // LB units + + ////////////////////////////////////////////////////////////////////////// + // create grid + ////////////////////////////////////////////////////////////////////////// gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx); @@ -124,17 +134,6 @@ int 
main() gridBuilder->buildGrids(LbmOrGks::LBM, false); - ////////////////////////////////////////////////////////////////////////// - // compute parameters in lattice units - ////////////////////////////////////////////////////////////////////////// - - const real velocityLB = velocity * dt / dx; // LB units - - const real vxLB = velocityLB / sqrt(2.0); // LB units - const real vyLB = velocityLB / sqrt(2.0); // LB units - - const real viscosityLB = nx * velocityLB / Re; // LB units - ////////////////////////////////////////////////////////////////////////// // set parameters ////////////////////////////////////////////////////////////////////////// @@ -154,7 +153,7 @@ int main() para->setTimestepOut(timeStepOut); para->setTimestepEnd(timeStepEnd); - para->setMainKernel("CumulantK17CompChimRedesigned"); + para->setMainKernel("CumulantK17"); ////////////////////////////////////////////////////////////////////////// // set boundary conditions @@ -164,8 +163,8 @@ int main() gridBuilder->setNoSlipBoundaryCondition(SideType::MX); gridBuilder->setNoSlipBoundaryCondition(SideType::PY); gridBuilder->setNoSlipBoundaryCondition(SideType::MY); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); gridBuilder->setNoSlipBoundaryCondition(SideType::MZ); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); BoundaryConditionFactory bcFactory; diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp index 8ca6939924fcfba22c8b96f000b9d8d05a3f7f43..ed6b4da7a3218e4d89ac90b053d9c054e4dd8205 100644 --- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp +++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp @@ -50,6 +50,7 @@ #include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h" #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h" #include 
"VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" @@ -57,19 +58,6 @@ #include "utilities/communication.h" -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// U s e r s e t t i n g s -// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -const std::string outPath("output/DrivenCavity_Results/"); -const std::string gridPath = "output/DrivenCavity_Results/grid/"; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -92,15 +80,12 @@ void multipleLevel(std::filesystem::path& configPath) config.load(configPath.string()); SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config); BoundaryConditionFactory 
bcFactory = BoundaryConditionFactory(); - + GridScalingFactory scalingFactory = GridScalingFactory(); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool useGridGenerator = true; bool useLevels = true; - // para->setUseStreams(useStreams); // set in config - // para->useReducedCommunicationAfterFtoC = true; // set in config - para->setCalcTurbulenceIntensity(false); if (para->getNumprocs() == 1) { para->useReducedCommunicationAfterFtoC = false; @@ -108,47 +93,40 @@ void multipleLevel(std::filesystem::path& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - const real L = 1.0; - const real Re = 1000.0; // 1000 + const std::string outPath("output/"); + const std::string gridPath = "output/"; + std::string simulationName("DrivenCavityMultiGPU"); + + const real L = 1.0; + const real Re = 1000.0; const real velocity = 1.0; - const real dt = (real)1.0e-3; // 0.5e-3; - const uint nx = 64; - std::string simulationName("DrivenCavityChimMultiGPU"); + const real velocityLB = 0.05; // LB units + const uint nx = 64; // para->setTimestepOut(10000); // set in config // para->setTimestepEnd(10000); // set in config const real dxGrid = L / real(nx); - const real velocityLB = velocity * dt / dxGrid; // LB units + const real dt = velocityLB / velocity * dxGrid; const real vxLB = velocityLB / (real)sqrt(2.0); // LB units const real vyLB = velocityLB / (real)sqrt(2.0); // LB units const real viscosityLB = nx * velocityLB / Re; // LB units - para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) { - rho = (real)1.0; - vx = (real)(coordX * velocityLB); - vy = (real)(coordY * velocityLB); - vz = (real)(coordZ * velocityLB); - }); - para->setVelocityLB(velocityLB); para->setViscosityLB(viscosityLB); para->setVelocityRatio(velocity / 
velocityLB); - para->setDensityRatio((real)1.0); // correct value? + para->setDensityRatio((real)1.0); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - para->setCalcDragLift(false); - para->setUseWale(false); - if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);} para->setOutputPrefix(simulationName); para->setPrintFiles(true); std::cout << "Write result files to " << para->getFName() << std::endl; - // para->setMainKernel("CumulantK17CompChim"); - para->setMainKernel("CumulantK17CompChimStream"); + para->setMainKernel("CumulantK17"); + scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -157,7 +135,7 @@ void multipleLevel(std::filesystem::path& configPath) VF_LOG_INFO("velocity LB [dx/dt] = {}", vxLB); VF_LOG_INFO("viscosity LB [dx/dt] = {}", viscosityLB); VF_LOG_INFO("dxGrid [-] = {}\n", dxGrid); - + VF_LOG_INFO("dt [s] = {}", dt); VF_LOG_INFO("simulation parameters:"); VF_LOG_INFO("mainKernel = {}\n", para->getMainKernel()); @@ -226,7 +204,7 @@ void multipleLevel(std::filesystem::path& configPath) if (generatePart == 0) gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); if (generatePart == 1) - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0); @@ -303,13 +281,13 @@ void multipleLevel(std::filesystem::path& configPath) } if (generatePart == 2) { gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0); - 
gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); } gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0); if (generatePart == 3) { gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); } if (generatePart == 1) { gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); @@ -472,22 +450,22 @@ void multipleLevel(std::filesystem::path& configPath) if (generatePart == 4) { gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); } if (generatePart == 5) { gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); } if (generatePart == 6) { gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); } if (generatePart == 7) { gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 
0.0); } ////////////////////////////////////////////////////////////////////////// } @@ -513,7 +491,7 @@ void multipleLevel(std::filesystem::path& configPath) gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0); gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0); - gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); ////////////////////////////////////////////////////////////////////////// gridBuilder->writeGridsToVtk(outPath + "/grid/"); @@ -534,7 +512,7 @@ void multipleLevel(std::filesystem::path& configPath) gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager); } - Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory); + Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory); sim.run(); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt index c710922b9fc82ac7680f5f7daade4faa235bc957..c5789cdf96049b7c0a31ce693c29cd2db4952a58 100644 --- a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt +++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt @@ -4,35 +4,11 @@ Devices="0 1 2 3" NumberOfDevices=4 -################################################## -#informations for Writing -################################################## -Path=/work/y0078217/Results/DrivenCavityMultiGPUResults/4GPU/ -#Prefix="DrivenCavityMultiGPU" -#WriteGrid=true -################################################## -#informations for reading -################################################## -GridPath=/work/y0078217/Grids/GridDrivenCavityMultiGPU/4GPU/ 
-#GridPath="C:" - -################################################## -#number of grid levels -################################################## -#NOGL=1 - -################################################## -#LBM Version -################################################## -#D3Qxx=27 -#MainKernelName=CumulantK17CompChim - ################################################## #simulation parameter ################################################## -TimeEnd=1 -TimeOut=1 -#TimeStartOut=0 +TimeEnd=10000 +TimeOut=10000 ################################################## # CUDA Streams and optimized communication (only used for multiple GPUs) diff --git a/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..40b4f08d7500c56efae7378df6398d065e4ecbfb --- /dev/null +++ b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt @@ -0,0 +1,10 @@ +PROJECT(DrivenCavityUniform LANGUAGES CUDA CXX) + +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" ) + +vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp) + +set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA) + +set_target_properties(DrivenCavityUniform PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + diff --git a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp new file mode 100644 index 0000000000000000000000000000000000000000..958ef4714118aac34b8cfb0bec3aab97b108b01d --- /dev/null +++ b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp @@ -0,0 +1,231 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | 
|__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file LidDrivenCavity.cpp +//! \ingroup Applications +//! 
\author Martin Schoenherr, Stephan Lenz +//======================================================================================= +#define _USE_MATH_DEFINES +#include <exception> +#include <fstream> +#include <iostream> +#include <memory> +#include <sstream> +#include <stdexcept> +#include <string> + +////////////////////////////////////////////////////////////////////////// + +#include "Core/DataTypes.h" +#include "Core/LbmOrGks.h" +#include "Core/Logger/Logger.h" +#include "Core/VectorTypes.h" +#include "PointerDefinitions.h" + +#include <logger/Logger.h> + +////////////////////////////////////////////////////////////////////////// + +#include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/GridFactory.h" +#include "GridGenerator/geometries/Cuboid/Cuboid.h" + +////////////////////////////////////////////////////////////////////////// + +#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" +#include "VirtualFluids_GPU/Communication/Communicator.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h" +#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" +#include "VirtualFluids_GPU/LBM/Simulation.h" +#include "VirtualFluids_GPU/Output/FileWriter.h" +#include "VirtualFluids_GPU/Parameter/Parameter.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" + +////////////////////////////////////////////////////////////////////////// + +int main() +{ + try { + vf::logging::Logger::initalizeLogger(); + ////////////////////////////////////////////////////////////////////////// + // Simulation parameters + ////////////////////////////////////////////////////////////////////////// + std::string path("./output/DrivenCavity_uniform"); + 
std::string simulationName("LidDrivenCavity"); + + const real L = 1.0; + const real Re = 1000.0; + const real velocity = 1.0; + const real dt = (real)0.5e-3; + const uint nx = 64; + + const uint timeStepOut = 1000; + const uint timeStepEnd = 10000; + + ////////////////////////////////////////////////////////////////////////// + // setup logger + ////////////////////////////////////////////////////////////////////////// + + logging::Logger::addStream(&std::cout); + logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW); + logging::Logger::timeStamp(logging::Logger::ENABLE); + logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE); + + ////////////////////////////////////////////////////////////////////////// + // setup gridGenerator + ////////////////////////////////////////////////////////////////////////// + + auto gridFactory = GridFactory::make(); + gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT); + auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory); + + ////////////////////////////////////////////////////////////////////////// + // create grid + ////////////////////////////////////////////////////////////////////////// + + real dx = L / real(nx); + + gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx); + + // gridBuilder->addGrid(new Cuboid(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid + GridScalingFactory scalingFactory = GridScalingFactory(); + scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible); + + gridBuilder->setPeriodicBoundaryCondition(false, false, false); + + gridBuilder->buildGrids(LbmOrGks::LBM, false); + + ////////////////////////////////////////////////////////////////////////// + // compute parameters in lattice units + ////////////////////////////////////////////////////////////////////////// + + const real velocityLB = velocity * dt / dx; // LB units + + const real vxLB = 
velocityLB / sqrt(2.0); // LB units + const real vyLB = velocityLB / sqrt(2.0); // LB units + + const real viscosityLB = nx * velocityLB / Re; // LB units + + ////////////////////////////////////////////////////////////////////////// + // set parameters + ////////////////////////////////////////////////////////////////////////// + SPtr<Parameter> para = std::make_shared<Parameter>(); + + para->setOutputPath(path); + para->setOutputPrefix(simulationName); + + para->setPrintFiles(true); + + para->setVelocityLB(velocityLB); + para->setViscosityLB(viscosityLB); + + para->setVelocityRatio(velocity / velocityLB); + para->setDensityRatio(1.0); + + para->setTimestepOut(timeStepOut); + para->setTimestepEnd(timeStepEnd); + + para->setMainKernel("CumulantK17"); + + ////////////////////////////////////////////////////////////////////////// + // set boundary conditions + ////////////////////////////////////////////////////////////////////////// + + gridBuilder->setNoSlipBoundaryCondition(SideType::PX); + gridBuilder->setNoSlipBoundaryCondition(SideType::MX); + gridBuilder->setNoSlipBoundaryCondition(SideType::PY); + gridBuilder->setNoSlipBoundaryCondition(SideType::MY); + gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); + gridBuilder->setNoSlipBoundaryCondition(SideType::MZ); + + BoundaryConditionFactory bcFactory; + + bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack); + bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible); + + ////////////////////////////////////////////////////////////////////////// + // set copy mesh to simulation + ////////////////////////////////////////////////////////////////////////// + + vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance(); + + auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para); + SPtr<GridProvider> gridGenerator = + GridProvider::makeGridGenerator(gridBuilder, para, 
cudaMemoryManager, communicator); + + + ////////////////////////////////////////////////////////////////////////// + // run simulation + ////////////////////////////////////////////////////////////////////////// + + VF_LOG_INFO("Start Running DrivenCavity Showcase..."); + printf("\n"); + VF_LOG_INFO("world parameter:"); + VF_LOG_INFO("--------------"); + VF_LOG_INFO("dt [s] = {}", dt); + VF_LOG_INFO("world_length [m] = {}", L); + VF_LOG_INFO("world_velocity [m/s] = {}", velocity); + VF_LOG_INFO("dx [m] = {}", dx); + printf("\n"); + VF_LOG_INFO("LB parameter:"); + VF_LOG_INFO("--------------"); + VF_LOG_INFO("Re = {}", Re); + VF_LOG_INFO("lb_velocity [dx/dt] = {}", velocityLB); + VF_LOG_INFO("lb_viscosity [dx^2/dt] = {}", viscosityLB); + VF_LOG_INFO("lb_vx [dx/dt] (lb_velocity/sqrt(2)) = {}", vxLB); + VF_LOG_INFO("lb_vy [dx/dt] (lb_velocity/sqrt(2)) = {}", vyLB); + printf("\n"); + VF_LOG_INFO("simulation parameter:"); + VF_LOG_INFO("--------------"); + VF_LOG_INFO("nx = {}", nx); + VF_LOG_INFO("ny = {}", nx); + VF_LOG_INFO("nz = {}", nx); + VF_LOG_INFO("number of nodes = {}", nx * nx * nx); + VF_LOG_INFO("n timesteps = {}", timeStepOut); + VF_LOG_INFO("write_nth_timestep = {}", timeStepEnd); + VF_LOG_INFO("output_path = {}", path); + + Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory); + sim.run(); + + } catch (const spdlog::spdlog_ex &ex) { + std::cout << "Log initialization failed: " << ex.what() << std::endl; + } catch (const std::bad_alloc &e) { + VF_LOG_CRITICAL("Bad Alloc: {}", e.what()); + } catch (const std::exception &e) { + VF_LOG_CRITICAL("exception: {}", e.what()); + } catch (...) 
{ + VF_LOG_CRITICAL("Unknown exception!"); + } + + return 0; +} diff --git a/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt new file mode 100644 index 0000000000000000000000000000000000000000..458346a67c7f001580494af1dc9262034613be68 --- /dev/null +++ b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt @@ -0,0 +1,34 @@ +################################################## +#GPU Mapping +################################################## +#Devices="0 1 2 3" +#NumberOfDevices=4 + +################################################## +#informations for Writing +################################################## +#Path = "output/" +#Prefix="DrivenCavity" +#WriteGrid=true +################################################## +#informations for reading +################################################## +#GridPath="grid/" + +################################################## +#number of grid levels +################################################## +#NOGL=1 + +################################################## +#LBM Version +################################################## +#D3Qxx=27 +#MainKernelName=CumulantAA2016CompSP27 + +################################################## +#simulation parameter +################################################## +#TimeEnd=100000 +#TimeOut=1000 +#TimeStartOut=0 \ No newline at end of file diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp index efac863fc9efd446e5f266648ad4fa74c954634f..dc5eaf58aff9b4a1b87d70c187b81461330ee3da 100644 --- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp +++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp @@ -40,7 +40,6 @@ ////////////////////////////////////////////////////////////////////////// -#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h" #include "VirtualFluids_GPU/Communication/Communicator.h" #include 
"VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" @@ -49,7 +48,7 @@ #include "VirtualFluids_GPU/LBM/Simulation.h" #include "VirtualFluids_GPU/Output/FileWriter.h" #include "VirtualFluids_GPU/Parameter/Parameter.h" -#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" ////////////////////////////////////////////////////////////////////////// diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt index 4e2b0c91482b6a650ff28a210673cac097cb8c2d..2bf6955062da5c98f6a7b931c19821c52eaf15ea 100644 --- a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt +++ b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt @@ -7,14 +7,14 @@ NumberOfDevices=4 ################################################## #informations for Writing ################################################## -Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/ +#Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/ #Path="F:/Work/Computations/out/MusselOyster/" #Prefix="MusselOyster" #WriteGrid=true ################################################## #informations for reading ################################################## -GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/ +#GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/ #GridPath="C:" ################################################## @@ -31,8 +31,8 @@ GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/ ################################################## #simulation parameter ################################################## -TimeEnd=400000 # 800000 -TimeOut=100000 # 400000 +TimeEnd=100000 # 800000 +TimeOut=10000 # 400000 #TimeStartOut=0 ################################################## diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp index 
d8642c7b267bcad6c58ab2a9c178c2d9394ecf2a..7514c2b273bf60d6e2523f132911dde8839d296a 100644 --- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp +++ b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp @@ -1,63 +1,95 @@ -//#define MPI_LOGGING +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file TGV_3D.cpp +//! \ingroup Applications +//! 
\author Martin Schoenherr +//======================================================================================= +#define _USE_MATH_DEFINES +#include <exception> +#include <filesystem> +#include <fstream> +#include <iostream> +#include <math.h> +#include <memory> +#include <sstream> +#include <stdexcept> +#include <string> -//Martin Branch +#include "mpi.h" -#include <mpi.h> -#if defined( MPI_LOGGING ) - #include <mpe.h> -#endif +////////////////////////////////////////////////////////////////////////// -#include <string> -#include <sstream> -#include <iostream> -#include <stdexcept> -#include <fstream> -#define _USE_MATH_DEFINES -#include <math.h> +#include "Core/DataTypes.h" +#include "Core/LbmOrGks.h" +#include "Core/Logger/Logger.h" +#include "Core/VectorTypes.h" +#include "PointerDefinitions.h" -//#include "metis.h" +////////////////////////////////////////////////////////////////////////// -#include "basics/Core/LbmOrGks.h" -#include "basics/Core/StringUtilities/StringUtil.h" -#include <basics/config/ConfigurationFile.h> +#include "GridGenerator/geometries/Conglomerate/Conglomerate.h" +#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" +#include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/GridFactory.h" + +#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" +#include "GridGenerator/io/STLReaderWriter/STLReader.h" +#include "GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" + +////////////////////////////////////////////////////////////////////////// -#include "VirtualFluids_GPU/LBM/Simulation.h" #include "VirtualFluids_GPU/Communication/Communicator.h" -#include 
"VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h" #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" -#include "VirtualFluids_GPU/Parameter/Parameter.h" -#include "VirtualFluids_GPU/Output/FileWriter.h" - -#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h" -#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h" #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" - #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" +#include "VirtualFluids_GPU/LBM/Simulation.h" +#include "VirtualFluids_GPU/Output/FileWriter.h" +#include "VirtualFluids_GPU/Parameter/Parameter.h" -#include "global.h" - -#include "geometries/Sphere/Sphere.h" -#include "geometries/VerticalCylinder/VerticalCylinder.h" -#include "geometries/Cuboid/Cuboid.h" -#include "geometries/TriangularMesh/TriangularMesh.h" -#include "geometries/Conglomerate/Conglomerate.h" -#include "geometries/TriangularMesh/TriangularMeshStrategy.h" - -#include "grid/GridBuilder/LevelGridBuilder.h" -#include "grid/GridBuilder/MultipleGridBuilder.h" -#include "grid/BoundaryConditions/Side.h" -#include "grid/BoundaryConditions/BoundaryCondition.h" -#include "grid/GridFactory.h" +#include <logger/Logger.h> -#include "io/SimulationFileWriter/SimulationFileWriter.h" -#include "io/GridVTKWriter/GridVTKWriter.h" -#include "io/STLReaderWriter/STLReader.h" -#include "io/STLReaderWriter/STLWriter.h" +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// U s e r s e t t i n g s +// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#include "utilities/math/Math.h" -#include "utilities/communication.h" -#include "utilities/transformator/TransformatorImp.h" //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c @@ -94,8 +126,8 @@ bool useWale = false; std::string kernel( "CumulantK17Comp" ); -std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS -//std::string path("E:/DrivenCavity/results/"); //TESLA03 +//std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS +std::string path("D:/out/TGV_3D/"); //TESLA03 std::string simulationName("TGV_3D"); ////////////////////////////////////////////////////////////////////////// diff --git a/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..efb4310669f9c0de7aa5cf3f1e4dffa00bd66cbf --- /dev/null +++ b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt @@ -0,0 +1,7 @@ +PROJECT(TGV_3D_GridRef LANGUAGES CUDA CXX) + +vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES TGV_3D_GridRef.cpp) + +set_source_files_properties(TGV_3D_GridRef.cpp PROPERTIES LANGUAGE CUDA) + 
+set_target_properties(TGV_3D_GridRef PROPERTIES CUDA_SEPARABLE_COMPILATION ON) \ No newline at end of file diff --git a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a88fee2e583a7cb227702ff19ada7daced1b1708 --- /dev/null +++ b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp @@ -0,0 +1,399 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file TGV_3D_GridRef.cpp +//! \ingroup Applications +//!
\author Martin Schoenherr +//======================================================================================= +#define _USE_MATH_DEFINES +#include <exception> +#include <filesystem> +#include <fstream> +#include <iostream> +#include <math.h> +#include <memory> +#include <sstream> +#include <stdexcept> +#include <string> + +#include "mpi.h" + +////////////////////////////////////////////////////////////////////////// + +#include "Core/DataTypes.h" +#include "Core/LbmOrGks.h" +#include "Core/Logger/Logger.h" +#include "Core/VectorTypes.h" +#include "PointerDefinitions.h" + +////////////////////////////////////////////////////////////////////////// + +#include "GridGenerator/geometries/Conglomerate/Conglomerate.h" +#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" +#include "GridGenerator/grid/BoundaryConditions/Side.h" +#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" +#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h" +#include "GridGenerator/grid/GridFactory.h" + +#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h" +#include "GridGenerator/io/STLReaderWriter/STLReader.h" +#include "GridGenerator/io/STLReaderWriter/STLWriter.h" +#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" + +////////////////////////////////////////////////////////////////////////// + +#include "VirtualFluids_GPU/Communication/Communicator.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h" +#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h" +#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "VirtualFluids_GPU/Factories/GridScalingFactory.h" +#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" +#include "VirtualFluids_GPU/LBM/Simulation.h" +#include 
"VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+
+#include <logger/Logger.h>
+
+// std::find is used by the command-line helpers below; include it explicitly
+// instead of relying on a transitive include.
+#include <algorithm>
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// U s e r s e t t i n g s
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//! \brief Returns the argument that directly follows \p option in the range [begin, end).
+//! \param begin  first element of the argument array (typically argv)
+//! \param end    one past the last element (typically argv + argc)
+//! \param option flag to search for, e.g. "--nx"
+//! \return pointer to the value following the flag, or nullptr if the flag is
+//!         absent or is the last argument (i.e. has no value). Callers must
+//!         check for nullptr before using the result.
+char* getCmdOption(char** begin, char** end, const std::string& option)
+{
+    char** itr = std::find(begin, end, option);
+    if (itr != end && ++itr != end)
+        return *itr;
+    return nullptr;
+}
+
+//! \brief Returns true if \p option occurs anywhere in the range [begin, end).
+bool cmdOptionExists(char** begin, char** end, const std::string& option)
+{
+    return std::find(begin, end, option) != end;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Default settings; Re, nx, dtPerL, kernel, gpuIndex, useLimiter and useWale
+// can be overridden on the command line (see main()).
+real Re = 1600.0;
+
+uint dtPerL = 500;
+
+uint nx = 64;
+uint gpuIndex = 0;
+
+bool useLimiter = false;
+bool useWale = false;
+
+std::string kernel( "CumulantK17CompChimRedesigned" );
+
+std::string path("D:/out/TGV_3D/"); //MOLLOK
+
+std::string simulationName("TGV_3D_Gridref_noSqPress");
+//////////////////////////////////////////////////////////////////////////
+
+//! \brief Builds the grid (periodic cube [-PI, PI]^3 plus one refined cuboid on level 1),
+//!        sets the simulation parameters and the initial velocity/density field, and runs the simulation.
+//! \param configPath path of the configuration file that is loaded into the Parameter object
+//! \note Appends kernel name and settings to the global \c path and \c simulationName.
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+    vf::basics::ConfigurationFile config;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
+    GridScalingFactory scalingFactory = GridScalingFactory();
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    const real PI = 3.141592653589793238462643383279;
+
+    const real L = nx / ( 2.0 * PI );
+
+    // NOTE(review): the numerator is the literal 64.0 rather than nx, so the lattice
+    // velocity does not change with --nx. Presumably intentional (dtPerL defined at
+    // the reference resolution nx = 64) - confirm.
+    const real velocity = 64.0 / ( dtPerL * 2.0 * PI );
+
+    const real viscosity = nx / ( 2.0 * PI ) * velocity / Re;
+
+    // The velocity is passed to setVelocityLB() below, so it is not a time; the former " s" suffix was dropped.
+    *logging::out << logging::Logger::INFO_HIGH << "velocity = " << velocity << "\n";
+
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity = " << viscosity << "\n";
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    real dx = 2.0 * PI / real(nx);
+
+    gridBuilder->addCoarseGrid(-PI, -PI, -PI,
+                                PI,  PI,  PI, dx);
+
+    gridBuilder->setNumberOfLayers(0, 0);
+
+    // Refinement region; added to the grid builder as level 1.
+    auto fineGrid = new Cuboid(-PI * 0.5, -PI * 0.5, -PI * 0.5,
+                                0.0,       PI * 0.5,  0.0);
+
+    gridBuilder->addGrid(fineGrid, 1);
+
+    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
+
+    gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
+    //////////////////////////////////////////////////////////////////////////
+
+    // Output directory: <path><kernel>SingleGPU[_Limiter]
+    {
+        std::stringstream _path;
+
+        _path << path;
+        _path << kernel;
+        _path << "SingleGPU";
+
+        if (useLimiter) _path << "_Limiter";
+
+        path = _path.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    // Output prefix: <simulationName>_nx_<nx>_dtPerL_<dtPerL>_
+    {
+        std::stringstream _simulationName;
+
+        _simulationName << simulationName;
+        _simulationName << "_nx_" << nx;
+        _simulationName << "_dtPerL_" << dtPerL << "_";
+
+        simulationName = _simulationName.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setDevices(std::vector<uint>{gpuIndex});
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setOutputPath( path );
+    para->setOutputPrefix( simulationName );
+
+    para->setPrintFiles(true);
+
+    // Run for 40 multiples of lround(L / velocity) time steps, writing output every 5.
+    para->setTimestepEnd(40 * lround(L / velocity));
+    para->setTimestepOut(5 * lround(L / velocity));
+
+    para->setVelocityLB( velocity );
+
+    para->setViscosityLB( viscosity );
+
+    para->setVelocityRatio( 1.0 / velocity );
+
+    para->setDensityRatio(1.0);
+
+    // Initial field: vx and vy are sin/cos products scaled by velocity, vz is zero.
+    para->setInitialCondition( [&]( real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz){
+
+        real a = 1.0;
+        real b = 1.0;
+        real c = 1.0;
+
+        rho = 3.0 * ((velocity * velocity) / 16.0 * ( cos( 2.0 * a * coordX ) + cos( 2.0 * b * coordY ) ) * ( cos( 2.0 * c * coordZ ) + 2.0 ) );
+        vx  =  velocity * sin( a * coordX ) * cos( b * coordY ) * cos( c * coordZ );
+        vy  = -velocity * cos( a * coordX ) * sin( b * coordY ) * cos( c * coordZ );
+        vz  = 0.0;
+
+    } );
+
+    para->setMainKernel( kernel );
+
+    // NOTE(review): presumably these large values effectively switch the limiters off - confirm.
+    if( !useLimiter )
+        para->setQuadricLimiters( 1000000.0, 1000000.0, 1000000.0 );
+
+    if( useWale )
+        para->setUseWale( true );
+
+    para->setUseInitNeq( true );
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+    SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+
+    // NOTE(review): fileWriter is never used afterwards; kept in case the
+    // FileWriter constructor has side effects - confirm and remove.
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
+    sim.run();
+}
+
+
+//! \brief Entry point: parses the command line, then runs multipleLevel() with
+//!        "config.txt" located in the directory of this source file (derived from __FILE__).
+//!
+//! Options with a value: --Re, --nx, --dtPerL, --kernel, --gpu.
+//! Switches without a value: --useLimiter, --useWale.
+//! Exceptions are logged and swallowed; the process always returns 0.
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    std::string str; // only referenced inside the MPI_LOGGING section below
+    if ( argv != NULL )
+    {
+        try
+        {
+            //////////////////////////////////////////////////////////////////////////
+            std::string targetPath( __FILE__ );
+
+#ifdef _WIN32
+            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+
+            //////////////////////////////////////////////////////////////////////////
+
+            // getCmdOption() returns a null pointer when a flag is absent or is the
+            // last argument. Test the pointer so that a flag without a value keeps the
+            // default instead of handing null to atof()/atoi()/std::string.
+            if( const char* value = getCmdOption( argv, argv+argc, "--Re" ) )
+                Re = atof( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--nx" ) )
+                nx = atoi( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--dtPerL" ) )
+                dtPerL = atoi( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--kernel" ) )
+                kernel = value;
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--gpu" ) )
+                gpuIndex = atoi( value );
+
+            if( cmdOptionExists( argv, argv+argc, "--useLimiter" ) )
+                useLimiter = true;
+
+            if( cmdOptionExists( argv, argv+argc, "--useWale" ) )
+                useWale = true;
+
+            multipleLevel(targetPath + "config.txt");
+
+            //////////////////////////////////////////////////////////////////////////
+        }
+        catch (const std::bad_alloc& e)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+        }
+        catch (const std::exception& e)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+        }
+        catch (...)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+        }
+    }
+
+
+   /*
+      MPE_Init_log() & MPE_Finish_log() are NOT needed when
+      liblmpe.a is linked with this program. In that case,
+      MPI_Init() would have called MPE_Init_log() already.
+   */
+#if defined( MPI_LOGGING )
+    MPE_Init_log();
+#endif
+
+#if defined( MPI_LOGGING )
+    if ( argv != NULL )
+        MPE_Finish_log( argv[0] );
+    if ( str != "" )
+        MPE_Finish_log( str.c_str() );
+    else
+        MPE_Finish_log( "TestLog" );
+#endif
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/config.txt b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae6d3e9bc4be5403d151f3d59ffb13af7164abf0
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=4
+
+##################################################
+#informations for Writing
+##################################################
+#Path="E:/DrivenCavity/results"
+#Path="F:/Work/Computations/out/DrivenCavity/"
+#Prefix="DrivenCavity"
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+#GridPath="E:/DrivenCavity/dummy"
+GridPath="F:/Work/Computations/out/TaylorGreen3DNew/grid"
+
+##################################################
+#number of grid levels
+##################################################
+NOGL=2
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantAA2016CompSP27
+
+################################################## +#simulation parameter +################################################## +#TimeEnd=100000 +#TimeOut=1000 +#TimeStartOut=0 \ No newline at end of file diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp index 8c303dc07c911c363e892ce53f7bfe7f48e284d6..045c208274bc6bc216d25e8c2fa905916a52f87b 100644 --- a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp +++ b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp @@ -1,7 +1,38 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file TGV_3D_MultiGPU.cpp +//! \ingroup TGV_3D_MultiGPU +//! \author Martin Schoenherr +//======================================================================================= //#define MPI_LOGGING //Martin Branch - #include <mpi.h> #if defined( MPI_LOGGING ) #include <mpe.h> @@ -97,7 +128,7 @@ bool useWale = false; int mpirank; int mpiWorldSize; -std::string kernel( "CumulantK20Comp" ); +std::string kernel( "CumulantK17CompChim" ); //std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS //std::string path("results/"); //PHOENIX diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp index 06b3678d7c8ddd236c26a69686356fbe87c31db2..3e083afd690632dbaabdde5d00f2ab454d86032b 100644 --- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp +++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids.
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WTG_RUB.cpp +//! \ingroup Applications +//! \author Martin Schoenherr +//======================================================================================= #define _USE_MATH_DEFINES #include <math.h> #include <string> @@ -15,15 +47,10 @@ #include "Core/DataTypes.h" #include "PointerDefinitions.h" - #include "Core/LbmOrGks.h" -#include "Core/StringUtilities/StringUtil.h" - #include "Core/VectorTypes.h" #include "Core/Logger/Logger.h" -#include <basics/config/ConfigurationFile.h> - ////////////////////////////////////////////////////////////////////////// #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h" diff --git a/metadata.xml b/metadata.xml deleted file mode 100644 index 7cbae3ae7e1d5d7d48af2f0e5577253a89f953f5..0000000000000000000000000000000000000000 --- a/metadata.xml +++ /dev/null @@ -1,204 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd"> - <identifier identifierType="DOI">PLACEHOLDER</identifier> - <titles> - <title xml:lang="en">VirtualFluids</title> - </titles> - <language>en</language> - <creators> - <creator> - <creatorName 
nameType="Personal">Krafczyk, Manfred</creatorName> - <givenName>Manfred</givenName> - <familyName>Krafczyk</familyName> - <nameIdentifier nameIdentifierScheme="ORCID">0000-0002-8509-0871</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </creator> - <creator> - <creatorName nameType="Organizational">Institut für rechnergestützte Modellierung im Bauingenieurwesen</creatorName> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - </creator> - </creators> - <publisher xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</publisher> - <publicationYear>2021</publicationYear> - <resourceType resourceTypeGeneral="Software">Computational Fluid Dynamics Solver</resourceType> - <subjects> - <subject subjectScheme="DDC" schemeURI="https://www.oclc.org/en/dewey.html">532 Fluid Mechanics, liquid mechanics</subject> - </subjects> - <contributors> - <contributor contributorType="Researcher"> - <contributorName>Ahrenholz, Benjamin</contributorName> - <givenName>Benjamin</givenName> - <familyName>Ahrenholz</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Alihussein, Hussein</contributorName> - <givenName>Hussein</givenName> - <familyName>Alihussein</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3656-7028</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Bindick, Sebastian</contributorName> - <givenName>Sebastian</givenName> - <familyName>Bindick</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Brendel, Aileen</contributorName> - <givenName>Aileen</givenName> - 
<familyName>Brendel</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Geier, Martin</contributorName> - <givenName>Martin</givenName> - <familyName>Geier</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-8367-9412</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Geller, Sebastian</contributorName> - <givenName>Sebastian</givenName> - <familyName>Geller</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Goraki Fard, Ehsan</contributorName> - <givenName>Ehsan</givenName> - <familyName>Goraki Fard</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Hegewald, Jan</contributorName> - <givenName>Jan</givenName> - <familyName>Hegewald</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Janßen, Christian</contributorName> - <givenName>Christian</givenName> - <familyName>Janßen</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Kutscher, Konstantin</contributorName> - <givenName>Konstantin</givenName> - <familyName>Kutscher</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-1099-1608</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Lenz, Stephan</contributorName> - <givenName>Stephan</givenName> - <familyName>Lenz</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Linxweiler, 
Jan</contributorName> - <givenName>Jan</givenName> - <familyName>Linxweiler</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-2755-5087</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Lux, Lennard</contributorName> - <givenName>Lennard</givenName> - <familyName>Lux</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Marcus, Sven</contributorName> - <givenName>Sven</givenName> - <familyName>Marcus</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3689-2162</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Universitätsbibliothek Braunschweig</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Peters, Sören</contributorName> - <givenName>Sören</givenName> - <familyName>Peters</familyName> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Safari, Hesameddin</contributorName> - <givenName>Hesameddin</givenName> - <familyName>Safari</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Schönherr, Martin</contributorName> - <givenName>Martin</givenName> - <familyName>Schönherr</familyName> - <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-4774-1776</nameIdentifier> - <affiliation xml:lang="de">TU Braunschweig</affiliation> - <affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation> - </contributor> - 
- <contributor contributorType="Researcher"> - <contributorName>Stiebler, Maik</contributorName> - <givenName>Maik</givenName> - <familyName>Stiebler</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Textor, Sören</contributorName> - <givenName>Sören</givenName> - <familyName>Textor</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Tölke, Jonas</contributorName> - <givenName>Jonas</givenName> - <familyName>Tölke</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Uphoff, Sonja</contributorName> - <givenName>Sonja</givenName> - <familyName>Uphoff</familyName> - </contributor> - - <contributor contributorType="Researcher"> - <contributorName>Wellmann, Anna</contributorName> - <givenName>Anna</givenName> - <familyName>Wellmann</familyName> - </contributor> - </contributors> - <dates> - <date dateType="Created">2000</date> - </dates> - <formats> - <format>text/x-c</format> - <format>text/x-h</format> - <format>text/x-script.python</format> - </formats> - <relatedIdentifiers> - <relatedIdentifier relatedIdentifierType="URL" relationType="Requires" resourceTypeGeneral="Software">https://www.open-mpi.org/software/ompi/v4.1/</relatedIdentifier> - <relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://cmake.org</relatedIdentifier> - <relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://gcc.gnu.org</relatedIdentifier> - <relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://clang.llvm.org</relatedIdentifier> - <relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://visualstudio.microsoft.com/vs/features/cplusplus/</relatedIdentifier> - </relatedIdentifiers> - <rightsList> - <rights xml:lang="en" 
schemeURI="https://spdx.org/licenses/" rightsIdentifierScheme="SPDX" rightsIdentifier="GPL-3.0-only" rightsURI="https://www.gnu.org/licenses/gpl-3.0-standalone.html">GNU General Public License Version 3</rights> - </rightsList> - <descriptions> - <description descriptionType="Abstract"> - VirtualFluids (VF) is a research code developed at the Institute for Computational Modeling in Civil Engineering (iRMB). The code is a Computational Fluid Dynamics (CFD) solver based on the Lattice Boltzmann Method (LBM) for turbulent, thermal, multiphase and multicomponent flow problems as well as for multi-field problems such as Fluid-Structure-interaction including distributed pre- and postprocessing capabilities for simulations with more than 100 billion degrees of freedom. - </description> - </descriptions> -</resource> diff --git a/pyproject.toml b/pyproject.toml index 8fcb7926102d188b44d8c74084235b6f175edf80..257da6fd95d683081dbff865c864079eae9c675d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,9 @@ [build-system] -requires = ["setuptools", "wheel", "scikit-build"] \ No newline at end of file +requires = [ + "setuptools>=42", + "scikit-build", + "cmake", + "ninja; platform_system!='Windows'" +] +build-backend = "setup_builder" +backend-path = ["utilities"] \ No newline at end of file diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt index 5a84adef027fdfa2953e016693bb64570e48c1ef..815a4b59cf6c3e4e5ac4a7a72a5bd4e374d64c96 100644 --- a/pythonbindings/CMakeLists.txt +++ b/pythonbindings/CMakeLists.txt @@ -1,24 +1,45 @@ -project(VirtualFluidsPython LANGUAGES CUDA CXX) +set(PYFLUIDS_LANGUAGES CXX) + +if(BUILD_VF_GPU) + set(PYFLUIDS_LANGUAGES CUDA CXX) +endif() + +project(VirtualFluidsPython LANGUAGES ${PYFLUIDS_LANGUAGES}) + +pybind11_add_module(python_bindings MODULE src/VirtualFluids.cpp) + +set_target_properties( python_bindings PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids + OUTPUT_NAME "bindings") + 
+target_link_libraries(python_bindings PRIVATE basics logger mpi) + IF(BUILD_VF_GPU) - pybind11_add_module(pyfluids src/VirtualFluidsModulesGPU.cpp) - set_source_files_properties(src/VirtualFluidsModulesGPU.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(src/VirtualFluids.cpp PROPERTIES LANGUAGE CUDA) - target_link_libraries(pyfluids PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger) - target_include_directories(pyfluids PRIVATE ${VF_THIRD_DIR}/cuda_samples/) + target_include_directories(python_bindings PRIVATE ${VF_THIRD_DIR}/cuda_samples/) + target_compile_definitions(python_bindings PRIVATE VF_GPU_PYTHONBINDINGS) + target_link_libraries(python_bindings PRIVATE GridGenerator VirtualFluids_GPU lbm) ENDIF() + IF(BUILD_VF_CPU) - pybind11_add_module(pyfluids src/VirtualFluidsModulesCPU.cpp) - pybind11_add_module(pymuparser src/muParser.cpp) + target_compile_definitions(python_bindings PRIVATE VF_METIS VF_MPI VF_CPU_PYTHONBINDINGS) + target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser lbm) + + # include bindings for muparsers + pybind11_add_module(pymuparser MODULE src/muParser.cpp) # TODO: Move this to MuParser CMakeLists.txt set_target_properties(muparser PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(pyfluids PRIVATE VF_METIS VF_MPI) + set_target_properties( pymuparser PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pymuparser + OUTPUT_NAME "bindings") target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI) - - target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics) target_link_libraries(pymuparser PRIVATE muparser) ENDIF() -target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/) -target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR}) \ No newline at end of file + + +target_include_directories(python_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/) +target_include_directories(python_bindings 
PRIVATE ${CMAKE_BINARY_DIR}) \ No newline at end of file diff --git a/Python/boundary_layer/__init__.py b/pythonbindings/pyfluids-stubs/__init__.pyi similarity index 100% rename from Python/boundary_layer/__init__.py rename to pythonbindings/pyfluids-stubs/__init__.pyi diff --git a/pythonbindings/pyfluids-stubs/bindings/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..4e7f353eab97cc536f8f18e72319af1cd7a1916a --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/__init__.pyi @@ -0,0 +1,38 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. 
+ + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file __init__.pyi +! \ingroup bindings +! \author Henry Korb +======================================================================================= +""" +class ostream_redirect: + def __init__(self, stdout: bool = ..., stderr: bool = ...) -> None: ... + def __enter__(self) -> None: ... + def __exit__(self, *args) -> None: ... diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..a41b7934ca706dc0db5bd6188fee3150456e0cd9 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi @@ -0,0 +1,82 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. 
+ + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file __init__.py +! \ingroup basics +! \author Henry Korb +======================================================================================= +""" +from typing import ClassVar + +from typing import overload + +class ConfigurationFile: + def __init__(self) -> None: ... + def contains(self, key: str) -> bool: ... + @overload + def get_bool_value(self, key: str) -> bool: ... + @overload + def get_bool_value(self, key: str, default_value: bool) -> bool: ... + @overload + def get_double_value(self, key: str) -> float: ... + @overload + def get_double_value(self, key: str, default_value: float) -> float: ... + @overload + def get_float_value(self, key: str) -> float: ... + @overload + def get_float_value(self, key: str, default_value: float) -> float: ... + @overload + def get_int_value(self, key: str) -> int: ... + @overload + def get_int_value(self, key: str, default_value: int) -> int: ... + @overload + def get_string_value(self, key: str) -> str: ... + @overload + def get_string_value(self, key: str, default_value: str) -> str: ... + @overload + def get_uint_value(self, key: str) -> int: ... + @overload + def get_uint_value(self, key: str, default_value: int) -> int: ... + def load(self, file: str) -> bool: ... + +class LbmOrGks: + __members__: ClassVar[dict] = ... # read-only + GKS: ClassVar[LbmOrGks] = ... + LBM: ClassVar[LbmOrGks] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... 
+ def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi new file mode 100644 index 0000000000000000000000000000000000000000..43938ff7646efd3c596ae29971cce39fed865fa6 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi @@ -0,0 +1,83 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). 
If not, see <http://www.gnu.org/licenses/>. + +! \file logger.pyi +! \ingroup basics +! \author Henry Korb +======================================================================================= +""" +from typing import Any, ClassVar + +log: None + +class Level: + __members__: ClassVar[dict] = ... # read-only + INFO_HIGH: ClassVar[Level] = ... + INFO_INTERMEDIATE: ClassVar[Level] = ... + INFO_LOW: ClassVar[Level] = ... + LOGGER_ERROR: ClassVar[Level] = ... + WARNING: ClassVar[Level] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class Logger: + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def add_stdout() -> None: ... + @staticmethod + def enable_printed_rank_numbers(print: bool) -> None: ... + @staticmethod + def set_debug_level(level: int) -> None: ... + @staticmethod + def time_stamp(time_stemp: TimeStamp) -> None: ... + +class TimeStamp: + __members__: ClassVar[dict] = ... # read-only + DISABLE: ClassVar[TimeStamp] = ... + ENABLE: ClassVar[TimeStamp] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... 
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..36c2fea76713e980bb95eb6726d778de8c9a6583 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/gpu/__init__.pyi @@ -0,0 +1,436 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file __init__.pyi +! \ingroup gpu +! 
\author Henry Korb +======================================================================================= +""" +from typing import Any, Callable, ClassVar, List, Optional + +from typing import overload +import numpy +import pyfluids.bindings.basics +import pyfluids.bindings.gpu.grid_generator as grid_generator + +class ActuatorFarm(PreCollisionInteractor): + def __init__(self, number_of_blades_per_turbine: int, density: float, number_of_nodes_per_blade: int, epsilon: float, level: int, delta_t: float, delta_x: float, use_host_arrays: bool) -> None: ... + def add_turbine(self, posX: float, posY: float, posZ: float, diameter: float, omega: float, azimuth: float, yaw: float, bladeRadii: List[float]) -> None: ... + def calc_blade_forces(self) -> None: ... + def get_all_azimuths(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_coords_x(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_coords_x_device(self) -> int: ... + def get_all_blade_coords_y(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_coords_y_device(self) -> int: ... + def get_all_blade_coords_z(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_coords_z_device(self) -> int: ... + def get_all_blade_forces_x(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_forces_x_device(self) -> int: ... + def get_all_blade_forces_y(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_forces_y_device(self) -> int: ... + def get_all_blade_forces_z(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_forces_z_device(self) -> int: ... + def get_all_blade_radii(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_radii_device(self) -> int: ... + def get_all_blade_velocities_x(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_velocities_x_device(self) -> int: ... + def get_all_blade_velocities_y(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_velocities_y_device(self) -> int: ... 
+ def get_all_blade_velocities_z(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_blade_velocities_z_device(self) -> int: ... + def get_all_omegas(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_turbine_pos_x(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_turbine_pos_y(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_turbine_pos_z(self) -> numpy.ndarray[numpy.float32]: ... + def get_all_yaws(self) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_azimuth(self, turbine: int) -> float: ... + def get_turbine_blade_coords_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_coords_x_device(self, turbine: int) -> int: ... + def get_turbine_blade_coords_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_coords_y_device(self, turbine: int) -> int: ... + def get_turbine_blade_coords_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_coords_z_device(self, turbine: int) -> int: ... + def get_turbine_blade_forces_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_forces_x_device(self, turbine: int) -> int: ... + def get_turbine_blade_forces_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_forces_y_device(self, turbine: int) -> int: ... + def get_turbine_blade_forces_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_forces_z_device(self, turbine: int) -> int: ... + def get_turbine_blade_radii(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_radii_device(self, turbine: int) -> int: ... + def get_turbine_blade_velocities_x(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_velocities_x_device(self, turbine: int) -> int: ... + def get_turbine_blade_velocities_y(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... 
+ def get_turbine_blade_velocities_y_device(self, turbine: int) -> int: ... + def get_turbine_blade_velocities_z(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_blade_velocities_z_device(self, turbine: int) -> int: ... + def get_turbine_omega(self, turbine: int) -> float: ... + def get_turbine_pos(self, turbine: int) -> numpy.ndarray[numpy.float32]: ... + def get_turbine_yaw(self, turbine: int) -> float: ... + def set_all_azimuths(self, azimuths: numpy.ndarray[numpy.float32]) -> None: ... + def set_all_blade_coords(self, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ... + def set_all_blade_forces(self, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ... + def set_all_blade_velocities(self, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ... + def set_all_omegas(self, omegas: numpy.ndarray[numpy.float32]) -> None: ... + def set_all_yaws(self, yaws: numpy.ndarray[numpy.float32]) -> None: ... + def set_turbine_azimuth(self, turbine: int, azimuth: float) -> None: ... + def set_turbine_blade_coords(self, turbine: int, blade_coords_x: numpy.ndarray[numpy.float32], blade_coords_y: numpy.ndarray[numpy.float32], blade_coords_z: numpy.ndarray[numpy.float32]) -> None: ... + def set_turbine_blade_forces(self, turbine: int, blade_forces_x: numpy.ndarray[numpy.float32], blade_forces_y: numpy.ndarray[numpy.float32], blade_forces_z: numpy.ndarray[numpy.float32]) -> None: ... + def set_turbine_blade_velocities(self, turbine: int, blade_velocities_x: numpy.ndarray[numpy.float32], blade_velocities_y: numpy.ndarray[numpy.float32], blade_velocities_z: numpy.ndarray[numpy.float32]) -> None: ... 
+ def set_turbine_omega(self, turbine: int, omega: float) -> None: ... + def set_turbine_yaw(self, turbine: int, yaw: float) -> None: ... + @property + def delta_t(self) -> float: ... + @property + def delta_x(self) -> float: ... + @property + def density(self) -> float: ... + @property + def number_of_blades_per_turbine(self) -> int: ... + @property + def number_of_indices(self) -> int: ... + @property + def number_of_nodes(self) -> int: ... + @property + def number_of_nodes_per_blade(self) -> int: ... + @property + def number_of_turbines(self) -> int: ... + +class BoundaryConditionFactory: + def __init__(self) -> None: ... + def set_geometry_boundary_condition(self, boundary_condition_type) -> None: ... + def set_no_slip_boundary_condition(self, boundary_condition_type) -> None: ... + def set_precursor_boundary_condition(self, boundary_condition_type) -> None: ... + def set_pressure_boundary_condition(self, boundary_condition_type) -> None: ... + def set_slip_boundary_condition(self, boundary_condition_type) -> None: ... + def set_stress_boundary_condition(self, boundary_condition_type) -> None: ... + def set_velocity_boundary_condition(self, boundary_condition_type) -> None: ... + +class Communicator: + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def get_instance() -> Communicator: ... + def get_number_of_process(self) -> int: ... + def get_pid(self) -> int: ... + +class CudaMemoryManager: + def __init__(self, parameter: Parameter) -> None: ... + +class FileType: + __members__: ClassVar[dict] = ... # read-only + VTK: ClassVar[FileType] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... 
+ +class GridProvider: + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def make_grid_generator(builder: grid_generator.GridBuilder, para: Parameter, cuda_memory_manager: CudaMemoryManager, communicator: Communicator) -> GridProvider: ... + +class GridScaling: + __members__: ClassVar[dict] = ... # read-only + NotSpecified: ClassVar[GridScaling] = ... + ScaleCompressible: ClassVar[GridScaling] = ... + ScaleRhoSq: ClassVar[GridScaling] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class GridScalingFactory: + def __init__(self) -> None: ... + def set_scaling_factory(self, scaling_type) -> None: ... + +class NoSlipBC: + __members__: ClassVar[dict] = ... # read-only + NoSlip3rdMomentsCompressible: ClassVar[NoSlipBC] = ... + NoSlipBounceBack: ClassVar[NoSlipBC] = ... + NoSlipCompressible: ClassVar[NoSlipBC] = ... + NoSlipImplicitBounceBack: ClassVar[NoSlipBC] = ... + NoSlipIncompressible: ClassVar[NoSlipBC] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class OutputVariable: + __members__: ClassVar[dict] = ... # read-only + Distributions: ClassVar[OutputVariable] = ... + Velocities: ClassVar[OutputVariable] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... 
+ def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class Parameter: + @overload + def __init__(self, number_of_processes: int, my_ID: int, config_data: Optional[pyfluids.bindings.basics.ConfigurationFile]) -> None: ... + @overload + def __init__(self, number_of_processes: int, my_ID: int) -> None: ... + @overload + def __init__(self, config_data: pyfluids.bindings.basics.ConfigurationFile) -> None: ... + def add_actuator(self, actuator: PreCollisionInteractor) -> None: ... + def add_probe(self, probe: PreCollisionInteractor) -> None: ... + def get_SGS_constant(self) -> float: ... + def get_density_ratio(self) -> float: ... + def get_force_ratio(self) -> float: ... + def get_is_body_force(self) -> bool: ... + def get_output_path(self) -> str: ... + def get_output_prefix(self) -> str: ... + def get_velocity(self) -> float: ... + def get_velocity_ratio(self) -> float: ... + def get_viscosity(self) -> float: ... + def get_viscosity_ratio(self) -> float: ... + def set_AD_kernel(self, ad_kernel: str) -> None: ... + def set_calc_turbulence_intensity(self, calc_velocity_and_fluctuations: bool) -> None: ... + def set_comp_on(self, is_comp: bool) -> None: ... + def set_density_ratio(self, density_ratio: float) -> None: ... + def set_devices(self, devices: List[int]) -> None: ... + def set_diff_on(self, is_diff: bool) -> None: ... + def set_forcing(self, forcing_x: float, forcing_y: float, forcing_z: float) -> None: ... + def set_has_wall_model_monitor(self, has_wall_monitor: bool) -> None: ... + def set_initial_condition(self, init_func: Callable[[float,float,float],List[float]]) -> None: ... + def set_initial_condition_log_law(self, u_star: float, z0: float, velocity_ratio: float) -> None: ... 
+ def set_initial_condition_perturbed_log_law(self, u_star: float, z0: float, length_x: float, length_z: float, height: float, velocity_ratio: float) -> None: ... + def set_initial_condition_uniform(self, velocity_x: float, velocity_y: float, velocity_z: float) -> None: ... + def set_is_body_force(self, is_body_force: bool) -> None: ... + def set_main_kernel(self, kernel: str) -> None: ... + def set_max_dev(self, max_dev: int) -> None: ... + def set_max_level(self, number_of_levels: int) -> None: ... + def set_outflow_pressure_correction_factor(self, correction_factor: float) -> None: ... + def set_output_path(self, o_path: str) -> None: ... + def set_output_prefix(self, o_prefix: str) -> None: ... + def set_print_files(self, print_files: bool) -> None: ... + def set_quadric_limiters(self, quadric_limiter_p: float, quadric_limiter_m: float, quadric_limiter_d: float) -> None: ... + def set_temperature_BC(self, temp_bc: float) -> None: ... + def set_temperature_init(self, temp: float) -> None: ... + def set_timestep_end(self, tend: int) -> None: ... + def set_timestep_of_coarse_level(self, timestep: int) -> None: ... + def set_timestep_out(self, tout: int) -> None: ... + def set_timestep_start_out(self, t_start_out: int) -> None: ... + def set_use_streams(self, use_streams: bool) -> None: ... + def set_velocity_LB(self, velocity: float) -> None: ... + def set_velocity_ratio(self, velocity_ratio: float) -> None: ... + def set_viscosity_LB(self, viscosity: float) -> None: ... + def set_viscosity_ratio(self, viscosity_ratio: float) -> None: ... + +class PreCollisionInteractor: + def __init__(self, *args, **kwargs) -> None: ... + +class PrecursorBC: + __members__: ClassVar[dict] = ... # read-only + DistributionsPrecursor: ClassVar[PrecursorBC] = ... + NotSpecified: ClassVar[PrecursorBC] = ... + VelocityPrecursor: ClassVar[PrecursorBC] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... 
+ def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class PrecursorWriter(PreCollisionInteractor): + def __init__(self, filename: str, output_path: str, x_pos: float, y_min: float, y_max: float, z_min: float, z_max: float, t_start_out: int, t_save: int, output_variable: OutputVariable, max_timesteps_per_file: int) -> None: ... + +class PressureBC: + __members__: ClassVar[dict] = ... # read-only + NotSpecified: ClassVar[PressureBC] = ... + OutflowNonReflective: ClassVar[PressureBC] = ... + OutflowNonReflectivePressureCorrection: ClassVar[PressureBC] = ... + PressureEquilibrium: ClassVar[PressureBC] = ... + PressureEquilibrium2: ClassVar[PressureBC] = ... + PressureNonEquilibriumCompressible: ClassVar[PressureBC] = ... + PressureNonEquilibriumIncompressible: ClassVar[PressureBC] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class SideType: + __members__: ClassVar[dict] = ... # read-only + GEOMETRY: ClassVar[SideType] = ... + MX: ClassVar[SideType] = ... + MY: ClassVar[SideType] = ... + MZ: ClassVar[SideType] = ... + PX: ClassVar[SideType] = ... + PY: ClassVar[SideType] = ... + PZ: ClassVar[SideType] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... 
+ def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class Simulation: + @overload + def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory, gridScalingFactory: GridScalingFactory) -> None: ... + @overload + def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory) -> None: ... + @overload + def __init__(self, parameter: Parameter, memoryManager: CudaMemoryManager, communicator, gridProvider: GridProvider, bcFactory: BoundaryConditionFactory, tmFactory: TurbulenceModelFactory, gridScalingFactory: GridScalingFactory) -> None: ... + def addEnstrophyAnalyzer(self, t_analyse: int) -> None: ... + def addKineticEnergyAnalyzer(self, t_analyse: int) -> None: ... + def run(self) -> None: ... + +class SlipBC: + __members__: ClassVar[dict] = ... # read-only + NotSpecified: ClassVar[SlipBC] = ... + SlipBounceBack: ClassVar[SlipBC] = ... + SlipCompressible: ClassVar[SlipBC] = ... + SlipCompressibleTurbulentViscosity: ClassVar[SlipBC] = ... + SlipIncompressible: ClassVar[SlipBC] = ... + SlipPressureCompressibleTurbulentViscosity: ClassVar[SlipBC] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class StressBC: + __members__: ClassVar[dict] = ... # read-only + NotSpecified: ClassVar[StressBC] = ... + StressBounceBack: ClassVar[StressBC] = ... + StressCompressible: ClassVar[StressBC] = ... + StressPressureBounceBack: ClassVar[StressBC] = ... 
+ __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class TurbulenceModel: + __members__: ClassVar[dict] = ... # read-only + AMD: ClassVar[TurbulenceModel] = ... + NONE: ClassVar[TurbulenceModel] = ... + QR: ClassVar[TurbulenceModel] = ... + Smagorinsky: ClassVar[TurbulenceModel] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class TurbulenceModelFactory: + def __init__(self, para: Parameter) -> None: ... + def read_config_file(self, config_data: pyfluids.bindings.basics.ConfigurationFile) -> None: ... + def set_model_constant(self, model_constant: float) -> None: ... + def set_turbulence_model(self, turbulence_model: TurbulenceModel) -> None: ... + +class VTKFileCollection(FileCollection): + def __init__(self, prefix: str) -> None: ... + +class VelocityBC: + __members__: ClassVar[dict] = ... # read-only + NotSpecified: ClassVar[VelocityBC] = ... + VelocityAndPressureCompressible: ClassVar[VelocityBC] = ... + VelocityCompressible: ClassVar[VelocityBC] = ... + VelocityIncompressible: ClassVar[VelocityBC] = ... + VelocitySimpleBounceBackCompressible: ClassVar[VelocityBC] = ... + __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... 
+ def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class FileCollection: + def __init__(self, *args, **kwargs) -> None: ... + +def create_file_collection(prefix: str, type: FileType) -> FileCollection: ... diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi new file mode 100644 index 0000000000000000000000000000000000000000..8d715e4b4cd49e6dbf92da3aedddbc4b869067c4 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi @@ -0,0 +1,100 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file grid_generator.pyi +! \ingroup gpu +! \author Henry Korb +======================================================================================= +""" +from typing import Any, List + +from typing import overload +import pyfluids.bindings.basics +import pyfluids.bindings.gpu + +class BoundingBox: + def __init__(self, min_x: float, max_x: float, min_y: float, max_y: float, min_z: float, max_z: float) -> None: ... + +class Conglomerate(Object): + def __init__(self, *args, **kwargs) -> None: ... + def add(self, object: Object) -> None: ... + @staticmethod + def make_shared() -> Conglomerate: ... + def subtract(self, object: Object) -> None: ... + +class Cuboid(Object): + def __init__(self, min_x1: float, min_x2: float, min_x3: float, max_x1: float, max_x2: float, max_x3: float) -> None: ... + +class GridBuilder: + def __init__(self, *args, **kwargs) -> None: ... + def get_number_of_grid_levels(self) -> int: ... + +class GridFactory: + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def make() -> GridFactory: ... + +class LevelGridBuilder(GridBuilder): + def __init__(self, *args, **kwargs) -> None: ... + def set_no_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType) -> None: ... + def set_periodic_boundary_condition(self, periodic_x: bool, periodic_y: bool, periodic_z: bool) -> None: ... + def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.FileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ... + def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ... 
+ def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ... + def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float) -> None: ... + def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ... + +class MultipleGridBuilder(LevelGridBuilder): + def __init__(self, *args, **kwargs) -> None: ... + def add_coarse_grid(self, start_x: float, start_y: float, start_z: float, end_x: float, end_y: float, end_z: float, delta: float) -> None: ... + @overload + def add_geometry(self, solid_object: Object) -> None: ... + @overload + def add_geometry(self, solid_object: Object, level: int) -> None: ... + @overload + def add_grid(self, grid_shape: Object) -> None: ... + @overload + def add_grid(self, grid_shape: Object, level_fine: int) -> None: ... + def build_grids(self, lbm_or_gks: pyfluids.bindings.basics.LbmOrGks, enable_thin_walls: bool) -> None: ... + def get_number_of_levels(self) -> int: ... + @staticmethod + def make_shared(grid_factory: GridFactory) -> MultipleGridBuilder: ... + +class Object: + def __init__(self, *args, **kwargs) -> None: ... + +class Sphere(Object): + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def make_shared() -> Sphere: ... + +class TriangularMesh(Object): + def __init__(self, *args, **kwargs) -> None: ... + @staticmethod + def make() -> TriangularMesh: ... 
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi new file mode 100644 index 0000000000000000000000000000000000000000..af9c40078e6009efebda4450b5c5e23586aa1e83 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/gpu/probes.pyi @@ -0,0 +1,85 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file probes.pyi +! \ingroup gpu +! 
\author Henry Korb +======================================================================================= +""" +from typing import ClassVar, List + +import pyfluids.bindings.gpu + +class PlanarAverageProbe(Probe): + def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int, plane_normal: str) -> None: ... + +class PlaneProbe(Probe): + def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ... + def set_probe_plane(self, pos_x: float, pos_y: float, pos_z: float, delta_x: float, delta_y: float, delta_z: float) -> None: ... + +class PointProbe(Probe): + def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_avg: int, t_start_out: int, t_out: int, output_timeseries: bool) -> None: ... + def add_probe_points_from_list(self, point_coords_x: List[float], point_coords_y: List[float], point_coords_z: List[float]) -> None: ... + def add_probe_points_from_x_normal_plane(self, pos_x: float, pos0_y: float, pos0_z: float, pos1_y: float, pos1_z: float, n_y: int, n_z: int) -> None: ... + +class Probe(pyfluids.bindings.gpu.PreCollisionInteractor): + def __init__(self, *args, **kwargs) -> None: ... + def add_all_available_statistics(self) -> None: ... + def add_statistic(self, variable: Statistic) -> None: ... + def set_file_name_to_n_out(self) -> None: ... + +class Statistic: + __members__: ClassVar[dict] = ... # read-only + Instantaneous: ClassVar[Statistic] = ... + Means: ClassVar[Statistic] = ... + SpatialCovariances: ClassVar[Statistic] = ... + SpatialFlatness: ClassVar[Statistic] = ... + SpatialMeans: ClassVar[Statistic] = ... + SpatialSkewness: ClassVar[Statistic] = ... + SpatioTemporalCovariances: ClassVar[Statistic] = ... + SpatioTemporalFlatness: ClassVar[Statistic] = ... + SpatioTemporalMeans: ClassVar[Statistic] = ... + SpatioTemporalSkewness: ClassVar[Statistic] = ... + Variances: ClassVar[Statistic] = ... 
+ __entries: ClassVar[dict] = ... + def __init__(self, arg0: int) -> None: ... + def __eq__(self, arg0: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, arg0: object) -> bool: ... + def __setstate__(self, arg0: int) -> None: ... + @property + def name(self) -> str: ... + +class WallModelProbe(Probe): + def __init__(self, probe_name: str, output_path: str, t_start_avg: int, t_start_tmp_avg: int, t_avg: int, t_start_out: int, t_out: int) -> None: ... + def set_evaluate_pressure_gradient(self, eval_press_grad: bool) -> None: ... + def set_force_output_to_stress(self, output_stress: bool) -> None: ... diff --git a/pythonbindings/pyfluids-stubs/bindings/lbm.pyi b/pythonbindings/pyfluids-stubs/bindings/lbm.pyi new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonbindings/pyfluids-stubs/bindings/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/logger.pyi new file mode 100644 index 0000000000000000000000000000000000000000..fe84eeb18f3245ef72ed023b2de9db7b9131d144 --- /dev/null +++ b/pythonbindings/pyfluids-stubs/bindings/logger.pyi @@ -0,0 +1,45 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| 
|________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file logger.pyi +! \ingroup bindings +! \author Henry Korb +======================================================================================= +""" +class Logger: + @staticmethod + def change_log_path(path: str) -> None: ... + @staticmethod + def initialize_logger() -> None: ... + +def vf_log_critical(message: str) -> None: ... +def vf_log_debug(message: str) -> None: ... +def vf_log_info(message: str) -> None: ... +def vf_log_trace(message: str) -> None: ... +def vf_log_warning(message: str) -> None: ... 
diff --git a/pythonbindings/pyfluids/__init__.py b/pythonbindings/pyfluids/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0537b758267e22a72e5030340de7b87d52f35c3 --- /dev/null +++ b/pythonbindings/pyfluids/__init__.py @@ -0,0 +1,54 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file __init__.py +! \ingroup pyfluids +! 
\author Henry Korb +======================================================================================= +""" +try: + from .bindings import basics +except ImportError: + print("Basics bindings not included") +try: + from .bindings import logger +except ImportError: + print("Logger bindings not included") +try: + from .bindings import lbm +except ImportError: + print("LBM bindings not included") +try: + from .bindings import gpu +except ImportError: + print("GPU bindings not included") +try: + from .bindings import cpu +except ImportError: + print("CPU bindings not included") \ No newline at end of file diff --git a/pythonbindings/pyfluids/py.typed b/pythonbindings/pyfluids/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pythonbindings/pymuparser/__init__.py b/pythonbindings/pymuparser/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..398069bcba03b3fe710d9d9a6398e9c530b19ee9 --- /dev/null +++ b/pythonbindings/pymuparser/__init__.py @@ -0,0 +1,38 @@ +r""" +======================================================================================= + ____ ____ __ ______ __________ __ __ __ __ + \ \ | | | | | _ \ |___ ___| | | | | / \ | | + \ \ | | | | | |_) | | | | | | | / \ | | + \ \ | | | | | _ / | | | | | | / /\ \ | | + \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ + \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| + \ \ | | ________________________________________________________________ + \ \ | | | ______________________________________________________________| + \ \| | | | __ __ __ __ ______ _______ + \ | | |_____ | | | | | | | | | _ \ / _____) + \ | | _____| | | | | | | | | | | \ \ \_______ + \ | | | | |_____ | \_/ | | | | |_/ / _____ | + \ _____| |__| |________| \_______/ |__| |______/ (_______/ + + This file is part of VirtualFluids. 
VirtualFluids is free software: you can + redistribute it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + VirtualFluids is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. + +! \file __init__.py +! \ingroup pymuparser +! \author Henry Korb +======================================================================================= +""" +try: + from .bindings import Parser +except ImportError as e: + raise ImportError("Pymuparser bindings were not built. Only included if VirtualFluids is built with BUILD_VF_CPU=ON.") from e \ No newline at end of file diff --git a/pythonbindings/src/VirtualFluids.cpp b/pythonbindings/src/VirtualFluids.cpp new file mode 100644 index 0000000000000000000000000000000000000000..20e5012e0af325440e502c704d6f372100306ab1 --- /dev/null +++ b/pythonbindings/src/VirtualFluids.cpp @@ -0,0 +1,63 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | 
|_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file VirtualFluids.cpp +//! \ingroup src +//! \author Henry Korb +//======================================================================================= +#include <pybind11/pybind11.h> +#include "basics/basics.cpp" +#include "lbm/lbm.cpp" +#include "logger/logger.cpp" + +#ifdef VF_GPU_PYTHONBINDINGS +#include "gpu/gpu.cpp" +#endif +#ifdef VF_CPU_PYTHONBINDINGS +#include "cpu/cpu.cpp" +#endif + + +namespace py_bindings +{ + namespace py = pybind11; + + PYBIND11_MODULE(bindings, m) + { + py::add_ostream_redirect(m, "ostream_redirect"); + basics::makeModule(m); + lbm::makeModule(m); + logging::makeModule(m); +#ifdef VF_GPU_PYTHONBINDINGS + gpu::makeModule(m); +#endif +#ifdef VF_CPU_PYTHONBINDINGS + cpu::makeModule(m); +#endif + } +} \ No newline at end of file diff --git a/pythonbindings/src/VirtualFluidsModulesCPU.cpp b/pythonbindings/src/VirtualFluidsModulesCPU.cpp deleted file mode 100644 index 2fba3da494f568f7d0d0a117a579a45c9c1b9245..0000000000000000000000000000000000000000 --- a/pythonbindings/src/VirtualFluidsModulesCPU.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include <pybind11/pybind11.h> -#include "cpu/cpu.cpp" - -namespace py_bindings -{ - namespace 
py = pybind11; - - PYBIND11_MODULE(pyfluids, m) - { - cpu::makeModule(m); - } -} \ No newline at end of file diff --git a/pythonbindings/src/VirtualFluidsModulesGPU.cpp b/pythonbindings/src/VirtualFluidsModulesGPU.cpp deleted file mode 100644 index b96971caf381faada76ee676cf60469492d055c2..0000000000000000000000000000000000000000 --- a/pythonbindings/src/VirtualFluidsModulesGPU.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include <pybind11/pybind11.h> -#include "basics/basics.cpp" -#include "lbm/lbm.cpp" -#include "gpu/gpu.cpp" -#include "logger/logger.cpp" - -namespace py_bindings -{ - namespace py = pybind11; - - PYBIND11_MODULE(pyfluids, m) - { - basics::makeModule(m); - gpu::makeModule(m); - lbm::makeModule(m); - logging::makeModule(m); - py::add_ostream_redirect(m, "ostream_redirect"); - } -} \ No newline at end of file diff --git a/pythonbindings/src/basics/basics.cpp b/pythonbindings/src/basics/basics.cpp index 381e345d78226b25ec3a77a14340d2ef1171c8c9..e67dfb05308511c8bf79d7e860299f062f317194 100644 --- a/pythonbindings/src/basics/basics.cpp +++ b/pythonbindings/src/basics/basics.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file basics.cpp +//! \ingroup basics +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include "submodules/logger.cpp" #include "submodules/configuration_file.cpp" diff --git a/pythonbindings/src/basics/submodules/configuration_file.cpp b/pythonbindings/src/basics/submodules/configuration_file.cpp index f5a2f87135a17f5eda34a7467d95f9db6b1c21d1..7fcd48c34824b9370eeac1872c899bf980176a52 100644 --- a/pythonbindings/src/basics/submodules/configuration_file.cpp +++ b/pythonbindings/src/basics/submodules/configuration_file.cpp @@ -1,5 +1,37 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ 
\ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file configuration_file.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> -#include <basics/config/ConfigurationFile.h> +#include "basics/config/ConfigurationFile.h" namespace configuration { @@ -9,6 +41,19 @@ namespace configuration { py::class_<vf::basics::ConfigurationFile>(parentModule, "ConfigurationFile") .def(py::init<>()) - .def("load", &vf::basics::ConfigurationFile::load); + .def("load", &vf::basics::ConfigurationFile::load, py::arg("file")) + .def("contains", &vf::basics::ConfigurationFile::contains, py::arg("key")) + .def("get_int_value" , static_cast<int (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_int_value" , static_cast<int (vf::basics::ConfigurationFile::*)(const std::string&, int ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")) + .def("get_uint_value" , static_cast<uint (vf::basics::ConfigurationFile::*)(const std::string&) 
const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_uint_value" , static_cast<uint (vf::basics::ConfigurationFile::*)(const std::string&, uint ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")) + .def("get_float_value" , static_cast<float (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_float_value" , static_cast<float (vf::basics::ConfigurationFile::*)(const std::string&, float ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")) + .def("get_double_value", static_cast<double (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_double_value", static_cast<double (vf::basics::ConfigurationFile::*)(const std::string&, double ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")) + .def("get_bool_value" , static_cast<bool (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_bool_value" , static_cast<bool (vf::basics::ConfigurationFile::*)(const std::string&, bool ) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")) + .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key")) + .def("get_string_value", static_cast<std::string (vf::basics::ConfigurationFile::*)(const std::string&, std::string) const>(&vf::basics::ConfigurationFile::getValue), py::arg("key"), py::arg("default_value")); } } \ No newline at end of file diff --git a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp index ed1deeca62fc57b7f44499b306e9f99b7f990604..d20cf2d1f631f6d36a80c36f1fb6c9c59d192090 100644 --- 
a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp +++ b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file lbm_or_gks.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include "basics/Core/LbmOrGks.h" diff --git a/pythonbindings/src/basics/submodules/logger.cpp b/pythonbindings/src/basics/submodules/logger.cpp index d46648e349b44243581e083f3561e8a13648f3b2..fa7e00e4dca06581b7a14d2bcf2628ed6af60001 100644 --- a/pythonbindings/src/basics/submodules/logger.cpp +++ b/pythonbindings/src/basics/submodules/logger.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). 
If not, see <http://www.gnu.org/licenses/>. +// +//! \file logger.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <pybind11/iostream.h> #include <basics/Core/Logger/Logger.h> @@ -12,12 +44,12 @@ namespace logger py::module loggerModule = parentModule.def_submodule("logger"); py::class_<logging::Logger>(loggerModule, "Logger") - .def("add_stdout", [](){ + .def_static("add_stdout", [](){ logging::Logger::addStream(&std::cout); }) - .def("set_debug_level", &logging::Logger::setDebugLevel) - .def("time_stamp", &logging::Logger::timeStamp) - .def("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers); + .def_static("set_debug_level", &logging::Logger::setDebugLevel) + .def_static("time_stamp", &logging::Logger::timeStamp, py::arg("time_stamp")) + .def_static("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers, py::arg("print")); loggerModule.attr("log") = logging::out; py::enum_<logging::Logger::Level>(loggerModule, "Level") diff --git a/pythonbindings/src/cpu/cpu.cpp b/pythonbindings/src/cpu/cpu.cpp index 554de53b47446366693aed31d534f6145ebea8ba..75143d913596c74a26f25ce64f1e6d214a442e34 100644 --- a/pythonbindings/src/cpu/cpu.cpp +++ b/pythonbindings/src/cpu/cpu.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | 
_ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file cpu.cpp +//! \ingroup cpu +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include "submodules/boundaryconditions.cpp" #include "submodules/simulationconfig.cpp" diff --git a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp index 3bff7bc069ca20fe1c0cf3d1847b9714e0381505..ac9ec8605dec51e8374c850b1c1b58314674c426 100644 --- a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp +++ b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | 
________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file boundaryconditions.cpp +//! \ingroup submodules +//! 
\author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <pybind11/stl.h> #include <BoundaryConditions/DensityBCAdapter.h> diff --git a/pythonbindings/src/cpu/submodules/geometry.cpp b/pythonbindings/src/cpu/submodules/geometry.cpp index b7ff4dd761258d41687589d2dd89c3479093753e..4c4c47b002b9c7451a8d788ba82c4a19b78ca96f 100644 --- a/pythonbindings/src/cpu/submodules/geometry.cpp +++ b/pythonbindings/src/cpu/submodules/geometry.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file geometry.cpp +//! \ingroup submodules +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <geometry3d/GbPoint3D.h> #include <geometry3d/GbObject3D.h> diff --git a/pythonbindings/src/cpu/submodules/kernel.cpp b/pythonbindings/src/cpu/submodules/kernel.cpp index fb291790632cc2041410f60a14fca8d966283343..b00d86579540a299e4bf3ed47bc09d4386f420a2 100644 --- a/pythonbindings/src/cpu/submodules/kernel.cpp +++ b/pythonbindings/src/cpu/submodules/kernel.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file kernel.cpp +//! \ingroup submodules +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <memory> #include <pybind11/pybind11.h> #include <simulationconfig/KernelFactory.h> diff --git a/pythonbindings/src/cpu/submodules/simulationconfig.cpp b/pythonbindings/src/cpu/submodules/simulationconfig.cpp index 60af4e36af4dca67e9262dd9f5ee1f46d5b7bb58..09d91f44e85f03c6150c56ce5762e7629212fba0 100644 --- a/pythonbindings/src/cpu/submodules/simulationconfig.cpp +++ b/pythonbindings/src/cpu/submodules/simulationconfig.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file simulationconfig.cpp +//! \ingroup submodules +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <simulationconfig/Simulation.h> diff --git a/pythonbindings/src/cpu/submodules/simulationparameters.cpp b/pythonbindings/src/cpu/submodules/simulationparameters.cpp index acc272f2ee412cfbafd9007b4b18610cfd0a1e9b..b33d20f9e5d335a0ed381faf8786d88cc7642738 100644 --- a/pythonbindings/src/cpu/submodules/simulationparameters.cpp +++ b/pythonbindings/src/cpu/submodules/simulationparameters.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ 
+// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file simulationparameters.cpp +//! \ingroup submodules +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <pybind11/stl.h> #include <complex> diff --git a/pythonbindings/src/cpu/submodules/writer.cpp b/pythonbindings/src/cpu/submodules/writer.cpp index d5ec527a27caf63d9a3066c51e1f675b307fe0b2..f1cfd8934c2da84266a93d5bcd91eb26f5f69d3f 100644 --- a/pythonbindings/src/cpu/submodules/writer.cpp +++ b/pythonbindings/src/cpu/submodules/writer.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ 
_______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file writer.cpp +//! \ingroup submodules +//! \author Sven Marcus, Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <simulationconfig/WriterConfiguration.h> diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp index dc110cd5e19a9aad4937f9c2133ddf74c0ddf9bf..9eb160ae7765f16a6437e343cb878bb4b80877bf 100644 --- a/pythonbindings/src/gpu/gpu.cpp +++ b/pythonbindings/src/gpu/gpu.cpp @@ -1,14 +1,50 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | 
______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file gpu.cpp +//! \ingroup gpu +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> -#include "submodules/actuator_line.cpp" #include "submodules/pre_collision_interactor.cpp" #include "submodules/simulation.cpp" #include "submodules/parameter.cpp" #include "submodules/boundary_conditions.cpp" #include "submodules/communicator.cpp" #include "submodules/cuda_memory_manager.cpp" +#include "submodules/probes.cpp" +#include "submodules/precursor_writer.cpp" #include "submodules/grid_provider.cpp" #include "submodules/grid_generator.cpp" -#include "submodules/probes.cpp" +#include "submodules/turbulence_models.cpp" +#include "submodules/transient_bc_setter.cpp" +#include "submodules/actuator_farm.cpp" +#include "submodules/grid_scaling_factory.cpp" namespace gpu { @@ -20,13 +56,17 @@ namespace gpu simulation::makeModule(gpuModule); parameter::makeModule(gpuModule); pre_collision_interactor::makeModule(gpuModule); - actuator_line::makeModule(gpuModule); + actuator_farm::makeModule(gpuModule); boundary_conditions::makeModule(gpuModule); + transient_bc_setter::makeModule(gpuModule); communicator::makeModule(gpuModule); cuda_memory_manager::makeModule(gpuModule); - grid_provider::makeModule(gpuModule); probes::makeModule(gpuModule); + precursor_writer::makeModule(gpuModule); grid_generator::makeModule(gpuModule); + grid_provider::makeModule(gpuModule); + turbulence_model::makeModule(gpuModule); + grid_scaling_factory::makeModule(gpuModule); return gpuModule; } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/actuator_farm.cpp b/pythonbindings/src/gpu/submodules/actuator_farm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a930616db3e0d0713bdf57157387d75d171603de --- /dev/null +++ b/pythonbindings/src/gpu/submodules/actuator_farm.cpp @@ -0,0 +1,171 @@ +//======================================================================================= +// ____ ____ __ ______ 
__________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file actuator_farm.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> +class PyActuatorFarm : public ActuatorFarm +{ +public: + using ActuatorFarm::ActuatorFarm; // Inherit constructors + void calcBladeForces() override + { + PYBIND11_OVERRIDE_NAME(void, ActuatorFarm, "calc_blade_forces", calcBladeForces); + } +}; +namespace actuator_farm +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + using arr = py::array_t<float, py::array::c_style>; + + py::class_<ActuatorFarm, PreCollisionInteractor, PyActuatorFarm, std::shared_ptr<ActuatorFarm>>(parentModule, "ActuatorFarm", py::dynamic_attr()) + .def(py::init< const uint, + const real, + const uint, + const real, + int, + const real, + const real, + const bool>(), + py::arg("number_of_blades_per_turbine"), + py::arg("density"), + py::arg("number_of_nodes_per_blade"), + py::arg("epsilon"), + py::arg("level"), + py::arg("delta_t"), + py::arg("delta_x"), + py::arg("use_host_arrays")) + .def_property_readonly("number_of_turbines", &ActuatorFarm::getNumberOfTurbines) + .def_property_readonly("number_of_nodes_per_blade", &ActuatorFarm::getNumberOfNodesPerBlade) + .def_property_readonly("number_of_blades_per_turbine", &ActuatorFarm::getNumberOfBladesPerTurbine) + .def_property_readonly("number_of_nodes", &ActuatorFarm::getNumberOfNodes) + .def_property_readonly("number_of_indices", &ActuatorFarm::getNumberOfIndices) + .def_property_readonly("density", &ActuatorFarm::getDensity) + .def_property_readonly("delta_t", &ActuatorFarm::getDeltaT) + .def_property_readonly("delta_x", &ActuatorFarm::getDeltaX) + + .def("add_turbine", &ActuatorFarm::addTurbine, py::arg("posX"), py::arg("posY"), py::arg("posZ"), py::arg("diameter"), py::arg("omega"), 
py::arg("azimuth"), py::arg("yaw"), py::arg("bladeRadii")) + + .def("get_turbine_pos", [](ActuatorFarm& al, uint turbine){ real position[3] = {al.getTurbinePosX(turbine), al.getTurbinePosY(turbine), al.getTurbinePosZ(turbine)}; return arr(3, position); }, py::arg("turbine")) + .def("get_turbine_azimuth", &ActuatorFarm::getTurbineAzimuth, py::arg("turbine")) + .def("get_turbine_yaw", &ActuatorFarm::getTurbineYaw, py::arg("turbine")) + .def("get_turbine_omega", &ActuatorFarm::getTurbineOmega, py::arg("turbine")) + .def("get_all_azimuths", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllAzimuths()); } ) + .def("get_all_yaws", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllYaws()); } ) + .def("get_all_omegas", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllOmegas()); } ) + .def("get_all_turbine_pos_x", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosX()); } ) + .def("get_all_turbine_pos_y", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosY()); } ) + .def("get_all_turbine_pos_z", [](ActuatorFarm& al){ return arr(al.getNumberOfTurbines(), al.getAllTurbinePosZ()); } ) + + .def("get_all_blade_radii", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfNodesPerBlade()}, al.getAllBladeRadii()); } ) + .def("get_all_blade_coords_x", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsX()); } ) + .def("get_all_blade_coords_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsY()); } ) + .def("get_all_blade_coords_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeCoordsZ()); } ) + .def("get_all_blade_velocities_x", [](ActuatorFarm& al){ return 
arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesX()); } ) + .def("get_all_blade_velocities_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesY()); } ) + .def("get_all_blade_velocities_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeVelocitiesZ()); } ) + .def("get_all_blade_forces_x", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesX()); } ) + .def("get_all_blade_forces_y", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesY()); } ) + .def("get_all_blade_forces_z", [](ActuatorFarm& al){ return arr({al.getNumberOfTurbines(), al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getAllBladeForcesZ()); } ) + + .def("get_turbine_blade_radii", [](ActuatorFarm& al, uint turbine){ return arr(al.getNumberOfNodesPerBlade(), al.getTurbineBladeRadiiDevice(turbine)); } , py::arg("turbine")) + .def("get_turbine_blade_coords_x", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeCoordsXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_coords_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeCoordsYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_coords_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeCoordsZDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_x", [](ActuatorFarm& al, uint 
turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeVelocitiesXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeVelocitiesYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeVelocitiesZDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_x", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeForcesXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_y", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_z", [](ActuatorFarm& al, uint turbine){ return arr({al.getNumberOfBladesPerTurbine(), al.getNumberOfNodesPerBlade()}, al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") ) + + .def("get_all_blade_radii_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t>(al.getAllBladeRadiiDevice()); } ) + .def("get_all_blade_coords_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsXDevice()); } ) + .def("get_all_blade_coords_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsYDevice()); } ) + .def("get_all_blade_coords_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeCoordsZDevice()); } ) + .def("get_all_blade_velocities_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesXDevice()); } ) 
+ .def("get_all_blade_velocities_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesYDevice()); } ) + .def("get_all_blade_velocities_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeVelocitiesZDevice()); } ) + .def("get_all_blade_forces_x_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesXDevice()); } ) + .def("get_all_blade_forces_y_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesYDevice()); } ) + .def("get_all_blade_forces_z_device", [](ActuatorFarm& al) -> intptr_t { return reinterpret_cast<intptr_t> (al.getAllBladeForcesZDevice()); } ) + + .def("get_turbine_blade_radii_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeRadiiDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_coords_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_coords_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_coords_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeCoordsZDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_velocities_z_device", [](ActuatorFarm& al, uint 
turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeVelocitiesZDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_x_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesXDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_y_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesYDevice(turbine)); }, py::arg("turbine") ) + .def("get_turbine_blade_forces_z_device", [](ActuatorFarm& al, uint turbine) -> intptr_t { return reinterpret_cast<intptr_t>(al.getTurbineBladeForcesZDevice(turbine)); }, py::arg("turbine") ) + + .def("set_all_azimuths", [](ActuatorFarm& al, arr azimuths){ al.setAllAzimuths(static_cast<float *>(azimuths.request().ptr)); }, py::arg("azimuths")) + .def("set_all_yaws", [](ActuatorFarm& al, arr yaws){ al.setAllYaws(static_cast<float *>(yaws.request().ptr)); }, py::arg("yaws")) + .def("set_all_omegas", [](ActuatorFarm& al, arr omegas){ al.setAllOmegas(static_cast<float *>(omegas.request().ptr)); }, py::arg("omegas")) + + .def("set_turbine_azimuth", &ActuatorFarm::setTurbineAzimuth, py::arg("turbine"), py::arg("azimuth")) + .def("set_turbine_yaw", &ActuatorFarm::setTurbineYaw, py::arg("turbine"), py::arg("yaw")) + .def("set_turbine_omega", &ActuatorFarm::setTurbineOmega, py::arg("turbine"), py::arg("omega")) + + .def("set_all_blade_coords", [](ActuatorFarm& al, arr coordsX, arr coordsY, arr coordsZ) + { + al.setAllBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); + }, py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") ) + .def("set_all_blade_velocities", [](ActuatorFarm& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ) + { + al.setAllBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float 
*>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); + }, py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") ) + .def("set_all_blade_forces", [](ActuatorFarm& al, arr forcesX, arr forcesY, arr forcesZ) + { + al.setAllBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); + }, py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") ) + .def("set_turbine_blade_coords", [](ActuatorFarm& al, uint turbine, arr coordsX, arr coordsY, arr coordsZ) + { + al.setTurbineBladeCoords(turbine, static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); + }, py::arg("turbine"), py::arg("blade_coords_x"), py::arg("blade_coords_y"), py::arg("blade_coords_z") ) + .def("set_turbine_blade_velocities", [](ActuatorFarm& al, uint turbine, arr velocitiesX, arr velocitiesY, arr velocitiesZ) + { + al.setTurbineBladeVelocities(turbine, static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), static_cast<float *>(velocitiesZ.request().ptr)); + }, py::arg("turbine"), py::arg("blade_velocities_x"), py::arg("blade_velocities_y"), py::arg("blade_velocities_z") ) + .def("set_turbine_blade_forces", [](ActuatorFarm& al, uint turbine, arr forcesX, arr forcesY, arr forcesZ) + { + al.setTurbineBladeForces(turbine, static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); + }, py::arg("turbine"), py::arg("blade_forces_x"), py::arg("blade_forces_y"), py::arg("blade_forces_z") ) + .def("calc_blade_forces", &ActuatorFarm::calcBladeForces); + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/actuator_line.cpp b/pythonbindings/src/gpu/submodules/actuator_line.cpp deleted file mode 100644 index 
3207fadbc37df38e53e00adcb9a86f0b8e82ba98..0000000000000000000000000000000000000000 --- a/pythonbindings/src/gpu/submodules/actuator_line.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include <pybind11/pybind11.h> -#include <pybind11/stl.h> -#include <pybind11/numpy.h> -#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h> -#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> -class PyActuatorLine : public ActuatorLine -{ -public: - using ActuatorLine::ActuatorLine; // Inherit constructors - void calcBladeForces() override - { - PYBIND11_OVERRIDE_NAME(void, ActuatorLine, "calc_blade_forces", calcBladeForces,); - } -}; -namespace actuator_line -{ - namespace py = pybind11; - - void makeModule(py::module_ &parentModule) - { - using arr = py::array_t<float, py::array::c_style>; - - py::class_<ActuatorLine, PreCollisionInteractor, PyActuatorLine, std::shared_ptr<ActuatorLine>>(parentModule, "ActuatorLine", py::dynamic_attr()) - .def(py::init< const uint, - const real, - const uint, - const real, - real, real, real, - const real, - int, - const real, - const real>(), - "n_blades", - "density", - "n_blade_nodes", - "epsilon", - "turbine_pos_x", "turbine_pos_y", "turbine_pos_z", - "diameter", - "level", - "delta_t", - "delta_x") - .def_property("omega", &ActuatorLine::getOmega, &ActuatorLine::setOmega) - .def_property("azimuth", &ActuatorLine::getAzimuth, &ActuatorLine::setAzimuth) - .def_property("yaw", &ActuatorLine::getYaw, &ActuatorLine::setYaw) - .def_property_readonly("n_blades", &ActuatorLine::getNBlades) - .def_property_readonly("n_blade_nodes", &ActuatorLine::getNBladeNodes) - .def_property_readonly("n_nodes", &ActuatorLine::getNNodes) - .def_property_readonly("n_indices", &ActuatorLine::getNIndices) - .def_property_readonly("density", &ActuatorLine::getDensity) - .def_property_readonly("position_x", &ActuatorLine::getPositionX) - .def_property_readonly("position_y", &ActuatorLine::getPositionY) - 
.def_property_readonly("position_z", &ActuatorLine::getPositionZ) - .def_property_readonly("position", [](ActuatorLine& al){ real position[3] = {al.getPositionX(), al.getPositionY(), al.getPositionZ()}; return arr(3, position); } ) - .def("get_radii", [](ActuatorLine& al){ return arr(al.getNBladeNodes(), al.getBladeRadii()); } ) - .def("get_blade_coords_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsX()); } ) - .def("get_blade_coords_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsY()); } ) - .def("get_blade_coords_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeCoordsZ()); } ) - .def("get_blade_velocities_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesX()); } ) - .def("get_blade_velocities_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesY()); } ) - .def("get_blade_velocities_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeVelocitiesZ()); } ) - .def("get_blade_forces_x", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesX()); } ) - .def("get_blade_forces_y", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesY()); } ) - .def("get_blade_forces_z", [](ActuatorLine& al){ return arr({al.getNBlades(), al.getNBladeNodes()}, al.getBladeForcesZ()); } ) - .def("set_blade_coords", [](ActuatorLine& al, arr coordsX, arr coordsY, arr coordsZ){ - al.setBladeCoords(static_cast<float *>(coordsX.request().ptr), static_cast<float *>(coordsY.request().ptr), static_cast<float *>(coordsZ.request().ptr)); } ) - .def("set_blade_velocities", [](ActuatorLine& al, arr velocitiesX, arr velocitiesY, arr velocitiesZ){ - al.setBladeVelocities(static_cast<float *>(velocitiesX.request().ptr), static_cast<float *>(velocitiesY.request().ptr), 
static_cast<float *>(velocitiesZ.request().ptr)); } ) - .def("set_blade_forces", [](ActuatorLine& al, arr forcesX, arr forcesY, arr forcesZ){ - al.setBladeForces(static_cast<float *>(forcesX.request().ptr), static_cast<float *>(forcesY.request().ptr), static_cast<float *>(forcesZ.request().ptr)); } ) - .def("calc_blade_forces", &ActuatorLine::calcBladeForces); - } -} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp index 8f941a8705c225275d25291205ebdaeef8de5c9e..865817bb16f7b164c40bdc066645fb2e1f1c842e 100644 --- a/pythonbindings/src/gpu/submodules/boundary_conditions.cpp +++ b/pythonbindings/src/gpu/submodules/boundary_conditions.cpp @@ -1,5 +1,38 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file boundary_conditions.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <gpu/GridGenerator/grid/BoundaryConditions/Side.h> +#include "gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" namespace boundary_conditions { @@ -14,7 +47,59 @@ namespace boundary_conditions .value("PY", SideType::PY) .value("MZ", SideType::MZ) .value("PZ", SideType::PZ) - .value("GEOMETRY", SideType::GEOMETRY) - .export_values(); + .value("GEOMETRY", SideType::GEOMETRY); + + py::class_<BoundaryConditionFactory>(parentModule, "BoundaryConditionFactory") + .def(py::init<>()) + .def("set_velocity_boundary_condition", &BoundaryConditionFactory::setVelocityBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_no_slip_boundary_condition", &BoundaryConditionFactory::setNoSlipBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_slip_boundary_condition", &BoundaryConditionFactory::setSlipBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_pressure_boundary_condition", &BoundaryConditionFactory::setPressureBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_stress_boundary_condition", &BoundaryConditionFactory::setStressBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_precursor_boundary_condition", &BoundaryConditionFactory::setPrecursorBoundaryCondition, py::arg("boundary_condition_type")) + .def("set_geometry_boundary_condition", 
&BoundaryConditionFactory::setGeometryBoundaryCondition, py::arg("boundary_condition_type")); + + py::enum_<BoundaryConditionFactory::VelocityBC>(parentModule, "VelocityBC") + .value("VelocitySimpleBounceBackCompressible", BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible) + .value("VelocityIncompressible", BoundaryConditionFactory::VelocityBC::VelocityIncompressible) + .value("VelocityCompressible", BoundaryConditionFactory::VelocityBC::VelocityCompressible) + .value("VelocityAndPressureCompressible", BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible) + .value("NotSpecified", BoundaryConditionFactory::VelocityBC::NotSpecified); + + + py::enum_<BoundaryConditionFactory::NoSlipBC>(parentModule, "NoSlipBC") + .value("NoSlipImplicitBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipImplicitBounceBack) + .value("NoSlipBounceBack", BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack) + .value("NoSlipIncompressible", BoundaryConditionFactory::NoSlipBC::NoSlipIncompressible) + .value("NoSlipCompressible", BoundaryConditionFactory::NoSlipBC::NoSlipCompressible) + .value("NoSlip3rdMomentsCompressible", BoundaryConditionFactory::NoSlipBC::NoSlip3rdMomentsCompressible); + + py::enum_<BoundaryConditionFactory::SlipBC>(parentModule, "SlipBC") + .value("SlipIncompressible", BoundaryConditionFactory::SlipBC::SlipIncompressible) + .value("SlipCompressible", BoundaryConditionFactory::SlipBC::SlipCompressible) + .value("SlipBounceBack", BoundaryConditionFactory::SlipBC::SlipBounceBack) + .value("SlipCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity) + .value("SlipPressureCompressibleTurbulentViscosity", BoundaryConditionFactory::SlipBC::SlipPressureCompressibleTurbulentViscosity) + .value("NotSpecified", BoundaryConditionFactory::SlipBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::PressureBC>(parentModule, "PressureBC") + .value("PressureEquilibrium", 
BoundaryConditionFactory::PressureBC::PressureEquilibrium) + .value("PressureEquilibrium2", BoundaryConditionFactory::PressureBC::PressureEquilibrium2) + .value("PressureNonEquilibriumIncompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumIncompressible) + .value("PressureNonEquilibriumCompressible", BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible) + .value("OutflowNonReflective", BoundaryConditionFactory::PressureBC::OutflowNonReflective) + .value("OutflowNonReflectivePressureCorrection", BoundaryConditionFactory::PressureBC::OutflowNonReflectivePressureCorrection) + .value("NotSpecified", BoundaryConditionFactory::PressureBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::StressBC>(parentModule, "StressBC") + .value("StressCompressible", BoundaryConditionFactory::StressBC::StressCompressible) + .value("StressBounceBack", BoundaryConditionFactory::StressBC::StressBounceBack) + .value("StressPressureBounceBack", BoundaryConditionFactory::StressBC::StressPressureBounceBack) + .value("NotSpecified", BoundaryConditionFactory::StressBC::NotSpecified); + + py::enum_<BoundaryConditionFactory::PrecursorBC>(parentModule, "PrecursorBC") + .value("VelocityPrecursor", BoundaryConditionFactory::PrecursorBC::VelocityPrecursor) + .value("DistributionsPrecursor", BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor) + .value("NotSpecified", BoundaryConditionFactory::PrecursorBC::NotSpecified); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp index edb36e2c2f774903590a16a0b406c721662827b1..26a57061933fbdbfe3447ec89eeb07116a9b974b 100644 --- a/pythonbindings/src/gpu/submodules/communicator.cpp +++ b/pythonbindings/src/gpu/submodules/communicator.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ 
\ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file communicator.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <gpu/VirtualFluids_GPU/Communication/Communicator.h> @@ -8,7 +40,7 @@ namespace communicator void makeModule(py::module_ &parentModule) { py::class_<vf::gpu::Communicator, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator") - .def("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference) + .def_static("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference) .def("get_number_of_process", &vf::gpu::Communicator::getNummberOfProcess) .def("get_pid", &vf::gpu::Communicator::getPID); } diff --git a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp index bf27080cb3cd050343ba42b0571827ed58870cfd..bbff4832cb73f47e3d1a5a6abd78e21da2473deb 100644 --- a/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp +++ b/pythonbindings/src/gpu/submodules/cuda_memory_manager.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file cuda_memory_manager.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h> #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h> @@ -10,6 +42,6 @@ namespace cuda_memory_manager void makeModule(py::module_ &parentModule) { py::class_<CudaMemoryManager, std::shared_ptr<CudaMemoryManager>>(parentModule, "CudaMemoryManager") - .def(py::init<std::shared_ptr<Parameter>>(), "parameter"); + .def(py::init<std::shared_ptr<Parameter>>(), py::arg("parameter")); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp index 579c06c4e00cae9646ced8b554d71631eeb7e793..3e9fb5655e26ffa6053a205da5a3e3f0f2ecd49f 100644 --- a/pythonbindings/src/gpu/submodules/grid_generator.cpp +++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp @@ -1,4 +1,37 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | 
\ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file grid_generator.cpp +//! \ingroup submodules +//! 
\author Henry Korb, Henrik Asmuth +//======================================================================================= #include <pybind11/pybind11.h> +#include "gpu/GridGenerator/utilities/communication.h" #include "gpu/GridGenerator/geometries/Object.h" #include "gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h" #include "gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h" @@ -17,51 +50,63 @@ namespace grid_generator { py::module gridGeneratorModule = parentModule.def_submodule("grid_generator"); + //TODO: + // py::enum_<CommunicationDirections>(gridGeneratorModule, "CommunicationDirections") + // .value("MX", CommunicationDirections::MX) + // .value("PX", CommunicationDirections::PX) + // .value("MY", CommunicationDirections::MY) + // .value("PY", CommunicationDirections::PY) + // .value("MZ", CommunicationDirections::MZ) + // .value("PZ", CommunicationDirections::PZ); + py::class_<GridFactory, std::shared_ptr<GridFactory>>(gridGeneratorModule, "GridFactory") - .def("make", &GridFactory::make, py::return_value_policy::reference); + .def_static("make", &GridFactory::make, py::return_value_policy::reference); - py::class_<BoundingBox>(gridGeneratorModule, "BoundingBox") - .def(py::init<real, real, real, real, real, real>(),"min_x","max_x","min_y","max_y","min_z","max_z"); + py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(gridGeneratorModule, "BoundingBox") + .def(py::init<real, real, real, real, real, real>(), py::arg("min_x"), py::arg("max_x"), py::arg("min_y"), py::arg("max_y"), py::arg("min_z"), py::arg("max_z")); py::class_<Object, std::shared_ptr<Object>>(gridGeneratorModule, "Object"); py::class_<Conglomerate, Object, std::shared_ptr<Conglomerate>>(gridGeneratorModule, "Conglomerate") - .def("make_shared", &Conglomerate::makeShared, py::return_value_policy::reference) - .def("add", &Conglomerate::add) - .def("subtract", &Conglomerate::subtract); + .def_static("make_shared", &Conglomerate::makeShared, py::return_value_policy::reference) 
+ .def("add", &Conglomerate::add, py::arg("object")) + .def("subtract", &Conglomerate::subtract, py::arg("object")); py::class_<Cuboid, Object, std::shared_ptr<Cuboid>>(gridGeneratorModule, "Cuboid") .def(py::init<const double&, const double&, const double&, const double&, const double&, const double&>(), - "min_x1", "min_x2", "min_x3", "max_x1", "max_x2", "max_x3"); + py::arg("min_x1"), py::arg("min_x2"), py::arg("min_x3"), py::arg("max_x1"), py::arg("max_x2"), py::arg("max_x3")); py::class_<Sphere, Object, std::shared_ptr<Sphere>>(gridGeneratorModule, "Sphere") - .def("make_shared", &Sphere::makeShared, py::return_value_policy::reference); + .def_static("make_shared", &Sphere::makeShared, py::return_value_policy::reference); py::class_<TriangularMesh, Object, std::shared_ptr<TriangularMesh>>(gridGeneratorModule, "TriangularMesh") - .def("make", &TriangularMesh::make, py::return_value_policy::reference); + .def_static("make", &TriangularMesh::make, py::return_value_policy::reference); py::class_<GridBuilder, std::shared_ptr<GridBuilder>>(gridGeneratorModule, "GridBuilder") - .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels) - .def("get_grid", &GridBuilder::getGrid); + .def("get_number_of_grid_levels", &GridBuilder::getNumberOfGridLevels); py::class_<LevelGridBuilder, GridBuilder, std::shared_ptr<LevelGridBuilder>>(gridGeneratorModule, "LevelGridBuilder") - .def("get_grid", py::overload_cast<int, int>(&LevelGridBuilder::getGrid)) - .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition) - .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition) - .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition) - .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition) - .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition) - .def("set_stress_boundary_condition", 
&LevelGridBuilder::setStressBoundaryCondition); + .def("set_slip_boundary_condition", &LevelGridBuilder::setSlipBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z")) + .def("set_velocity_boundary_condition", &LevelGridBuilder::setVelocityBoundaryCondition, py::arg("side_type"), py::arg("vx"), py::arg("vy"), py::arg("vz")) + .def("set_pressure_boundary_condition", &LevelGridBuilder::setPressureBoundaryCondition, py::arg("side_type"), py::arg("rho")) + .def("set_periodic_boundary_condition", &LevelGridBuilder::setPeriodicBoundaryCondition, py::arg("periodic_x"), py::arg("periodic_y"), py::arg("periodic_z")) + .def("set_no_slip_boundary_condition", &LevelGridBuilder::setNoSlipBoundaryCondition, py::arg("side_type")) + .def("set_precursor_boundary_condition", &LevelGridBuilder::setPrecursorBoundaryCondition, py::arg("side_type"), py::arg("file_collection"), py::arg("n_t_read"), py::arg("velocity_x")=0.0f, py::arg("velocity_y")=0.0f, py::arg("velocity_z")=0.0f, py::arg("file_level_to_grid_level_map")=std::vector<uint>()) + .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z"), py::arg("sampling_offset"), py::arg("z0"), py::arg("dx")); py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder") - .def("make_shared", &MultipleGridBuilder::makeShared, py::return_value_policy::reference) - .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid) - .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid)) - .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid)) - .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry)) - .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry)) + .def_static("make_shared", 
&MultipleGridBuilder::makeShared, py::return_value_policy::reference, py::arg("grid_factory")) + .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid, py::arg("start_x"), py::arg("start_y"), py::arg("start_z"), py::arg("end_x"), py::arg("end_y"), py::arg("end_z"), py::arg("delta")) + .def("add_grid", py::overload_cast<Object*>(&MultipleGridBuilder::addGrid), py::arg("grid_shape")) + .def("add_grid", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGrid), py::arg("grid_shape"), py::arg("level_fine")) + .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry), py::arg("solid_object")) + .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry), py::arg("solid_object"), py::arg("level")) .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels) - .def("build_grids", &MultipleGridBuilder::buildGrids); + .def("build_grids", &MultipleGridBuilder::buildGrids, py::arg("lbm_or_gks"), py::arg("enable_thin_walls")) + .def("set_subdomain_box", &MultipleGridBuilder::setSubDomainBox, py::arg("bounding_box")) + .def("find_communication_indices", &MultipleGridBuilder::findCommunicationIndices) + .def("set_communication_process", &MultipleGridBuilder::setCommunicationProcess) + .def("set_number_of_layers", &MultipleGridBuilder::setNumberOfLayers, py::arg("number_of_layers_fine"), py::arg("number_of_layers_between_levels")); return gridGeneratorModule; } diff --git a/pythonbindings/src/gpu/submodules/grid_provider.cpp b/pythonbindings/src/gpu/submodules/grid_provider.cpp index 02ff273e2cd1a2022943e19c9a48a447d9dfe54b..717e9d5cd82100636a5398c09662a0895ce8fb56 100644 --- a/pythonbindings/src/gpu/submodules/grid_provider.cpp +++ b/pythonbindings/src/gpu/submodules/grid_provider.cpp @@ -1,8 +1,36 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) 
| | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file grid_provider +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include "gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h" -// #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h> -// #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h> -// #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h" namespace grid_provider { @@ -11,6 +39,6 @@ namespace grid_provider void makeModule(py::module_ &parentModule) { py::class_<GridProvider, std::shared_ptr<GridProvider>>(parentModule, "GridProvider") - .def("make_grid_generator", &GridProvider::makeGridGenerator, py::return_value_policy::reference); + .def_static("make_grid_generator", &GridProvider::makeGridGenerator, py::return_value_policy::reference, py::arg("builder"), py::arg("para"), py::arg("cuda_memory_manager"), py::arg("communicator")); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp b/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a3a572875a4695871c482a4308acab4214dbb481 --- /dev/null +++ b/pythonbindings/src/gpu/submodules/grid_scaling_factory.cpp @@ -0,0 +1,52 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / 
_____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file grid_scaling_factory.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= +#include <pybind11/pybind11.h> +#include <gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h> + +namespace grid_scaling_factory +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + + py::class_<GridScalingFactory, std::shared_ptr<GridScalingFactory>>(parentModule, "GridScalingFactory") + .def(py::init<>()) + .def("set_scaling_factory", &GridScalingFactory::setScalingFactory, py::arg("scaling_type")); + + py::enum_<GridScalingFactory::GridScaling>(parentModule, "GridScaling") + .value("ScaleCompressible", GridScalingFactory::GridScaling::ScaleCompressible) + .value("ScaleRhoSq", GridScalingFactory::GridScaling::ScaleRhoSq) + .value("NotSpecified", GridScalingFactory::GridScaling::NotSpecified); + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp index 7b4e67f101e3928abbd4262557864ea1d0f45b02..a7c42223e6a5bfa3caa89c0879e4133fc4123ad0 100644 --- a/pythonbindings/src/gpu/submodules/parameter.cpp 
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp @@ -1,10 +1,46 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file parameter.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <pybind11/functional.h> #include <pybind11/stl.h> #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h> +#include "lbm/constants/NumericConstants.h" #include <basics/config/ConfigurationFile.h> #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> + +using namespace vf::lbm::constant; + namespace parameter { namespace py = pybind11; @@ -13,42 +49,44 @@ namespace parameter { py::class_<Parameter, std::shared_ptr<Parameter>>(parentModule, "Parameter") .def(py::init< - const vf::basics::ConfigurationFile&, int, - int - >(), - "config_data", - "number_of_processes", - "my_ID") - .def("set_forcing", &Parameter::setForcing) - .def("set_diff_on", &Parameter::setDiffOn) - .def("set_comp_on", &Parameter::setCompOn) - .def("set_max_level", &Parameter::setMaxLevel) - .def("set_t_end", &Parameter::setTEnd) - .def("set_t_out", &Parameter::setTOut) - .def("set_t_start_out", &Parameter::setTStartOut) - .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel) - .def("set_output_path", &Parameter::setOutputPath) - .def("set_output_prefix", &Parameter::setOutputPrefix) - .def("set_f_name", &Parameter::setFName) - .def("set_print_files", &Parameter::setPrintFiles) - .def("set_temperature_init", &Parameter::setTemperatureInit) - .def("set_temperature_BC", &Parameter::setTemperatureBC) - .def("set_viscosity", &Parameter::setViscosity) - .def("set_velocity", &Parameter::setVelocity) - .def("set_viscosity_ratio", &Parameter::setViscosityRatio) - .def("set_velocity_ratio", &Parameter::setVelocityRatio) - .def("set_density_ratio", &Parameter::setDensityRatio) - .def("set_devices", &Parameter::setDevices) - .def("set_is_body_force", &Parameter::setIsBodyForce) - .def("set_use_AMD", &Parameter::setUseAMD) - .def("set_use_Wale", &Parameter::setUseWale) - .def("set_SGS_constant", 
&Parameter::setSGSConstant) - .def("set_main_kernel", &Parameter::setMainKernel) - .def("set_AD_kernel", &Parameter::setADKernel) - .def("set_use_AMD", &Parameter::setUseAMD) - .def("set_use_Wale", &Parameter::setUseWale) - .def("set_SGS_constant", &Parameter::setSGSConstant) + int, + std::optional<const vf::basics::ConfigurationFile*>>(), + py::arg("number_of_processes"), + py::arg("my_ID"), + py::arg("config_data")) + .def(py::init<int, int>(), + py::arg("number_of_processes"), + py::arg("my_ID")) + .def(py::init<const vf::basics::ConfigurationFile*>(), py::arg("config_data")) + .def("set_forcing", &Parameter::setForcing, py::arg("forcing_x"), py::arg("forcing_y"), py::arg("forcing_z")) + .def("set_quadric_limiters", &Parameter::setQuadricLimiters, py::arg("quadric_limiter_p"), py::arg("quadric_limiter_m"), py::arg("quadric_limiter_d")) + .def("set_diff_on", &Parameter::setDiffOn, py::arg("is_diff")) + .def("set_comp_on", &Parameter::setCompOn, py::arg("is_comp")) + .def("set_max_level", &Parameter::setMaxLevel, py::arg("number_of_levels")) + .def("set_timestep_end", &Parameter::setTimestepEnd, py::arg("tend")) + .def("set_timestep_out", &Parameter::setTimestepOut, py::arg("tout")) + .def("set_timestep_start_out", &Parameter::setTimestepStartOut, py::arg("t_start_out")) + .def("set_timestep_of_coarse_level", &Parameter::setTimestepOfCoarseLevel, py::arg("timestep")) + .def("set_calc_turbulence_intensity", &Parameter::setCalcTurbulenceIntensity, py::arg("calc_velocity_and_fluctuations")) + .def("set_output_path", &Parameter::setOutputPath, py::arg("o_path")) + .def("set_output_prefix", &Parameter::setOutputPrefix, py::arg("o_prefix")) + .def("set_print_files", &Parameter::setPrintFiles, py::arg("print_files")) + .def("set_temperature_init", &Parameter::setTemperatureInit, py::arg("temp")) + .def("set_temperature_BC", &Parameter::setTemperatureBC, py::arg("temp_bc")) + .def("set_viscosity_LB", &Parameter::setViscosityLB, py::arg("viscosity")) + 
.def("set_velocity_LB", &Parameter::setVelocityLB, py::arg("velocity")) + .def("set_viscosity_ratio", &Parameter::setViscosityRatio, py::arg("viscosity_ratio")) + .def("set_velocity_ratio", &Parameter::setVelocityRatio, py::arg("velocity_ratio")) + .def("set_density_ratio", &Parameter::setDensityRatio, py::arg("density_ratio")) + .def("set_devices", &Parameter::setDevices, py::arg("devices")) + .def("set_max_dev", &Parameter::setMaxDev, py::arg("max_dev")) + .def("set_is_body_force", &Parameter::setIsBodyForce, py::arg("is_body_force")) + .def("set_use_streams", &Parameter::setUseStreams, py::arg("use_streams")) + .def("set_main_kernel", &Parameter::setMainKernel, py::arg("kernel")) + .def("set_AD_kernel", &Parameter::setADKernel, py::arg("ad_kernel")) + .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor, py::arg("has_wall_monitor")) + .def("set_outflow_pressure_correction_factor", &Parameter::setOutflowPressureCorrectionFactor, py::arg("correction_factor")) .def("set_initial_condition", [](Parameter &para, std::function<std::vector<float>(real, real, real)> &init_func) { para.setInitialCondition([init_func](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) @@ -59,9 +97,46 @@ namespace parameter vy = values[2]; vz = values[3]; }); - }) - .def("add_actuator", &Parameter::addActuator) - .def("add_probe", &Parameter::addProbe) + }, py::arg("init_func")) + .def("set_initial_condition_uniform", [](Parameter &para, real velocity_x, real velocity_y, real velocity_z) + { + para.setInitialCondition([velocity_x, velocity_y, velocity_z](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) // must capture values explicitly!
+ { + rho = c0o1; + vx = velocity_x; + vy = velocity_y; + vz = velocity_z; + }); + }, py::arg("velocity_x"), py::arg("velocity_y"), py::arg("velocity_z")) + .def("set_initial_condition_log_law", [](Parameter &para, real u_star, real z0, real velocityRatio) + { + para.setInitialCondition( + [u_star, z0, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) + { + coordZ = coordZ > c0o1 ? coordZ : c0o1; + + rho = c0o1; + vx = u_star/c4o10 * log(coordZ/z0+c1o1) / velocityRatio; + vy = c0o1; + vz = c0o1; + } + ); + }, py::arg("u_star"), py::arg("z0"), py::arg("velocity_ratio")) + .def("set_initial_condition_perturbed_log_law", [](Parameter &para, real u_star, real z0, real L_x, real L_z, real H, real velocityRatio) + { + para.setInitialCondition( + [u_star, z0, L_x, L_z, H, velocityRatio](real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz) + { + coordZ = coordZ > c0o1 ? coordZ : c0o1; + rho = c0o1; + vx = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) / velocityRatio; + vy = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) / velocityRatio; + vz = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) / velocityRatio; + } + ); + }, py::arg("u_star"), py::arg("z0"), py::arg("length_x"), py::arg("length_z"), py::arg("height"), py::arg("velocity_ratio")) + .def("add_actuator", &Parameter::addActuator, py::arg("actuator")) + .def("add_probe", &Parameter::addProbe, py::arg("probe")) .def("get_output_path", &Parameter::getOutputPath) .def("get_output_prefix", &Parameter::getOutputPrefix) .def("get_velocity", &Parameter::getVelocity) @@ -70,11 +145,9 @@ namespace parameter .def("get_viscosity_ratio", &Parameter::getViscosityRatio) .def("get_density_ratio", &Parameter::getDensityRatio) .def("get_force_ratio", &Parameter::getForceRatio) -
.def("get_use_AMD", &Parameter::getUseAMD) - .def("get_use_Wale", &Parameter::getUseWale) .def("get_SGS_constant", &Parameter::getSGSConstant) .def("get_is_body_force", &Parameter::getIsBodyForce) - .def("set_has_wall_model_monitor", &Parameter::setHasWallModelMonitor) ; + } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp index 362ee1a8ce6112cfa9543f1b254e10f3e35822a1..308f6c37aada14c8c25c69245f603274ae2f18d8 100644 --- a/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp +++ b/pythonbindings/src/gpu/submodules/pre_collision_interactor.cpp @@ -1,3 +1,36 @@ + +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file pre_collision_interactor.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> diff --git a/pythonbindings/src/gpu/submodules/precursor_writer.cpp b/pythonbindings/src/gpu/submodules/precursor_writer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..64164ef9993d7b4f22bff2390b418718f7c3208f --- /dev/null +++ b/pythonbindings/src/gpu/submodules/precursor_writer.cpp @@ -0,0 +1,67 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file precursor_writer.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> +#include <pybind11/numpy.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h> +#include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h> + +namespace precursor_writer +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<OutputVariable>(parentModule, "OutputVariable") + .value("Velocities", OutputVariable::Velocities) + .value("Distributions", OutputVariable::Distributions); + + py::class_<PrecursorWriter, PreCollisionInteractor, std::shared_ptr<PrecursorWriter>>(parentModule, "PrecursorWriter") + .def(py::init < std::string, + std::string, + real, + real, real, + real, real, + uint, uint, + OutputVariable, + uint>(), + py::arg("filename"), + py::arg("output_path"), + py::arg("x_pos"), + py::arg("y_min"), py::arg("y_max"), + py::arg("z_min"), py::arg("z_max"), + py::arg("t_start_out"), py::arg("t_save"), + py::arg("output_variable"), + py::arg("max_timesteps_per_file")); + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/probes.cpp b/pythonbindings/src/gpu/submodules/probes.cpp index 6993d9617d870922d7ed90ed9ecbebb8a797be25..7c26958df81a60f00c9909a91f5576a5931652d4 100644 --- a/pythonbindings/src/gpu/submodules/probes.cpp +++ b/pythonbindings/src/gpu/submodules/probes.cpp @@ -1,3 +1,35 @@ 
+//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file probes.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <pybind11/stl.h> #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h> @@ -29,7 +61,7 @@ namespace probes .value("SpatioTemporalFlatness", Statistic::SpatioTemporalFlatness); py::class_<Probe, PreCollisionInteractor, std::shared_ptr<Probe>>(probeModule, "Probe") - .def("add_statistic", &Probe::addStatistic) + .def("add_statistic", &Probe::addStatistic, py::arg("variable")) .def("set_file_name_to_n_out", &Probe::setFileNameToNOut) .def("add_all_available_statistics", &Probe::addAllAvailableStatistics); @@ -41,14 +73,14 @@ namespace probes uint, uint, uint>(), - "probe_name", - "output_path" - "t_start_avg", - "t_avg", - "t_start_out", - "t_out") - .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList) - .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane); + py::arg("probe_name"), + py::arg("output_path"), + py::arg("t_start_avg"), + py::arg("t_avg"), + py::arg("t_start_out"), + py::arg("t_out")) + .def("add_probe_points_from_list", &PointProbe::addProbePointsFromList, py::arg("point_coords_x"), py::arg("point_coords_y"), py::arg("point_coords_z")) + .def("add_probe_points_from_x_normal_plane", &PointProbe::addProbePointsFromXNormalPlane, py::arg("pos_x"), py::arg("pos0_y"), py::arg("pos0_z"), py::arg("pos1_y"), py::arg("pos1_z"), py::arg("n_y"), py::arg("n_z")); py::class_<PlaneProbe, Probe, std::shared_ptr<PlaneProbe>>(probeModule, "PlaneProbe") .def(py::init< @@ -58,13 +90,13 @@ namespace probes uint, uint, uint>(), - "probe_name", - "output_path" - "t_start_avg", - "t_avg", - "t_start_out", - "t_out") - .def("set_probe_plane", &PlaneProbe::setProbePlane); + py::arg("probe_name"), + py::arg("output_path"), + py::arg("t_start_avg"), + py::arg("t_avg"), + py::arg("t_start_out"), + py::arg("t_out")) + .def("set_probe_plane", 
&PlaneProbe::setProbePlane, py::arg("pos_x"), py::arg("pos_y"), py::arg("pos_z"), py::arg("delta_x"), py::arg("delta_y"), py::arg("delta_z")); py::class_<PlanarAverageProbe, Probe, std::shared_ptr<PlanarAverageProbe>>(probeModule, "PlanarAverageProbe") .def(py::init< @@ -76,14 +108,14 @@ namespace probes uint, uint, char>(), - "probe_name", - "output_path", - "t_start_avg", - "t_start_tmp_avg", - "t_avg", - "t_start_out", - "t_out", - "plane_normal"); + py::arg("probe_name"), + py::arg("output_path"), + py::arg("t_start_avg"), + py::arg("t_start_tmp_avg"), + py::arg("t_avg"), + py::arg("t_start_out"), + py::arg("t_out"), + py::arg("plane_normal")); py::class_<WallModelProbe, Probe, std::shared_ptr<WallModelProbe>>(probeModule, "WallModelProbe") @@ -95,15 +127,15 @@ namespace probes uint, uint, uint>(), - "probe_name", - "output_path" - "t_start_avg", - "t_start_tmp_avg", - "t_avg", - "t_start_out", - "t_out") - .def("set_force_output_to_stress", &WallModelProbe::setForceOutputToStress) - .def("set_evaluate_pressure_gradient", &WallModelProbe::setEvaluatePressureGradient); + py::arg("probe_name"), + py::arg("output_path"), + py::arg("t_start_avg"), + py::arg("t_start_tmp_avg"), + py::arg("t_avg"), + py::arg("t_start_out"), + py::arg("t_out")) + .def("set_force_output_to_stress", &WallModelProbe::setForceOutputToStress, py::arg("output_stress")) + .def("set_evaluate_pressure_gradient", &WallModelProbe::setEvaluatePressureGradient, py::arg("eval_press_grad")); return probeModule; } diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp index b775d604ba41530223f22738c72785b2c15348b3..d32ef272a1fd26510439dde6ab3a9438d68009a7 100644 --- a/pythonbindings/src/gpu/submodules/simulation.cpp +++ b/pythonbindings/src/gpu/submodules/simulation.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ 
|___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file simulation.cpp +//! \ingroup submodules +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <gpu/VirtualFluids_GPU/LBM/Simulation.h> #include <gpu/VirtualFluids_GPU/Communication/Communicator.h> @@ -8,6 +40,9 @@ #include <gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h> #include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h> #include <gpu/VirtualFluids_GPU/Output/DataWriter.h> +#include "gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h" +#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" +#include "gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h" namespace simulation { @@ -20,13 +55,41 @@ namespace simulation .def(py::init< std::shared_ptr<Parameter>, std::shared_ptr<CudaMemoryManager>, vf::gpu::Communicator &, - GridProvider &>(), - "parameter", - "memoryManager", - "communicator", - "gridProvider") + GridProvider &, + BoundaryConditionFactory*, + GridScalingFactory*>(), + py::arg("parameter"), + py::arg("memoryManager"), + py::arg("communicator"), + py::arg("gridProvider"), + py::arg("bcFactory"), + py::arg("gridScalingFactory")) + .def(py::init< std::shared_ptr<Parameter>, + std::shared_ptr<CudaMemoryManager>, + vf::gpu::Communicator &, + GridProvider &, + BoundaryConditionFactory*>(), + py::arg("parameter"), + py::arg("memoryManager"), + py::arg("communicator"), + py::arg("gridProvider"), + py::arg("bcFactory")) + .def(py::init< std::shared_ptr<Parameter>, + std::shared_ptr<CudaMemoryManager>, + vf::gpu::Communicator &, + GridProvider &, + BoundaryConditionFactory*, + std::shared_ptr<TurbulenceModelFactory>, + GridScalingFactory*>(), + py::arg("parameter"), + py::arg("memoryManager"), + py::arg("communicator"), + py::arg("gridProvider"), + py::arg("bcFactory"), + py::arg("tmFactory"), + py::arg("gridScalingFactory")) .def("run", &Simulation::run) - .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer) - .def("addEnstrophyAnalyzer", 
&Simulation::addEnstrophyAnalyzer); + .def("addKineticEnergyAnalyzer", &Simulation::addKineticEnergyAnalyzer, py::arg("t_analyse")) + .def("addEnstrophyAnalyzer", &Simulation::addEnstrophyAnalyzer, py::arg("t_analyse")); } } \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp b/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..89370ef4c1b91a0c8e480e968a1df3bd4fe540ca --- /dev/null +++ b/pythonbindings/src/gpu/submodules/transient_bc_setter.cpp @@ -0,0 +1,52 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file transient_bc_setter.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= +#include <pybind11/pybind11.h> +#include <gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h> + +namespace transient_bc_setter +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<FileType>(parentModule, "FileType") + .value("VTK", FileType::VTK); + + parentModule.def("create_file_collection", &createFileCollection, py::arg("prefix"), py::arg("type")); + + py::class_<FileCollection, std::shared_ptr<FileCollection>>(parentModule, "FileCollection"); + + py::class_<VTKFileCollection, FileCollection, std::shared_ptr<VTKFileCollection>>(parentModule, "VTKFileCollection") + .def(py::init <std::string>(), py::arg("prefix")); + } +} \ No newline at end of file diff --git a/pythonbindings/src/gpu/submodules/turbulence_models.cpp b/pythonbindings/src/gpu/submodules/turbulence_models.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfbb9e56127fee0cd90a482dde258d8b96389989 --- /dev/null +++ b/pythonbindings/src/gpu/submodules/turbulence_models.cpp @@ -0,0 +1,56 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | 
| | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file turbulence_models.cpp +//! \ingroup submodules +//! \author Henry Korb +//======================================================================================= +#include "pybind11/pybind11.h" +#include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h" +#include "gpu/VirtualFluids_GPU/LBM/LB.h" + +namespace turbulence_model +{ + namespace py = pybind11; + + void makeModule(py::module_ &parentModule) + { + py::enum_<TurbulenceModel>(parentModule, "TurbulenceModel") + .value("Smagorinsky", TurbulenceModel::Smagorinsky) + .value("AMD", TurbulenceModel::AMD) + .value("QR", TurbulenceModel::QR) + .value("None", TurbulenceModel::None); + + py::class_<TurbulenceModelFactory, std::shared_ptr<TurbulenceModelFactory>>(parentModule, "TurbulenceModelFactory") + .def(py::init< std::shared_ptr<Parameter>>(), py::arg("para")) + .def("set_turbulence_model", &TurbulenceModelFactory::setTurbulenceModel, py::arg("turbulence_model")) + .def("set_model_constant", &TurbulenceModelFactory::setModelConstant, py::arg("model_constant")) + .def("read_config_file", 
&TurbulenceModelFactory::readConfigFile, py::arg("config_data")); + + } +} \ No newline at end of file diff --git a/pythonbindings/src/lbm/lbm.cpp b/pythonbindings/src/lbm/lbm.cpp index 441b9ff372f4e4513fee58c4a8a1cd78d38582dd..90fd4a71b0101469666936c89974de316e0e2b18 100644 --- a/pythonbindings/src/lbm/lbm.cpp +++ b/pythonbindings/src/lbm/lbm.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file lbm.cpp +//! \ingroup lbm +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> namespace lbm diff --git a/pythonbindings/src/logger/logger.cpp b/pythonbindings/src/logger/logger.cpp index 82ad3d92760ae38c0eb62b16be726e4eeaca08ac..555b502fa9a56299895de0fa6dd6cfeb66c15024 100644 --- a/pythonbindings/src/logger/logger.cpp +++ b/pythonbindings/src/logger/logger.cpp @@ -1,3 +1,35 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file logger.cpp +//! 
\ingroup logger +//! \author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <logger/Logger.h> @@ -10,15 +42,15 @@ namespace logging py::module loggerModule = parentModule.def_submodule("logger"); py::class_<vf::logging::Logger>(loggerModule, "Logger") - .def("initialize_logger", &vf::logging::Logger::initalizeLogger) - .def("change_log_path", &vf::logging::Logger::changeLogPath); + .def_static("initialize_logger", &vf::logging::Logger::initalizeLogger) + .def_static("change_log_path", &vf::logging::Logger::changeLogPath, py::arg("path")); // use f-strings (f"text {float}") in python for compounded messages - loggerModule.def("vf_log_trace", [](std::string arg){ VF_LOG_TRACE(arg); }); - loggerModule.def("vf_log_debug", [](std::string arg){ VF_LOG_DEBUG(arg); }); - loggerModule.def("vf_log_info", [](std::string arg){ VF_LOG_INFO(arg); }); - loggerModule.def("vf_log_warning", [](std::string arg){ VF_LOG_WARNING(arg); }); - loggerModule.def("vf_log_critical", [](std::string arg){ VF_LOG_CRITICAL(arg); }); + loggerModule.def("vf_log_trace", [](std::string message){ VF_LOG_TRACE(message); }, py::arg("message")); + loggerModule.def("vf_log_debug", [](std::string message){ VF_LOG_DEBUG(message); }, py::arg("message")); + loggerModule.def("vf_log_info", [](std::string message){ VF_LOG_INFO(message); }, py::arg("message")); + loggerModule.def("vf_log_warning", [](std::string message){ VF_LOG_WARNING(message); }, py::arg("message")); + loggerModule.def("vf_log_critical", [](std::string message){ VF_LOG_CRITICAL(message); }, py::arg("message")); return loggerModule; } diff --git a/pythonbindings/src/muParser.cpp b/pythonbindings/src/muParser.cpp index 47408c2758fc92991f1be3113d78b8741215b152..eec39de0b72c21aaa924ea805414847aa9de4492 100644 --- a/pythonbindings/src/muParser.cpp +++ b/pythonbindings/src/muParser.cpp @@ -1,9 +1,41 @@ 
+//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file muParser.cpp +//! \ingroup src +//! 
\author Henry Korb +//======================================================================================= #include <pybind11/pybind11.h> #include <muParser.h> namespace py = pybind11; -PYBIND11_MODULE(pymuparser, m) { +PYBIND11_MODULE(bindings, m) { py::class_<mu::ParserBase>(m, "_ParserBase"); py::class_<mu::Parser, mu::ParserBase>(m, "Parser") diff --git a/regression-tests/driven_cavity_test.sh b/regression-tests/driven_cavity_test.sh index e10a829d2680ab647ba0f66e0f2e85a70186007e..7f799facb4459ddafcd8b210a5477954af1444cb 100755 --- a/regression-tests/driven_cavity_test.sh +++ b/regression-tests/driven_cavity_test.sh @@ -7,8 +7,8 @@ # build VirtualFluids accordingly to our specific test scenario. # in this case adding -DUSER_APPS="apps/gpu/LBM/DrivenCavity to the cmake command is not necessary, because the DrivenCavity is added to VirtualFluids by default. mkdir -p build -cmake -B build --preset=gpu_make -DCMAKE_CUDA_ARCHITECTURES=75 #-DUSER_APPS="apps/gpu/LBM/DrivenCavity" -cd build && make -j 8 && cd .. 
+cmake -B build --preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 #-DUSER_APPS="apps/gpu/LBM/DrivenCavity" +cmake --build build --parallel 8 # execute VirtualFluids ./build/bin/DrivenCavity @@ -21,4 +21,4 @@ PATH_TO_DIR=output/DrivenCavity PATH_TO_REFERENCE_DIR=regression-tests/reference_data/regression_tests/gpu/DrivenCavity_2Levels # execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare) -fieldcompare dir $PATH_TO_DIR --reference $PATH_TO_REFERENCE_DIR --include-files "*.vtu" \ No newline at end of file +fieldcompare dir $PATH_TO_DIR $PATH_TO_REFERENCE_DIR --include-files "*.vtu" diff --git a/regression-tests/driven_cavity_uniform_test.sh b/regression-tests/driven_cavity_uniform_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..95e2bab635d3a6a73fb514a1f67902083c98e5d3 --- /dev/null +++ b/regression-tests/driven_cavity_uniform_test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +################################# +# Driven Cavity Uniform Regression Test +################################# + +# build VirtualFluids according to our specific test scenario. +# in this case the DrivenCavityUniform app is requested explicitly by adding -DUSER_APPS="apps/gpu/LBM/DrivenCavityUniform" to the cmake command. +mkdir -p build +cmake -B build --preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS="apps/gpu/LBM/DrivenCavityUniform" +cmake --build build --parallel 8 + +# execute VirtualFluids +./build/bin/DrivenCavityUniform + + +# set the path to the produced data +PATH_TO_DIR=output/DrivenCavity_uniform + +# set the path to the reference data. 
+# `regression-tests/reference_data` is fixed; `regression_tests/gpu/DrivenCavity_uniform` must match the structure in https://github.com/irmb/test_data: +PATH_TO_REFERENCE_DIR=regression-tests/reference_data/regression_tests/gpu/DrivenCavity_uniform + +# execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare) +fieldcompare dir $PATH_TO_DIR $PATH_TO_REFERENCE_DIR --include-files "*.vtu" diff --git a/regression-tests/multigpu_test/rocket.yml b/regression-tests/multigpu_test/rocket.yml new file mode 100755 index 0000000000000000000000000000000000000000..f621b1349c042e02f2e834e697147da0822ffe1f --- /dev/null +++ b/regression-tests/multigpu_test/rocket.yml @@ -0,0 +1,48 @@ +host: $PHOENIX_REMOTE_HOST +user: $PHOENIX_REMOTE_USER +private_keyfile: $PHOENIX_PRIVATE_KEY + +copy: + - from: regression-tests/multigpu_test/slurm.job + to: multigpu_test/slurm.job + overwrite: true + + - from: "CMake/" + to: "multigpu_test/CMake/" + overwrite: true + + - from: "3rdParty/" + to: "multigpu_test/3rdParty/" + overwrite: true + + - from: "CMakeLists.txt" + to: "multigpu_test/CMakeLists.txt" + overwrite: true + + - from: "gpu.cmake" + to: "multigpu_test/gpu.cmake" + overwrite: true + + - from: "src/" + to: "multigpu_test/src/" + overwrite: true + + - from: "CMakePresets.json" + to: "multigpu_test/CMakePresets.json" + overwrite: true + + - from: "apps/gpu/LBM/" + to: "multigpu_test/apps/gpu/LBM/" + overwrite: true + +collect: + - from: multigpu_test/output/ + to: output/results/ + overwrite: true + + - from: multigpu_test/slurmMultiGPU.out + to: output/slurmMultiGPU.out + overwrite: true + +sbatch: multigpu_test/slurm.job +continue_if_job_fails: true diff --git a/regression-tests/multigpu_test/slurm.job b/regression-tests/multigpu_test/slurm.job new file mode 100755 index 0000000000000000000000000000000000000000..0ee0df46ab64bab6520f9f46fc939d5b3186fae7 --- /dev/null +++ b/regression-tests/multigpu_test/slurm.job @@ -0,0 
+1,29 @@ +#!/bin/bash -l + +#SBATCH --partition=gpu01_queue +#SBATCH --nodes=1 +#SBATCH --time=10:00:00 +#SBATCH --job-name=Cavity4GPU +#SBATCH --ntasks-per-node=4 +#SBATCH --gres=gpu:4 +#SBATCH --output=multigpu_test/slurmMultiGPU.out +##SBATCH --exclusive + +module purge +module load comp/ccache/4.1 # loads comp/gcc/9.3.0 +module load mpi/openmpi/4.0.5_gcc_9.3/openmpi +module load cuda/11.3 +module load comp/git/2.27.0 +PATH=/home/irmb/tools/cmake-3.20.3-linux-x86_64/bin:$PATH + +module list + +cd multigpu_test +mkdir -p build +cd build +cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS="apps/gpu/LBM/DrivenCavityMultiGPU" +make -j 16 +cd .. +mkdir -p output + +mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU.txt" \ No newline at end of file diff --git a/regression-tests/regression-tests.sh b/regression-tests/regression-tests.sh index 5b7d227907594b727103be91d2382c05a07b9c6f..9f5dc8cf758b380709fcc9ad8020d1335f760f64 100755 --- a/regression-tests/regression-tests.sh +++ b/regression-tests/regression-tests.sh @@ -13,11 +13,10 @@ git clone https://github.com/irmb/test_data regression-tests/reference_data # by cloning our meshio patch and fieldcompare into a venv python3 -m venv .venv source .venv/bin/activate -pip install rich -pip install git+https://github.com/soerenPeters/meshio@update-pyproject-version -pip install git+https://gitlab.com/dglaeser/fieldcompare +pip install fieldcompare # 3. 
Running the specific tests +./regression-tests/driven_cavity_uniform_test.sh ./regression-tests/driven_cavity_test.sh diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..5894f9dec06953c3eeb909af96db9cb19d202d65 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,11 @@ +[metadata] +name = pyfluids +description = Python binding for VirtualFluids +long_description = file: README.md +long_description_content_type = text/markdown +platforms = any +url = https://git.rz.tu-bs.de/irmb/virtualfluids +version = 0.1.0 + +[options] +python_requires = >=3.6 diff --git a/setup.py b/setup.py index b26e1c13d09447d17f8e9fd6e2cd0d0671595bf3..530431b3775970b5222bc87d32bfb407363f95d6 100644 --- a/setup.py +++ b/setup.py @@ -1,137 +1,72 @@ -import os -import re import sys -import platform -import subprocess +from pathlib import Path +from typing import List -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext -from setuptools.command.install import install -from setuptools.command.develop import develop -from distutils.version import LooseVersion +import skbuild """ -Install python wrapper of virtual fluids -Install GPU backend with option --GPU -(pass to pip via --install-option="--GPU") +Install python wrapper of Virtual Fluids +install via python: + python setup.py install + set CMAKE Flags via -DBUILD_VF_GPU:BOOL=ON + CMAKE flags have to be separated by -- + example: python setup.py install -- -DBUILD_VF_CPU:BOOL=ON +or install via pip: + pip install . + for pip>21: + set CMAKE Flags via --config-settings "-DBUILD_VF_GPU=ON" + example: pip install . --config-settings="-DBUILD_VF_GPU=ON" + each option has to be passed in individually i.e --config-settings="-DOPT1=ON" --config-settings="-DOPT2=OFF" + for pip <21: + set CMAKE Flags via --global-option ="-DBUILD_VF_GPU=ON" + example: pip install . 
--global-option="-DBUILD_VF_GPU=ON" """ -vf_cmake_args = [ - "-DBUILD_VF_PYTHON_BINDINGS=ON", - "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", - "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache", - "-DCMAKE_C_COMPILER_LAUNCHER=ccache", - "-DBUILD_SHARED_LIBS=OFF", - "-DBUILD_WARNINGS_AS_ERRORS=OFF" -] - -vf_cpu_cmake_args = [ - "-DBUILD_VF_DOUBLE_ACCURACY=ON", - "-DBUILD_VF_CPU:BOOL=ON", - "-DBUILD_VF_UNIT_TESTS:BOOL=ON", - "-DUSE_METIS=ON", - "-DUSE_MPI=ON" -] - -vf_gpu_cmake_args = [ - "-DBUILD_VF_DOUBLE_ACCURACY=OFF", - "-DBUILD_VF_GPU:BOOL=ON", - "-DBUILD_VF_UNIT_TESTS:BOOL=OFF", -] - -GPU = False - -class CommandMixin: - user_options = [ - ('GPU', None, 'compile pyfluids with GPU backend'), +package_name = "pyfluids" +target = "python_bindings" +src_dir = "pythonbindings" +stub_package = package_name+"-stubs" + +stub_dir = Path(src_dir)/stub_package + + +def add_subfiles(dir_path: Path, suffix: str, root_dir: Path) -> List[str]: + files = [] + for f in dir_path.iterdir(): + if f.is_dir(): + files.extend(add_subfiles(f, suffix, root_dir)) + if f.is_file(): + if f.suffix != suffix: + continue + files.append(str(f.relative_to(root_dir))) + return files + +def add_directory(dir_path: Path, suffix: str): + return add_subfiles(dir_path, suffix, dir_path) + +stub_files = add_directory(stub_dir, ".pyi") + +# hack to get config-args for installation with pip>21 +cmake_args = [] +if "config_args" in locals(): + cmake_args.extend([f"{k}={v}" for k, v in locals()["config_args"].items()]) + +cmake_args += [ + f"-DPython3_ROOT_DIR={Path(sys.prefix)}", + "-DBUILD_VF_PYTHON_BINDINGS=ON", + "-DBUILD_SHARED_LIBS=OFF", + "-DBUILD_VF_DOUBLE_ACCURACY=OFF", + "-DBUILD_VF_UNIT_TESTS:BOOL=OFF", + "-DBUILD_WARNINGS_AS_ERRORS=OFF", ] - def initialize_options(self): - super().initialize_options() - self.GPU = False - - def finalize_options(self): - super().finalize_options() - - def run(self): - global GPU - GPU = GPU or self.GPU - super().run() - - -class InstallCommand(CommandMixin, install): - 
user_options = getattr(install, 'user_options', []) + CommandMixin.user_options - - -class DevelopCommand(CommandMixin, develop): - user_options = getattr(develop, 'user_options', []) + CommandMixin.user_options - - -class CMakeExtension(Extension): - def __init__(self, name, sourcedir=''): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) - - -class CMakeBuild(CommandMixin, build_ext): - user_options = getattr(build_ext, 'user_options', []) + CommandMixin.user_options - - def run(self): - super().run() - try: - out = subprocess.check_output(['cmake', '--version']) - except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + - ", ".join(e.name for e in self.extensions)) - - if platform.system() == "Windows": - cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) - if cmake_version < '3.1.0': - raise RuntimeError("CMake >= 3.1.0 is required on Windows") - - for ext in self.extensions: - self.build_extension(ext) - - def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - # required for auto-detection of auxiliary "native" libs - if not extdir.endswith(os.path.sep): - extdir += os.path.sep - - cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, - '-DPYTHON_EXECUTABLE=' + sys.executable] - - cfg = 'Debug' if self.debug else 'Release' - build_args = ['--config', cfg] - - if platform.system() == "Windows": - cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] - if sys.maxsize > 2**32: - cmake_args += ['-A', 'x64'] - build_args += ['--', '/m'] - else: - cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j2'] - - cmake_args.extend(vf_cmake_args) - cmake_args.extend(vf_gpu_cmake_args if GPU else vf_cpu_cmake_args) - - env = os.environ.copy() - env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), - 
self.distribution.get_version()) - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - cmake_cache_file = self.build_temp+"/CMakeCache.txt" - if os.path.exists(cmake_cache_file): - os.remove(cmake_cache_file) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) - - -setup( - name='pyfluids', - version='0.0.1', - ext_modules=[CMakeExtension('pyfluids')], - cmdclass={"install": InstallCommand, "develop": DevelopCommand, "build_ext": CMakeBuild}, - zip_safe=False, +skbuild.setup( + name=package_name, + packages=[package_name, "pymuparser", "pyfluids-stubs"], + package_dir={"": src_dir}, + cmake_args=cmake_args, + cmake_install_target=target, + package_data={ "pyfluids": ["py.typed"], + "pyfluids-stubs": stub_files}, + include_package_data=True, ) diff --git a/src/basics/basics/utilities/UbTuple.h b/src/basics/basics/utilities/UbTuple.h index fe9c787cead38621beafab3d082122277bdcff73..228ab48898e5e61777d2fcc0061eb6f0434d5cad 100644 --- a/src/basics/basics/utilities/UbTuple.h +++ b/src/basics/basics/utilities/UbTuple.h @@ -597,6 +597,8 @@ inline UbTuple<T1, T2, T3, T4, T5, T6, T7, T8> makeUbTuple(T1 const &a1, T2 cons // some typedefs using UbTupleFloat2 = UbTuple<float, float>; using UbTupleFloat3 = UbTuple<float, float, float>; +using UbTupleFloat4 = UbTuple<float, float, float, float>; +using UbTupleFloat6 = UbTuple<float, float, float,float, float, float>; using UbTupleInt2 = UbTuple<int, int>; using UbTupleInt3 = UbTuple<int, int, int>; using UbTupleInt4 = UbTuple<int, int, int, int>; diff --git a/src/basics/basics/writer/WbWriter.h b/src/basics/basics/writer/WbWriter.h index 26d43464c03311a2cbc14cd4fc9fe717d4b01531..55dceb7cb4a64dc90f0677796cab52135b726f56 100644 --- a/src/basics/basics/writer/WbWriter.h +++ b/src/basics/basics/writer/WbWriter.h @@ -88,7 +88,12 @@ public: { throw UbException(UB_EXARGS, "not implemented 
for " + (std::string) typeid(*this).name()); } - + virtual std::string writeLinesWithLineData(const std::string & /*filename*/, std::vector<UbTupleFloat3> & /*nodes*/, + std::vector<UbTupleInt2> & /*lines*/, std::vector<std::string> & /*datanames*/, + std::vector<std::vector<float>> & /*celldata*/) + { + throw UbException(UB_EXARGS, "not implemented for " + (std::string) typeid(*this).name()); + } ////////////////////////////////////////////////////////////////////////// // triangles // cell numbering: diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp index 6731fa56026ca284ad671cb6ce59000a609bbb8c..55c3541983ea4248512508146792832a34a1c563 100644 --- a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp +++ b/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp @@ -34,6 +34,8 @@ #include <basics/writer/WbWriterVtkXmlASCII.h> #include <basics/writer/WbWriterVtkXmlBinary.h> #include <cstring> +#include <fstream> +#include <string> using namespace std; @@ -154,12 +156,13 @@ string WbWriterVtkXmlBinary::writeParallelFile(const string &filename, vector<st return vtkfilename; } + /*===============================================================================*/ -string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes, - vector<UbTupleInt2> &lines) + +// helper functions + +ofstream createFileStream(std::string vtkfilename) { - string vtkfilename = filename + getFileExtension(); - UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start"); ofstream out(vtkfilename.c_str(), ios::out | ios::binary); if (!out) { @@ -172,89 +175,199 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl if (!out) throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); } + return out; +} - int nofNodes = (int)nodes.size(); - int nofCells = (int)lines.size(); - - int bytesPerByteVal = 4; //==sizeof(int) - int bytesPoints = 3 
/*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 2 /*nodes per line */ * nofCells * sizeof(int); - int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int); - int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char); - - int offset = 0; - // VTK FILE +void writeVtkHeader(ofstream &out, int numberOfNodes, int numberOfCells) +{ out << "<?xml version=\"1.0\"?>\n"; out << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\" >" << "\n"; out << " <UnstructuredGrid>" << "\n"; - out << " <Piece NumberOfPoints=\"" << nofNodes << "\" NumberOfCells=\"" << nofCells << "\">\n"; + out << " <Piece NumberOfPoints=\"" << numberOfNodes << "\" NumberOfCells=\"" << numberOfCells << "\">\n"; +} - // POINTS SECTION +int writePointHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesPoints) +{ out << " <Points>\n"; out << " <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"appended\" offset=\"" << offset << "\" />\n"; out << " </Points>\n"; offset += (bytesPerByteVal + bytesPoints); + return offset; +} - // CELLS SECTION +int writeCellHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesCellConnectivity, int bytesCellOffsets, + int bytesCellTypes) +{ out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); out << " <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\"" << offset << "\" />\n "; offset += (bytesPerByteVal + bytesCellTypes); out << " </Cells>\n"; + return offset; +} +int writeDataHeader(ofstream &out, vector<string> &datanames, int offset, int bytesPerByteVal, int bytesScalarData) +{ + out << " <CellData>\n"; 
+ for (size_t s = 0; s < datanames.size(); ++s) { + out << " <DataArray type=\"Float32\" Name=\"" << datanames[s] << "\" format=\"appended\" offset=\"" + << offset << "\" /> \n"; + offset += (bytesPerByteVal + bytesScalarData); + } + out << " </CellData>\n"; + return offset; +} + +void writeAppendDataHeader(ofstream &out) +{ out << " </Piece>\n"; out << " </UnstructuredGrid>\n"; - - // AppendedData SECTION out << " <AppendedData encoding=\"raw\">\n"; out << "_"; +} - // POINTS SECTION +void writePoints(ofstream &out, int bytesPerByteVal, int bytesPoints, vector<UbTupleFloat3> &nodes) +{ out.write((char *)&bytesPoints, bytesPerByteVal); - for (int n = 0; n < nofNodes; n++) { + for (int n = 0; n < (int)nodes.size(); n++) { out.write((char *)&val<1>(nodes[n]), sizeof(float)); out.write((char *)&val<2>(nodes[n]), sizeof(float)); out.write((char *)&val<3>(nodes[n]), sizeof(float)); } +} - // CELLS SECTION - // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); - for (int c = 0; c < nofCells; c++) { - out.write((char *)&val<1>(lines[c]), sizeof(int)); - out.write((char *)&val<2>(lines[c]), sizeof(int)); +void writeCellConnectivity(ofstream &out, int bytesPerByteVal, int bytesCellConnectivity, vector<UbTupleInt2> &cells) +{ + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); + for (int c = 0; c < (int)cells.size(); c++) { + out.write((char *)&val<1>(cells[c]), sizeof(int)); + out.write((char *)&val<2>(cells[c]), sizeof(int)); } +} - // cellOffsets +void writeCellOffsets(ofstream &out, int bytesPerByteVal, int bytesCellOffsets, int numberOfCells) +{ out.write((char *)&bytesCellOffsets, bytesPerByteVal); int itmp; - for (int c = 1; c <= nofCells; c++) { + for (int c = 1; c <= numberOfCells; c++) { itmp = 2 * c; out.write((char *)&itmp, sizeof(int)); } +} - // cellTypes +void writeCellTypes(ofstream &out, int bytesPerByteVal, int bytesCellTypes, int numberOfCells) +{ out.write((char *)&bytesCellTypes, bytesPerByteVal); unsigned char 
vtkCellType = 3; - for (int c = 0; c < nofCells; c++) { + for (int c = 0; c < numberOfCells; c++) { out.write((char *)&vtkCellType, sizeof(unsigned char)); } +} + +void writeCellData(ofstream &out, int bytesPerByteVal, int bytesScalarData, vector<string> &datanames, + vector<vector<float>> &celldata) +{ + for (size_t s = 0; s < datanames.size(); ++s) { + out.write((char *)&bytesScalarData, bytesPerByteVal); + for (size_t d = 0; d < celldata[s].size(); ++d) { + // loake kopie machen, da in celldata "doubles" sind + float tmp = (float)celldata[s][d]; + out.write((char *)&tmp, sizeof(float)); + } + } +} + +void writeEndOfFile(ofstream &out) +{ out << "\n</AppendedData>\n"; out << "</VTKFile>"; out << endl; out.close(); +} + +/*===============================================================================*/ +string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes, + vector<UbTupleInt2> &lines) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start"); + + ofstream out = createFileStream(vtkfilename); + + int nofNodes = (int)nodes.size(); + int nofCells = (int)lines.size(); + + int bytesPerByteVal = 4; //==sizeof(int) + int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); + int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int); + int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int); + int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char); + + int offset = 0; + + writeVtkHeader(out, nofNodes, nofCells); + offset = writePointHeader(out, offset, bytesPerByteVal, bytesPoints); + writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes); + writeAppendDataHeader(out); + + writePoints(out, bytesPerByteVal, bytesPoints, nodes); + writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines); + writeCellOffsets(out, 
bytesPerByteVal, bytesCellOffsets, nofCells); + writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells); + writeEndOfFile(out); UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - end"); return vtkfilename; } + +/*===============================================================================*/ +string WbWriterVtkXmlBinary::writeLinesWithLineData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<UbTupleInt2> &lines, vector<string> &datanames, + vector<vector<float>> &celldata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - start"); + + ofstream out = createFileStream(vtkfilename); + + int nofNodes = (int)nodes.size(); + int nofCells = (int)lines.size(); + + int bytesPerByteVal = 4; //==sizeof(int) + int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); + int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int); + int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int); + int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char); + int bytesScalarData = 1 /*scalar */ * nofCells * sizeof(float); + + int offset = 0; + + writeVtkHeader(out, nofNodes, nofCells); + offset = writePointHeader(out, offset, bytesPerByteVal, bytesPoints); + offset = writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes); + writeDataHeader(out, datanames, offset, bytesPerByteVal, bytesScalarData); + writeAppendDataHeader(out); + + writePoints(out, bytesPerByteVal, bytesPoints, nodes); + writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines); + writeCellOffsets(out, bytesPerByteVal, bytesCellOffsets, nofCells); + writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells); + writeCellData(out, bytesPerByteVal, bytesScalarData, datanames, celldata); + writeEndOfFile(out); + + UBLOG(logDEBUG1, 
"WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - end"); + + return vtkfilename; +} + /*===============================================================================*/ // std::string WbWriterVtkXmlBinary::writeLinesWithNodeData(const string& filename,vector<UbTupleFloat3 >& nodes, // vector<UbTupleInt2 >& lines, std::vector< std::string >& datanames, std::vector< std::vector< double > >& nodedata) @@ -276,7 +389,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl // // int bytesPerByteVal = 4; //==sizeof(int) // int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); -// int bytesCellConnectivty = 2 /*nodes per line */ * nofCells * sizeof(int ); +// int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int ); // int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int ); // int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char); // int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(float); @@ -296,7 +409,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl // //CELLS SECTION // out<<" <Cells>\n"; // out<<" <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<< offset <<"\" -// />\n"; offset += (bytesPerByteVal + bytesCellConnectivty); out<<" <DataArray type=\"Int32\" +// />\n"; offset += (bytesPerByteVal + bytesCellConnectivity); out<<" <DataArray type=\"Int32\" // Name=\"offsets\" format=\"appended\" offset=\""<< offset <<"\" />\n"; offset += (bytesPerByteVal + // bytesCellOffsets); out<<" <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\""<< // offset <<"\" />\n "; offset += (bytesPerByteVal + bytesCellTypes); out<<" </Cells>\n"; @@ -328,7 +441,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl // // //CELLS SECTION // //cellConnectivity -// out.write( (char*)&bytesCellConnectivty, bytesPerByteVal ); +// out.write( 
(char*)&bytesCellConnectivity, bytesPerByteVal ); // for(int c=0; c<nofCells; c++) // { // out.write( (char*)&val<1>(lines[c]), sizeof(int) ); @@ -397,7 +510,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 - coord */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 3 /*nodes per triangle */ * nofCells * sizeof(int); + int bytesCellConnectivity = 3 /*nodes per triangle */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per triangle */ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of triangle */ * nofCells * sizeof(unsigned char); @@ -421,7 +534,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -446,7 +559,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(triangles[c]), sizeof(int)); out.write((char *)&val<2>(triangles[c]), sizeof(int)); @@ -502,7 +615,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename, int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 3 /*nodes per tri */ * nofCells * sizeof(int); + int bytesCellConnectivity = 3 /*nodes per tri */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per tri */ * nofCells * 
sizeof(int); int bytesCellTypes = 1 /*type of tri */ * nofCells * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(float); @@ -527,7 +640,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename, out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -561,7 +674,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename, // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -625,7 +738,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 4 /*nodes per quad */ * nofCells * sizeof(int); + int bytesCellConnectivity = 4 /*nodes per quad */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per quad */ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of quad */ * nofCells * sizeof(unsigned char); @@ -649,7 +762,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" 
/>\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -674,7 +787,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -730,7 +843,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 4 /*nodes per quad */ * nofCells * sizeof(int); + int bytesCellConnectivity = 4 /*nodes per quad */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per quad */ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of quad */ * nofCells * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(float); @@ -755,7 +868,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -789,7 +902,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -855,7 +968,7 @@ string 
WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 4 /*nodes per quad */ * nofCells * sizeof(int); + int bytesCellConnectivity = 4 /*nodes per quad */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per quad */ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of quad */ * nofCells * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofCells * sizeof(float); @@ -880,7 +993,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -914,7 +1027,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -984,7 +1097,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 4 /*nodes per quad */ * nofCells * sizeof(int); + int bytesCellConnectivity = 4 /*nodes per quad */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per quad */ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of quad */ * nofCells * sizeof(unsigned char); int bytesScalarDataPoint = 1 /*scalar */ * 
nofNodes * sizeof(float); @@ -1010,7 +1123,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -1052,7 +1165,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -1128,7 +1241,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int); + int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per oct*/ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of oct */ * nofCells * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofCells * sizeof(float); @@ -1153,7 +1266,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ 
-1187,7 +1300,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -1257,7 +1370,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int); + int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per oct*/ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of oct */ * nofCells * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(double); @@ -1282,7 +1395,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -1316,7 +1429,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -1386,7 +1499,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo int bytesPerByteVal = 4; 
//==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int); + int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int); int bytesCellOffsets = 1 /*offset per oct*/ * nofCells * sizeof(int); int bytesCellTypes = 1 /*type of oct */ * nofCells * sizeof(unsigned char); // int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(float); @@ -1411,7 +1524,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -1436,7 +1549,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofCells; c++) { out.write((char *)&val<1>(cells[c]), sizeof(int)); out.write((char *)&val<2>(cells[c]), sizeof(int)); @@ -1491,7 +1604,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 1 /*nodes per cell */ * nofNodes * sizeof(int); + int bytesCellConnectivity = 1 /*nodes per cell */ * nofNodes * sizeof(int); int bytesCellOffsets = 1 /*offset per cell */ * nofNodes * sizeof(int); int bytesCellTypes = 1 /*type of line */ * nofNodes * sizeof(unsigned char); @@ -1515,7 +1628,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v out << " <Cells>\n"; out << " <DataArray 
type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -1540,7 +1653,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char *)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofNodes; c++) out.write((char *)&c, sizeof(int)); @@ -1586,7 +1699,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file int bytesPerByteVal = 4; //==sizeof(int) int bytesPoints = 3 /*x1/x2/x3 */ * nofNodes * sizeof(float); - int bytesCellConnectivty = 1 /*nodes per cell */ * nofNodes * sizeof(int); + int bytesCellConnectivity = 1 /*nodes per cell */ * nofNodes * sizeof(int); int bytesCellOffsets = 1 /*offset per cell*/ * nofNodes * sizeof(int); int bytesCellTypes = 1 /*type of oct */ * nofNodes * sizeof(unsigned char); int bytesScalarData = 1 /*scalar */ * nofNodes * sizeof(double); @@ -1611,7 +1724,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file out << " <Cells>\n"; out << " <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset << "\" />\n"; - offset += (bytesPerByteVal + bytesCellConnectivty); + offset += (bytesPerByteVal + bytesCellConnectivity); out << " <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset << "\" />\n"; offset += (bytesPerByteVal + bytesCellOffsets); @@ -1645,7 +1758,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file // CELLS SECTION // cellConnectivity - out.write((char *)&bytesCellConnectivty, bytesPerByteVal); + out.write((char 
*)&bytesCellConnectivity, bytesPerByteVal); for (int c = 0; c < nofNodes; c++) out.write((char *)&c, sizeof(int)); diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.h b/src/basics/basics/writer/WbWriterVtkXmlBinary.h index 421148d90497e3628ed274439c0b2fd7636b7fd2..0f2c31eda81ad0c1975c9715ac1b7fb37a06339b 100644 --- a/src/basics/basics/writer/WbWriterVtkXmlBinary.h +++ b/src/basics/basics/writer/WbWriterVtkXmlBinary.h @@ -93,6 +93,9 @@ public: // nodedata); // FIXME: hides function in base class + std::string writeLinesWithLineData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt2> &lines, + std::vector<std::string> &datanames, std::vector<std::vector<float>> &celldata) override; + ////////////////////////////////////////////////////////////////////////// // triangles // 2 diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp new file mode 100644 index 0000000000000000000000000000000000000000..798b55919df9e24dbc71ecfded5fb8a913cff8cf --- /dev/null +++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp @@ -0,0 +1,360 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of 
VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WbWriterVtkXmlImageBinary.cpp +//! \ingroup writer +//! \author Soeren Freudiger, Sebastian Geller, Henry Korb, Henrik Asmuth +//======================================================================================= +#include <basics/utilities/UbLogger.h> +#include <basics/writer/WbWriterVtkXmlImageBinary.h> +#include <cstring> + +using namespace std; + +/*===============================================================================*/ +const std::string WbWriterVtkXmlImageBinary::pvdEndTag = " </Collection>\n</VTKFile>"; +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeCollection(const string &filename, const vector<string> &filenames, + const double &timeStep, const bool &sepGroups) +{ + string vtkfilename = filename + ".pvd"; + ofstream out(vtkfilename.c_str()); + if (!out) { + out.clear(); // flags ruecksetzen (ansonsten liefert utern if(!out) weiterhin true!!! 
+ string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str()); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + string endian; + if (UbSystem::isLittleEndian()) + endian = "LittleEndian"; + else + endian = "BigEndian"; + out << "<VTKFile type=\"Collection\" version=\"0.1\" byte_order=\"" << endian << "\" >" << endl; + out << " <Collection>" << endl; + + int group = 0, part = 0; + for (size_t i = 0; i < filenames.size(); i++) { + out << " <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << part + << "\" file=\"" << filenames[i] << "\"/>" << endl; + if (sepGroups) + group++; + else + part++; + } + out << pvdEndTag; + out.close(); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::addFilesToCollection(const string &filename, const vector<string> &filenames, + const double &timeStep, const bool &sepGroups) +{ + string vtkfilename = filename; + fstream test(vtkfilename.c_str(), ios::in); + if (!test) { + test.clear(); + vtkfilename += ".pvd"; + test.open(vtkfilename.c_str(), ios::in); + if (!test) + return this->writeCollection(filename, filenames, timeStep, sepGroups); + } + + fstream out(vtkfilename.c_str(), ios::in | ios::out); + out.seekp(-(int)pvdEndTag.size() - 1, ios_base::end); + + int group = 0; + for (size_t i = 0; i < filenames.size(); i++) { + out << " <DataSet timestep=\"" << timeStep << "\" group=\"" << group << "\" part=\"" << i << "\" file=\"" + << filenames[i] << "\"/>" << endl; + if (sepGroups) + group++; + } + out << pvdEndTag; + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, const UbTupleInt6 &wholeExtent, + const UbTupleFloat3 &origin, const UbTupleFloat3 
&spacing, + vector<string> &pieceSources, vector<UbTupleInt6> &pieceExtents, + vector<string> &pointDataNames, vector<string> &cellDataNames) +{ + string vtkfilename = filename + ".pvti"; + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - start"); + + ofstream out(vtkfilename.c_str()); + if (!out) { + out.clear(); // flags ruecksetzen (ansonsten liefert utern if(!out) weiterhin true!!! + string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str()); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + // VTK FILE + out << "<VTKFile type=\"PImageData\" version=\"0.1\" byte_order=\"LittleEndian\">" + << "\n"; + out << " <PImageData " + << "WholeExtent=\"" << val<1>(wholeExtent) << " " + << val<2>(wholeExtent) << " " + << val<3>(wholeExtent) << " " + << val<4>(wholeExtent) << " " + << val<5>(wholeExtent) << " " + << val<6>(wholeExtent) << "\" " + << "GhostLevel=\"0\" " + << "Origin=\"" << val<1>(origin) << " " + << val<2>(origin) << " " + << val<3>(origin) << "\" " + << "Spacing=\"" << val<1>(spacing) << " " + << val<2>(spacing) << " " + << val<3>(spacing) << "\" " + << "> \n"; + out << " <PPointData>\n"; + for (size_t s = 0; s < pointDataNames.size(); s++) + out << " <PDataArray type=\"Float32\" Name=\"" << pointDataNames[s] << "\"/>\n"; + out << " </PPointData>\n"; + if (cellDataNames.size() > 0) { + out << " <PCellData>\n"; + for (size_t s = 0; s < cellDataNames.size(); s++) + out << " <PDataArray type=\"Float32\" Name=\"" << cellDataNames[s] << "\"/>\n"; + out << " </PCellData>\n"; + } + for (size_t s = 0; s < pieceSources.size(); s++) + out << " <Piece Extent=\"" << val<1>(pieceExtents[s]) << " " + << val<2>(pieceExtents[s]) << " " + << val<3>(pieceExtents[s]) << " " + << val<4>(pieceExtents[s]) << " " + << val<5>(pieceExtents[s]) << " " + << val<6>(pieceExtents[s]) << "\" Source=\"" << 
pieceSources[s] << "\"/>\n"; + out << " </PImageData>\n"; + out << "</VTKFile>"; + out << endl; + out.close(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - end"); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeOctsWithCellData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<UbTupleInt8> & /*cells*/, vector<string> &datanames, + vector<vector<double>> &celldata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - start"); + + vector<string> nodeDataNames; + vector<vector<double>> nodedata; + + UbTupleFloat3 origin, spacing; + UbTupleInt6 extent; + + getMetaDataOfImage(nodes, origin, spacing, extent); + + this->writeData(vtkfilename, nodeDataNames, datanames, nodedata, celldata, extent, origin, spacing, extent); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - end"); + + return vtkfilename; +} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeOctsWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<UbTupleUInt8> & /*cells*/, vector<string> &datanames, + vector<vector<double>> &nodedata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - start"); + + vector<string> cellDataNames; + vector<vector<double>> cellData; + + UbTupleFloat3 origin, spacing; + UbTupleInt6 extent; + + getMetaDataOfImage(nodes, origin, spacing, extent); + + this->writeData(vtkfilename, datanames, cellDataNames, nodedata, cellData, extent, origin, spacing, extent); + + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - end"); + + return vtkfilename; 
+} +/*===============================================================================*/ +string WbWriterVtkXmlImageBinary::writeNodesWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes, + vector<string> &datanames, vector<vector<double>> &nodedata) +{ + string vtkfilename = filename + getFileExtension(); + UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeNodesWithNodeData to " << vtkfilename << " - start"); + + vector<string> cellDataNames; + vector<vector<double>> cellData; + + UbTupleFloat3 origin, spacing; + UbTupleInt6 extent; + + getMetaDataOfImage(nodes, origin, spacing, extent); + this->writeData(vtkfilename, datanames, cellDataNames, nodedata, cellData, extent, origin, spacing, extent); + + return vtkfilename; +} + +void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes, UbTupleFloat3 &origin, + UbTupleFloat3 &spacing, UbTupleInt6 &extent) +{ + int nofNodes = (int)nodes.size(); + val<1>(origin) = val<1>(nodes[0]); + val<2>(origin) = val<2>(nodes[0]); + val<3>(origin) = val<3>(nodes[0]); + + float l_x = val<1>(nodes[nofNodes-1])-val<1>(origin); + float l_y = val<2>(nodes[nofNodes-1])-val<2>(origin); + + val<1>(spacing) = val<1>(nodes[1])-val<1>(nodes[0]); + int nx = (l_x) / val<1>(spacing); + val<2>(spacing) = val<2>(nodes[nx])-val<2>(nodes[0]); + int ny = (l_y) / val<2>(spacing); + val<3>(spacing) = val<3>(nodes[nx*ny])-val<3>(nodes[0]); + + val<1>(extent) = val<1>(origin) / val<1>(spacing); val<2>(extent) = val<1>(nodes[nofNodes - 1]) / val<1>(spacing); + val<3>(extent) = val<2>(origin) / val<2>(spacing); val<4>(extent) = val<2>(nodes[nofNodes - 1]) / val<2>(spacing); + val<5>(extent) = val<3>(origin) / val<3>(spacing); val<6>(extent) = val<3>(nodes[nofNodes - 1]) / val<3>(spacing); + +} + +void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename, vector<string> &pointDataNames, + vector<string> &cellDataNames, vector<vector<double>> &nodedata, + vector<vector<double>> &celldata, UbTupleInt6 &wholeExtent, 
+ UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent, + unsigned int precision) +{ + ofstream out(vtkfilename.c_str(), ios::out | ios::binary); + out.precision(precision); + + if (!out) { + out.clear(); // flags ruecksetzen (ansonsten liefert utern if(!out) weiterhin true!!! + string path = UbSystem::getPathFromString(vtkfilename); + if (path.size() > 0) { + UbSystem::makeDirectory(path); + out.open(vtkfilename.c_str(), ios::out | ios::binary); + } + if (!out) + throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename); + } + + size_t nPoints = pointDataNames.size() > 0 ? nodedata[0].size() : celldata[0].size(); + + int bytesPerByteVal = 4; //==sizeof(int) + + int bytesScalarData = 1 /*scalar */ * (int)nPoints * sizeof(double); + + int offset = 0; + + // VTK FILE + out << "<?xml version=\"1.0\"?>\n"; + out << "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"LittleEndian\" >" + << "\n"; + out << " <ImageData " + << "WholeExtent=\"" << val<1>(wholeExtent) << " " + << val<2>(wholeExtent) << " " + << val<3>(wholeExtent) << " " + << val<4>(wholeExtent) << " " + << val<5>(wholeExtent) << " " + << val<6>(wholeExtent) << "\" " + << "Origin=\"" << val<1>(origin) << " " + << val<2>(origin) << " " + << val<3>(origin) << "\" " + << "Spacing=\"" << val<1>(spacing) << " " + << val<2>(spacing) << " " + << val<3>(spacing) << "\"" + << "> \n"; + out << " <Piece Extent=\"" << val<1>(extent) << " " + << val<2>(extent) << " " + << val<3>(extent) << " " + << val<4>(extent) << " " + << val<5>(extent) << " " + << val<6>(extent) << "\">\n"; + + // DATA SECTION + if (pointDataNames.size() > 0) { + out << " <PointData>\n"; + for (size_t s = 0; s < pointDataNames.size(); ++s) { + out << " <DataArray type=\"Float64\" Name=\"" << pointDataNames[s] + << "\" format=\"appended\" offset=\"" << offset << "\" /> \n"; + offset += (bytesPerByteVal + bytesScalarData); + } + out << " </PointData>\n"; + } + + if (cellDataNames.size() > 0) { + out << " <CellData>\n"; + for 
(size_t s = 0; s < cellDataNames.size(); ++s) { + out << " <DataArray type=\"Float64\" Name=\"" << cellDataNames[s] + << "\" format=\"appended\" offset=\"" << offset << "\" /> \n"; + offset += (bytesPerByteVal + bytesScalarData); + } + out << " </CellData>\n"; + } + + out << " </Piece>\n"; + out << " </ImageData>\n"; + + // AppendedData SECTION + out << " <AppendedData encoding=\"raw\">\n"; + out << "_"; + + // DATA SECTION + // pointData + for (size_t s = 0; s < pointDataNames.size(); ++s) { + out.write((char *)&bytesScalarData, bytesPerByteVal); + for (size_t d = 0; d < nodedata[s].size(); ++d) { + double tmp = nodedata[s][d]; + out.write((char *)&tmp, sizeof(double)); + } + } + + // cellData + for (size_t s = 0; s < cellDataNames.size(); ++s) { + out.write((char *)&bytesScalarData, bytesPerByteVal); + for (size_t d = 0; d < celldata[s].size(); ++d) { + double tmp = celldata[s][d]; + out.write((char *)&tmp, sizeof(double)); + } + } + out << "\n </AppendedData>\n"; + out << "</VTKFile>"; + out << endl; + out.close(); +} diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h new file mode 100644 index 0000000000000000000000000000000000000000..c41ff442732e5f65db0f1dd1ec63e5c3ffca1486 --- /dev/null +++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h @@ -0,0 +1,110 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// 
\ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WbWriterVtkXmlBinary.h +//! \ingroup writer +//! \author Soeren Freudiger, Sebastian Geller +//======================================================================================= +#ifndef WBWRITERVTKXMLIMAGEBINARY_H +#define WBWRITERVTKXMLIMAGEBINARY_H + +#include <string> + +#include <basics/writer/WbWriter.h> + +#include "basics_export.h" + +class BASICS_EXPORT WbWriterVtkXmlImageBinary : public WbWriter +{ +public: + static WbWriterVtkXmlImageBinary *getInstance() + { + static WbWriterVtkXmlImageBinary instance; + return &instance; + } + + WbWriterVtkXmlImageBinary(const WbWriterVtkXmlImageBinary &) = delete; + const WbWriterVtkXmlImageBinary &operator=(const WbWriterVtkXmlImageBinary &) = delete; + +private: + WbWriterVtkXmlImageBinary() : WbWriter() + { + if (sizeof(unsigned char) != 1) + throw UbException(UB_EXARGS, "machine error char type mismatch"); + if (sizeof(int) != 4) + throw UbException(UB_EXARGS, "machine error int type mismatch"); + if (sizeof(float) != 4) + throw UbException(UB_EXARGS, "machine error float type mismatch"); + } + + static const std::string pvdEndTag; + 
+public: + std::string getFileExtension() override { return ".bin.vti"; } + + // write a metafile (.pvd collection or .pvti parallel file) + std::string writeCollection(const std::string &filename, const std::vector<std::string> &filenames, + const double &timestep, const bool &sepGroups); + std::string addFilesToCollection(const std::string &filename, const std::vector<std::string> &filenames, + const double &timestep, const bool &sepGroups); + std::string writeParallelFile(const std::string &filename, const UbTupleInt6 &wholeExtent, const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing, + std::vector<std::string> &pieceSources, std::vector<UbTupleInt6> &pieceExtents, + std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames); + + ////////////////////////////////////////////////////////////////////////// + // nodes + std::string writeNodesWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<std::string> &datanames, + std::vector<std::vector<double>> &nodedata) override; + + ////////////////////////////////////////////////////////////////////////// + // octs + // 7 ---- 6 + // /| /| + // 4 +--- 5 | + // | | | | + // | 3 ---+ 2 + // |/ |/ + // 0 ---- 1 + std::string writeOctsWithCellData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<UbTupleInt8> &cells, std::vector<std::string> &datanames, + std::vector<std::vector<double>> &celldata) override; + std::string writeOctsWithNodeData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, + std::vector<UbTupleUInt8> &cells, std::vector<std::string> &datanames, + std::vector<std::vector<double>> &nodedata) override; + // writes a single image data file; precision is applied to the output stream + void writeData(const std::string &vtkfilename, + std::vector<std::string> &pointDataNames, std::vector<std::string> &cellDataNames, + std::vector<std::vector<double>> &nodedata, std::vector<std::vector<double>> &celldata, + UbTupleInt6 &wholeExtent, + UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent, unsigned int precision=6); +
+private: + void getMetaDataOfImage(std::vector<UbTupleFloat3> &nodes, UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent); +}; + +#endif // WBWRITERVTKXMLIMAGEBINARY_H diff --git a/src/basics/config/ConfigurationFile.h b/src/basics/config/ConfigurationFile.h index ef7e7c9f06f94cabb3ba9cbefe95c8ee75736958..4a53f7add85b9c6461fda0bab20fa6656eebc5d3 100644 --- a/src/basics/config/ConfigurationFile.h +++ b/src/basics/config/ConfigurationFile.h @@ -64,6 +64,10 @@ public: template<class T> T getValue(const std::string& key) const; + //! get value with key and default value + template<class T> + T getValue(const std::string& key, T defaultValue) const; + private: //! the container std::map<std::string, std::string> data; @@ -138,6 +142,19 @@ T ConfigurationFile::getValue(const std::string& key) const return x; } +template<class T> +T ConfigurationFile::getValue(const std::string& key, T defaultValue) const +{ + if (contains(key)) + { + return getValue<T>(key); + } + else + { + return defaultValue; + } +} + } #endif diff --git a/src/basics/tests/testUtilities.h b/src/basics/tests/testUtilities.h index c70d9cc5c11633ded6b696d92692e3d4edf8d2ca..57606edc130b0471b957202420cb12859a9cde84 100644 --- a/src/basics/tests/testUtilities.h +++ b/src/basics/tests/testUtilities.h @@ -1,6 +1,8 @@ #ifndef TESTUTILITIES_H #define TESTUTILITIES_H +#include <gmock/gmock.h> + inline auto RealEq = [](auto value) { #ifdef VF_DOUBLE_ACCURACY return testing::DoubleEq(value); diff --git a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h index fe4078af95904fa5e1580b54f3aa2edbb006bd3d..9c3bac9c3e2795fa99f339461c6a7f2d16448696 100644 --- a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h +++ b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h @@ -47,13 +47,13 @@ struct GKSGPU_EXPORT BoundaryCondition : virtual public BoundaryConditionStruct, virtual bool isWall() = 0; virtual bool isFluxBC(); - + virtual bool 
isInsulated(); virtual bool secondCellsNeeded(); virtual void runBoundaryConditionKernel( const SPtr<DataBase> dataBase, - const Parameters parameters, + const Parameters parameters, const uint level ) = 0; BoundaryConditionStruct toStruct() diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt index 5dbc533cc5f45c006c29a12242350f0433518bbf..6db6cbac1ff60c76986c3c22cc8017300d4f71ea 100644 --- a/src/gpu/GksGpu/CMakeLists.txt +++ b/src/gpu/GksGpu/CMakeLists.txt @@ -1,6 +1,6 @@ project(GksGpu LANGUAGES CUDA CXX) -vf_add_library(PRIVATE_LINK basics lbmCuda GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX) +vf_add_library(PRIVATE_LINK basics lbm GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX) target_include_directories(GksGpu PRIVATE "${VF_THIRD_DIR}/cuda_samples/") diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt index b9a2d12df4d0bee9396a706c6636b5f4056b2d3a..8ac5e69513eca94710797db1f971b2461336b769 100644 --- a/src/gpu/GksMeshAdapter/CMakeLists.txt +++ b/src/gpu/GksMeshAdapter/CMakeLists.txt @@ -1,3 +1,3 @@ project(GksMeshAdapter LANGUAGES CUDA CXX) -vf_add_library(PRIVATE_LINK basics GridGenerator lbmCuda) +vf_add_library(PRIVATE_LINK basics GridGenerator lbm) diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f3c4ad492b16c09b26acd00a624a54ad65dffda --- /dev/null +++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp @@ -0,0 +1,444 @@ +#include "TransientBCSetter.h" +#include "GridGenerator/grid/Grid.h" +#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" +#include <logger/Logger.h> + + +#include <math.h> +#include <sstream> +#include <fstream> +#include <iostream> +#include <algorithm> + +SPtr<FileCollection> createFileCollection(std::string prefix, FileType type) +{ + switch(type) + { + case FileType::VTK: + return 
std::make_shared<VTKFileCollection>(prefix); + break; + default: + return nullptr; + } +} + +SPtr<TransientBCInputFileReader> createReaderForCollection(SPtr<FileCollection> fileCollection, uint readLevel) +{ + switch(fileCollection->getFileType()) + { + case FileType::VTK: + return std::make_shared<VTKReader>(std::static_pointer_cast<VTKFileCollection>(fileCollection), readLevel); + break; + default: + return nullptr; + } +} + +//! Splits a whitespace separated string into values of type T; parsing stops at the first token that is not a T. +template<typename T> +std::vector<T> readStringToVector(std::string s) +{ + std::vector<T> out; + std::stringstream input(s); + T num; // parse directly as T so that integer values are not routed through float + while(input >> num) + { + out.push_back(num); + } + return out; +} + +std::string readElement(std::string line) +{ + size_t elemStart = line.find("<")+1; + // size_t elemEnd = line.find("/>", elemStart); + size_t nameLen = line.find(" ", elemStart)-elemStart; + return line.substr(elemStart, nameLen); +} + +std::string readAttribute(std::string line, std::string attributeName) +{ + size_t attributeStart = line.find(attributeName)+attributeName.size() + 2; // add 2 for '="' + size_t attributeLen = line.find("\"", attributeStart)-attributeStart; + return line.substr(attributeStart, attributeLen); +} + +void VTKFile::readHeader() +{ + //TODO make this more flexible + std::ifstream file(this->fileName); + + std::string line; + + getline(file, line); // VTKFile + if(line[1]=='?') getline(file, line); // ignore first line if xml version + + getline(file, line); // ImageData + std::vector<int> wholeExtent = readStringToVector<int>(readAttribute(line, "WholeExtent")); + std::vector<float> origin = readStringToVector<float>(readAttribute(line, "Origin")); + std::vector<float> spacing = readStringToVector<float>(readAttribute(line, "Spacing")); + + getline(file, line); // Piece + std::vector<int> pieceExtent = readStringToVector<int>(readAttribute(line, "Extent")); + getline(file, line); // PointData + + getline(file, line); + while(strcmp(readElement(line).c_str(), "DataArray")==0) + { + Quantity quant =
Quantity(); + quant.name = readAttribute(line, "Name"); + quant.offset = std::stoi(readAttribute(line, "offset")); + this->quantities.push_back( quant ); + getline(file, line); + } + getline(file, line); // </Piece + getline(file, line); // </ImageData + getline(file, line); // AppendedData + + int offset = int(file.tellg())+sizeof(char)+4; // skip underscore and bytesPerVal + + for(auto& quantity: this->quantities) + { + quantity.offset += offset; + } + + file.close(); + + this->deltaX = spacing[0]; + this->deltaY = spacing[1]; + this->deltaZ = spacing[2]; + + this->nx = pieceExtent[1]-pieceExtent[0]+1; + this->ny = pieceExtent[3]-pieceExtent[2]+1; + this->nz = pieceExtent[5]-pieceExtent[4]+1; + + this->minX = origin[0]+this->deltaX*pieceExtent[0]; this->maxX = (this->nx-1)*this->deltaX+this->minX; + this->minY = origin[1]+this->deltaY*pieceExtent[2]; this->maxY = (this->ny-1)*this->deltaY+this->minY; + this->minZ = origin[2]+this->deltaZ*pieceExtent[4]; this->maxZ = (this->nz-1)*this->deltaZ+this->minZ; + // printFileInfo(); + +} + +//! Returns true if the first quantity of this file holds a NaN at any of the given read indices. +bool VTKFile::markNANs(std::vector<uint> readIndices) +{ + // nothing requested (or no quantity parsed from the header) means nothing can be invalid + if(readIndices.empty() || this->quantities.empty()) return false; + + std::ifstream buf(fileName.c_str(), std::ios::in | std::ios::binary); + + // Read the first quantity up to the highest requested index in one go. The vector is sized + // (not merely reserved) so that read() fills valid elements instead of unowned capacity. + const size_t nValues = size_t(*std::max_element(readIndices.begin(), readIndices.end()))+1; + std::vector<double> tmp(nValues); + buf.seekg(this->quantities[0].offset); + buf.read(reinterpret_cast<char*>(tmp.data()), sizeof(double)*nValues); + + // only the values that are actually requested are inspected + return std::any_of(readIndices.begin(), readIndices.end(), [&tmp](uint idx){ return isnan(tmp[idx]); }); +} + +void VTKFile::loadFile() +{ + std::ifstream buf(this->fileName.c_str(), std::ios::in | std::ios::binary); + for(auto& quantity: this->quantities) + { + quantity.values.resize(getNumberOfPoints()); + buf.seekg(quantity.offset); + buf.read(reinterpret_cast<char*>(quantity.values.data()), this->getNumberOfPoints()*sizeof(double)); + } + + buf.close(); + + this->loaded = true; +} + +void VTKFile::unloadFile() +{ + for(auto& quantity : this->quantities) + { + std::vector<double> replacement; +
quantity.values.swap(replacement); + } + this->loaded = false; +} + +void VTKFile::getData(real *data, uint numberOfNodes, const std::vector<uint> &readIndices, + const std::vector<uint> &writeIndices, uint offsetRead, uint offsetWrite) +{ + if(!this->loaded) loadFile(); + + size_t nPoints = writeIndices.size(); + + for(size_t j=0; j<this->quantities.size(); j++) + { + real* quant = &data[j*numberOfNodes]; + for(size_t i=0; i<nPoints; i++) + { + quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndices[i]+offsetRead]; + } + } +} + +void VTKFile::printFileInfo() +{ + printf("file %s with \n nx %i ny %i nz %i \n origin %f %f %f \n spacing %f %f %f \n", + fileName.c_str(), nx, ny, nz, minX, minY, minZ, deltaX, deltaY, deltaZ); + for(auto quantity: this->quantities) + { + printf("\t quantity %s offset %i \n", quantity.name.c_str(), quantity.offset); + } + +} + + +void VTKFileCollection::findFiles() +{ + bool foundLastLevel = false; + + while(!foundLastLevel) + { + bool foundLastID = false; + std::vector<std::vector<VTKFile>> filesOnThisLevel; + while(!foundLastID) + { + bool foundLastPart = false; + std::vector<VTKFile> filesWithThisId; + while (!foundLastPart) + { + std::string fname = makeFileName((int)files.size(), (int)filesOnThisLevel.size(), (int)filesWithThisId.size()); + std::ifstream f(fname); + if(f.good()) + filesWithThisId.emplace_back(fname); + else + foundLastPart = true; + } + if(!filesWithThisId.empty()) + { + VF_LOG_INFO("VTKFileCollection found {} files with ID {} level {}", filesWithThisId.size(), filesOnThisLevel.size(), files.size() ); + filesOnThisLevel.push_back(filesWithThisId); + } + else foundLastID = true; + } + + + if(!filesOnThisLevel.empty()) + files.push_back(filesOnThisLevel); + else + foundLastLevel = true; + + } + + if(files.empty()) + VF_LOG_CRITICAL("VTKFileCollection found no files!"); +} + +void TransientBCInputFileReader::getNeighbors(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM) +{ 
+ std::copy(planeNeighbor0PP.begin(), planeNeighbor0PP.end(), &neighbor0PP[writingOffset]); + std::copy(planeNeighbor0PM.begin(), planeNeighbor0PM.end(), &neighbor0PM[writingOffset]); + std::copy(planeNeighbor0MP.begin(), planeNeighbor0MP.end(), &neighbor0MP[writingOffset]); + std::copy(planeNeighbor0MM.begin(), planeNeighbor0MM.end(), &neighbor0MM[writingOffset]); +} + +void TransientBCInputFileReader::getWeights(real* _weights0PP, real* _weights0PM, real* _weights0MP, real* _weights0MM) +{ + std::copy(weights0PP.begin(), weights0PP.end(), &_weights0PP[writingOffset]); + std::copy(weights0PM.begin(), weights0PM.end(), &_weights0PM[writingOffset]); + std::copy(weights0MP.begin(), weights0MP.end(), &_weights0MP[writingOffset]); + std::copy(weights0MM.begin(), weights0MM.end(), &_weights0MM[writingOffset]); +} + + +void VTKReader::initializeIndexVectors() +{ + this->readIndices.resize(this->fileCollection->files.size()); + this->writeIndices.resize(this->fileCollection->files.size()); + this->nFile.resize(this->fileCollection->files.size()); + for(size_t lev=0; lev<this->fileCollection->files.size(); lev++) + { + this->readIndices[lev].resize(this->fileCollection->files[lev].size()); + this->writeIndices[lev].resize(this->fileCollection->files[lev].size()); + this->nFile[lev].resize(this->fileCollection->files[lev].size()); + } +} + +void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ) +{ + this->nPoints = (uint)coordsY.size(); + this->initializeIndexVectors(); + real max_diff = 1e-4; // maximum distance between point on grid and precursor plane to count as exact match + real eps = 1e-7; // small number to avoid division by zero + bool perfect_match = true; + + this->weights0PP.reserve(this->nPoints); + this->weights0PM.reserve(this->nPoints); + this->weights0MP.reserve(this->nPoints); + this->weights0MM.reserve(this->nPoints); + + this->planeNeighbor0PP.reserve(this->nPoints); + this->planeNeighbor0PM.reserve(this->nPoints); + 
this->planeNeighbor0MP.reserve(this->nPoints); + this->planeNeighbor0MM.reserve(this->nPoints); + + for(uint i=0; i<nPoints; i++) + { + + real posY = coordsY[i]; + real posZ = coordsZ[i]; + bool found0PP = false, found0PM = false, found0MP = false, found0MM = false, foundAll = false; + + uint level = this->readLevel; + + for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++) + { + VTKFile &file = this->fileCollection->files[level][fileId][0]; + if(!file.inBoundingBox(posY, posZ, 0.0f)) continue; + + // y in simulation is x in precursor/file, z in simulation is y in precursor/file + // simulation -> file: N -> E, S -> W, T -> N, B -> S + int idx = file.findNeighborMMM(posY, posZ, 0.f); //!> index of nearest WSB neighbor on precursor file + + if(idx!=-1) + { + // Filter for exact matches + if(abs(posY-file.getX(idx)) < max_diff && abs(posZ-file.getY(idx)) < max_diff) + { + this->weights0PP.emplace_back(1e6f); + this->weights0PM.emplace_back(0.f); + this->weights0MP.emplace_back(0.f); + this->weights0MM.emplace_back(0.f); + uint writeIdx = this->getWriteIndex(level, fileId, idx); //!> writeIdx: index on host/device array where precursor value will be written to after loading from file + this->planeNeighbor0PP.push_back(writeIdx); //!> neighbor lists mapping where BC kernel should read from on host/device array + this->planeNeighbor0PM.push_back(writeIdx); + this->planeNeighbor0MP.push_back(writeIdx); + this->planeNeighbor0MM.push_back(writeIdx); + found0PP = true; + found0PM = true; + found0MM = true; + found0MP = true; + } + else + { + perfect_match = false; + } + + if(!found0MM) + { + found0MM = true; + real dy = file.getX(idx)-posY; + real dz = file.getY(idx)-posZ; + this->weights0MM.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighbor0MM.emplace_back(getWriteIndex(level, fileId, idx)); + } + + } + + if(!found0PP) //NT in simulation is EN in precursor + { + int index = file.findNeighborPPM(posY, posZ, 0.f); + if(index!=-1) + { + 
found0PP = true; + real dy = file.getX(index)-posY; + real dz = file.getY(index)-posZ; + this->weights0PP.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, index)); + } + } + + if(!found0PM) //NB in simulation is ES in precursor + { + int index = file.findNeighborPMM(posY, posZ, 0.f); + if(index!=-1) + { + found0PM = true; + real dy = file.getX(index)-posY; + real dz = file.getY(index)-posZ; + this->weights0PM.emplace_back(1.f/(dy*dy+dz*dz+eps)); + // the write index goes into the 0PM list, next to its 0PM weight, so that every list gets one entry per grid point + this->planeNeighbor0PM.emplace_back(getWriteIndex(level, fileId, index)); + } + } + + if(!found0MP) //ST in simulation is WN in precursor + { + int index = file.findNeighborMPM(posY, posZ, 0.f); + if(index!=-1) + { + found0MP = true; + real dy = file.getX(index)-posY; + real dz = file.getY(index)-posZ; + this->weights0MP.emplace_back(1.f/(dy*dy+dz*dz+eps)); + this->planeNeighbor0MP.emplace_back(getWriteIndex(level, fileId, index)); + } + } + + foundAll = found0PP && found0PM && found0MP && found0MM; + + if(foundAll) break; + } + + if(!foundAll) + { + VF_LOG_CRITICAL("Found no matching precursor neighbors for grid point at y={}, z={} \n", posY, posZ); + throw std::runtime_error("VTKReader::fillArrays(): Did not find neighbors in the FileCollection for all points"); + } + } + + if(perfect_match) + printf("Precursor was a perfect match \n"); + + + for(size_t level=0; level<this->fileCollection->files.size(); level++){ + for(size_t id=0; id<this->fileCollection->files[level].size(); id++){ + if(this->fileCollection->files[level][id][0].markNANs(this->readIndices[level][id])) + throw std::runtime_error("Found a NAN in the precursor where a velocity is needed"); + }} +} + +uint VTKReader::getWriteIndex(int level, int id, int linearIndex) +{ +
this->writeIndices[level][id].push_back(this->nPointsRead); //!> index on host/device array where value from file will be written to + this->readIndices[level][id].push_back(linearIndex); //!> index in file that will be read from + this->nPointsRead++; + } + return idx; +} + + +void VTKReader::getNextData(real* data, uint numberOfNodes, real time) +{ + // for(size_t level=0; level<this->fileCollection->files.size(); level++) + // { + uint level = this->readLevel; + for(size_t id=0; id<this->fileCollection->files[level].size(); id++) + { + size_t numberOfFiles = this->nFile[level][id]; + + + if(!this->fileCollection->files[level][id][numberOfFiles].inZBounds(time)) + { + numberOfFiles++; + + printf("switching to precursor file no. %zu\n", numberOfFiles); + if(numberOfFiles == this->fileCollection->files[level][id].size()) + throw std::runtime_error("Not enough Precursor Files to read"); + + this->fileCollection->files[level][id][numberOfFiles-1].unloadFile(); + if(numberOfFiles+1<this->fileCollection->files[level][id].size()) + { + VTKFile* nextFile = &this->fileCollection->files[level][id][numberOfFiles+1]; + if(! 
nextFile->isLoaded()) + { + read.wait(); + read = std::async(std::launch::async, [](VTKFile* file){ file->loadFile(); }, &this->fileCollection->files[level][id][numberOfFiles+1]); + } + } + } + + + VTKFile* file = &this->fileCollection->files[level][id][numberOfFiles]; + + int off = file->getClosestIdxZ(time)*file->getNumberOfPointsInXYPlane(); + file->getData(data, numberOfNodes, this->readIndices[level][id], this->writeIndices[level][id], off, this->writingOffset); + this->nFile[level][id] = numberOfFiles; + } + // } +} diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h new file mode 100644 index 0000000000000000000000000000000000000000..1663a3ff37ba1bb062647847462d4e364baed93b --- /dev/null +++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h @@ -0,0 +1,201 @@ +#ifndef TRANSIENTBCSETTER_H_ +#define TRANSIENTBCSETTER_H_ + +#include "Core/DataTypes.h" +#include <Core/StringUtilities/StringUtil.h> +#include "PointerDefinitions.h" + +#include <string> +#include <vector> +#include <math.h> +#include <sstream> +#include <future> +class Grid; +namespace gg +{ + class BoundaryCondition; +} + + +enum class FileType +{ + VTK +}; + +struct Quantity +{ + std::string name; + int offset; + std::vector<double> values; +}; + +class VTKFile +{ +public: + explicit VTKFile(std::string _fileName): + fileName(_fileName) + { + readHeader(); + this->loaded = false; + // printFileInfo(); + }; + + void getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite); + bool markNANs(std::vector<uint> readIndices); + bool inBoundingBox(real posX, real posY, real posZ){return inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); }; + bool inXBounds(real posX){ return posX<=maxX && posX>=minX; }; + bool inYBounds(real posY){ return posY<=maxY && posY>=minY; }; + bool inZBounds(real posZ){ return posZ<=maxZ 
&& posZ>=minZ; }; + int findNeighborMMM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX) , getIdx0M0(posY) , getIdx00M(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborMMP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX) , getIdx0M0(posY) , getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborMPM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX) , getIdx0M0(posY)+1, getIdx00M(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborMPP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX) , getIdx0M0(posY)+1, getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborPMM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY) , getIdx00M(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborPMP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY) , getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborPPM(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)+1, getIdx00M(posZ) ); return (idx>=0) && (idx<nx*ny*nz) ? idx : -1; }; + int findNeighborPPP(real posX, real posY, real posZ){ int idx = getLinearIndex(getIdxM00(posX)+1, getIdx0M0(posY)+1, getIdx00M(posZ)+1); return (idx>=0) && (idx<nx*ny*nz) ? 
idx : -1; }; + int getIdxX(int linearIdx){ return linearIdx%nx;}; + int getIdxY(int linearIdx){ return (linearIdx/nx)%ny;}; + int getIdxZ(int linearIdx){ return linearIdx/(nx*ny); }; + real getX(int linearIdx){ return getIdxX(linearIdx)*deltaX+minX; }; + real getY(int linearIdx){ return getIdxY(linearIdx)*deltaY+minY; }; + real getZ(int linearIdx){ return getIdxZ(linearIdx)*deltaZ+minZ; }; + int getIdxM00(real posX){ return (posX-minX)/deltaX; }; + int getIdx0M0(real posY){ return (posY-minY)/deltaY; }; + int getIdx00M(real posZ){ return (posZ-minZ)/deltaZ; }; + int getClosestIdxX(real posX){ int x = round((posX-minX)/deltaX); return x>nx ? nx : (x<0 ? 0 : x);}; + int getClosestIdxY(real posY){ int y = round((posY-minY)/deltaY); return y>ny ? ny : (y<0 ? 0 : y);}; + int getClosestIdxZ(real posZ){ int z = round((posZ-minZ)/deltaZ); return z>nz ? nz : (z<0 ? 0 : z);}; + int getLinearIndex(int idxX, int idxY, int idxZ){ return idxX + nx*(idxY+ny*idxZ); }; + int getNumberOfPointsInXYPlane(){ return nx*ny; } + int getNumberOfPointsInYZPlane(){ return ny*nz; } + int getNumberOfPointsInXZPlane(){ return nx*nz; } + int getNumberOfPoints(){ return nx*ny*nz; } + size_t getNumberOfQuantities(){ return quantities.size(); } + void loadFile(); + void unloadFile(); + bool isLoaded(){return loaded;}; + + +private: + void readHeader(); + void printFileInfo(); + +public: + +private: + std::string fileName; + real minX, maxX, minY, maxY, minZ, maxZ; + real deltaX, deltaY, deltaZ; + int nx, ny, nz; + std::vector<Quantity> quantities; + bool loaded; +}; + +class FileCollection +{ +public: + FileCollection(std::string _prefix): + prefix(_prefix){}; + + virtual ~FileCollection() = default; + + virtual size_t getNumberOfQuantities() = 0; + + virtual FileType getFileType() = 0; + +protected: + std::string prefix; +}; + + +class VTKFileCollection : public FileCollection +{ +public: + VTKFileCollection(std::string _prefix): + FileCollection(_prefix) + { + findFiles(); + }; + + FileType 
getFileType() override{ return FileType::VTK; }; + size_t getNumberOfQuantities() override{ return files[0][0][0].getNumberOfQuantities(); } + + +private: + void findFiles(); + std::string makeFileName(int level, int id, int part) + { + return prefix + "_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_File_" + StringUtil::toString<int>(part) + + ".bin." + suffix; + }; + + +public: + static const inline std::string suffix = "vti"; + std::vector<std::vector<std::vector<VTKFile>>> files; +}; + + +class TransientBCInputFileReader +{ +public: + TransientBCInputFileReader() + { + this->nPoints = 0; + this->nPointsRead = 0; + this->writingOffset = 0; + }; + virtual ~TransientBCInputFileReader() = default; + + virtual void getNextData(real* data, uint numberOfNodes, real time)=0; + virtual void fillArrays(std::vector<real>& coordsY, std::vector<real>& coordsZ)=0; + uint getNPoints(){return nPoints; }; + uint getNPointsRead(){return nPointsRead; }; + size_t getNumberOfQuantities(){ return nQuantities; }; + void setWritingOffset(uint offset){ this->writingOffset = offset; } + void getNeighbors(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM); + void getWeights(real* _weights0PP, real* _weights0PM, real* _weights0MP, real* _weights0MM); + +public: + std::vector<uint> planeNeighbor0PP, planeNeighbor0PM, planeNeighbor0MP, planeNeighbor0MM; + std::vector<real> weights0PP, weights0PM, weights0MP, weights0MM; + +protected: + uint nPoints, nPointsRead, writingOffset; + uint nReads=0; + size_t nQuantities=0; +}; + + +class VTKReader : public TransientBCInputFileReader +{ +public: + VTKReader(SPtr<VTKFileCollection> _fileCollection, uint _readLevel): + fileCollection(_fileCollection), + readLevel(_readLevel) + { + this->nQuantities = fileCollection->getNumberOfQuantities(); + read = std::async([](){}); + }; + void getNextData(real* data, uint numberOfNodes, real time) override; + void fillArrays(std::vector<real>& 
coordsY, std::vector<real>& coordsZ) override; +private: + uint getWriteIndex(int level, int id, int linearIdx); + void initializeIndexVectors(); + +private: + std::vector<std::vector<std::vector<uint>>> readIndices, writeIndices; + std::vector<std::vector<size_t>> nFile; + SPtr<VTKFileCollection> fileCollection; + uint readLevel; + std::future<void> read; +}; + + +SPtr<FileCollection> createFileCollection(std::string prefix, FileType type); +SPtr<TransientBCInputFileReader> createReaderForCollection(SPtr<FileCollection> fileCollection, uint readLevel); + +#endif //TRANSIENTBCSETTER_H_ diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp index 5102f60fc295aadf4323a4b332bf3dd8f7f21dbf..b0fb2604946b83ead45c30adabbcfe8dc26fa656 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp @@ -36,12 +36,12 @@ #include "grid/BoundaryConditions/Side.h" #include "grid/Grid.h" +#include "GridGenerator/TransientBCSetter/TransientBCSetter.h" bool gg::BoundaryCondition::isSide( SideType side ) const { return this->side->whoAmI() == side; } - ////////////////////////////////////////////////////////////////////////// void VelocityBoundaryCondition::setVelocityProfile( @@ -124,5 +124,4 @@ void StressBoundaryCondition::fillSamplingIndices(std::vector<SPtr<Grid> > grid, this->velocitySamplingIndices.push_back( grid[level]->transCoordToIndex(x_sampling, y_sampling, z_sampling) ); } -} - +} \ No newline at end of file diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h index 4a3990d9f815042297be76ae83a61268c8ad6815..22342aec9839afad9bb37b1b11812f6d1750ed7b 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h +++ 
b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h @@ -45,6 +45,8 @@ class Grid; class Side; enum class SideType; +class TransientBCInputFileReader; + namespace gg { class BoundaryCondition @@ -63,6 +65,8 @@ public: bool isSide(SideType side) const; real getQ(uint index, uint dir) { return this->qs[index][dir]; } + + void getCoords( SPtr<Grid> grid, std::vector<real>& x, std::vector<real>& y, std::vector<real>& z); }; } @@ -246,6 +250,7 @@ public: real getVy(uint index) { return this->vyList[index]; } real getVz(uint index) { return this->vzList[index]; } + void setVelocityProfile( SPtr<Grid> grid, std::function<void(real,real,real,real&,real&,real&)> velocityProfile ); }; @@ -329,5 +334,32 @@ public: real getNormalz(uint index) { return this->normalZList[index]; } }; +class PrecursorBoundaryCondition : public gg::BoundaryCondition +{ +public: + static SPtr<PrecursorBoundaryCondition> make(SPtr<TransientBCInputFileReader> reader, int timeStepsBetweenReads, real velocityX, real velocityY, real velocityZ) + { + return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ)); + } + SPtr<TransientBCInputFileReader> getReader(){ return reader; } + real getVelocityX() { return velocityX; } + real getVelocityY() { return velocityY; } + real getVelocityZ() { return velocityZ; } + +private: + PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _timeStepsBetweenReads, real vx, real vy, real vz) : reader(_reader), timeStepsBetweenReads(_timeStepsBetweenReads), velocityX(vx), velocityY(vy), velocityZ(vz) { }; + virtual char getType() const override + { + return vf::gpu::BC_VELOCITY; + } +public: + uint timeStepsBetweenReads; //!> read data every nth timestep + +private: + real velocityX = 0.0; + real velocityY = 0.0; + real velocityZ = 0.0; + SPtr<TransientBCInputFileReader> reader; +}; #endif \ No newline at end of file diff --git 
a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index 6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a..ba4eea50ffb6bc136528db31207274d626fe9b15 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -1,28 +1,28 @@ //======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ // \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -// This file is part of VirtualFluids. VirtualFluids is free software: you can +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. -// +// // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // @@ -37,9 +37,24 @@ #include "grid/NodeValues.h" #include "utilities/math/Math.h" +#include <array> +#include <cstddef> +#include <vector> using namespace gg; +std::array<real, 3> Side::getNormal() const +{ + std::array<real, 3> normal; + if(this->getCoordinate()==X_INDEX) + normal = {(real)this->getDirection(), 0.0, 0.0}; + if(this->getCoordinate()==Y_INDEX) + normal = {0.0, (real)this->getDirection(), 0.0}; + if(this->getCoordinate()==Z_INDEX) + normal = {0.0, 0.0, (real)this->getDirection()}; + return normal; +} + void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter) { @@ -49,11 +64,17 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition { const uint index = getIndex(grid, coord, constant, v1, v2); - if ((index != INVALID_INDEX) && ( grid->getFieldEntry(index) == vf::gpu::FLUID - || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC - || 
grid->getFieldEntry(index) == vf::gpu::FLUID_CFF - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC - || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF )) + if(index == INVALID_INDEX) + continue; + + if ( grid->getFieldEntry(index) == vf::gpu::FLUID + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC + || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF + // Overlap of BCs on edge nodes + || grid->nodeHasBC(index) ) { grid->setFieldEntry(index, boundaryCondition->getType()); boundaryCondition->indices.push_back(index); @@ -64,9 +85,12 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition boundaryCondition->patches.push_back(0); } - } } + + const auto currentBCSide = this->whoAmI(); + if(currentBCSide != SideType::GEOMETRY) + grid->addBCalreadySet(currentBCSide); } void Side::setPressureNeighborIndices(SPtr<BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index) @@ -119,50 +143,111 @@ void Side::setStressSamplingIndices(SPtr<BoundaryCondition> boundaryCondition, S void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uint index) { - std::vector<real> qNode(grid->getEndDirection() + 1); - for (int dir = 0; dir <= grid->getEndDirection(); dir++) - { - real x,y,z; - grid->transIndexToCoords( index, x, y, z ); + for (int dir = 0; dir <= grid->getEndDirection(); dir++) { + real x, y, z; + grid->transIndexToCoords(index, x, y, z); - real coords[3] = {x,y,z}; + std::array<real, 3> coords = { x, y, z }; + std::array<real, 3> neighborCoords = getNeighborCoordinates(grid.get(), coords, (size_t)dir); - real neighborX = x + grid->getDirection()[dir * DIMENSION + 0] * grid->getDelta(); - real neighborY = y + grid->getDirection()[dir * DIMENSION + 1] * grid->getDelta(); - real neighborZ = z + grid->getDirection()[dir * DIMENSION + 2] * 
grid->getDelta(); + correctNeighborForPeriodicBoundaries(grid.get(), coords, neighborCoords); - // correct neighbor coordinates in case of periodic boundaries - if( grid->getPeriodicityX() && grid->getFieldEntry( grid->transCoordToIndex( neighborX, y, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ) - { - if( neighborX > x ) neighborX = grid->getFirstFluidNode( coords, 0, grid->getStartX() ); - else neighborX = grid->getLastFluidNode ( coords, 0, grid->getEndX() ); - } + const uint neighborIndex = grid->transCoordToIndex(neighborCoords[0], neighborCoords[1], neighborCoords[2]); - if( grid->getPeriodicityY() && grid->getFieldEntry( grid->transCoordToIndex( x, neighborY, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ) - { - if( neighborY > y ) neighborY = grid->getFirstFluidNode( coords, 1, grid->getStartY() ); - else neighborY = grid->getLastFluidNode ( coords, 1, grid->getEndY() ); + //! Only setting q's that partially point in the Side-normal direction + const bool alignedWithNormal = this->isAlignedWithMyNormal(grid.get(), dir); + if (grid->isStopperForBC(neighborIndex) && alignedWithNormal) { + qNode[dir] = 0.5; + } else { + qNode[dir] = -1.0; } - if( grid->getPeriodicityZ() && grid->getFieldEntry( grid->transCoordToIndex( x, y, neighborZ ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ) - { - if( neighborZ > z ) neighborZ = grid->getFirstFluidNode( coords, 2, grid->getStartZ() ); - else neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() ); + // reset diagonals in case they were set by another bc + resetDiagonalsInCaseOfOtherBC(grid.get(), qNode, dir, coords); + } + + boundaryCondition->qs.push_back(qNode); +} + +std::array<real, 3> Side::getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates, size_t direction) const +{ + return { coordinates[0] + grid->getDirection()[direction * DIMENSION + 0] * grid->getDelta(), + coordinates[1] + grid->getDirection()[direction * DIMENSION + 1] * grid->getDelta(), + coordinates[2] + 
grid->getDirection()[direction * DIMENSION + 2] * grid->getDelta() }; +} + +bool Side::neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const +{ + const auto neighborCoords = getNeighborCoordinates(grid, coordinates, sideToD3Q27.at(side)); + const auto neighborIndex = grid->transCoordToIndex(neighborCoords[0], neighborCoords[1], neighborCoords[2]); + return grid->isStopperForBC(neighborIndex); +} + +void Side::resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real> &qNode, int dir, + const std::array<real, 3> &coordinates) const +{ + // When to reset a diagonal q to -1: + // - it is normal to another boundary condition which was already set + // - and it actually is influenced by the other bc: + // We check if its neighbor in the regular direction to the other bc is a stopper. If it is a stopper, it is influenced by the other bc. + + if (qNode[dir] == 0.5 && grid->getBCAlreadySet().size() > 0) { + for (int i = 0; i < (int)grid->getBCAlreadySet().size(); i++) { + SideType otherDir = grid->getBCAlreadySet()[i]; + + // only reset normals for nodes on edges and corners, not on faces + if (!neighborNormalToSideIsAStopper(grid, coordinates, otherDir)) + continue; + + const auto otherNormal = normals.at(otherDir); + if (isAlignedWithNormal(grid, dir, otherNormal)) { + qNode[dir] = -1.0; + } } + } +} - uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ ); - if( grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID || - grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID ) - qNode[dir] = 0.5; +bool Side::isAlignedWithMyNormal(const Grid *grid, int dir) const +{ + std::array<real, 3> normal = this->getNormal(); + return isAlignedWithNormal(grid, dir, normal); +} + +bool Side::isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3> &normal) const +{ + return (normal[0] * 
grid->getDirection()[dir * DIMENSION + 0] + + normal[1] * grid->getDirection()[dir * DIMENSION + 1] + + normal[2] * grid->getDirection()[dir * DIMENSION + 2]) > 0; +} + +void Side::correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighborCoords) const +{ + // correct neighbor coordinates in case of periodic boundaries + if (grid->getPeriodicityX() && + grid->getFieldEntry(grid->transCoordToIndex(neighborCoords[0], coords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) { + if (neighborCoords[0] > coords[0]) + neighborCoords[0] = grid->getFirstFluidNode(coords.data(), 0, grid->getStartX()); else - qNode[dir] = -1.0; + neighborCoords[0] = grid->getLastFluidNode(coords.data(), 0, grid->getEndX()); + } + if (grid->getPeriodicityY() && + grid->getFieldEntry(grid->transCoordToIndex(coords[0], neighborCoords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) { + if (neighborCoords[1] > coords[1]) + neighborCoords[1] = grid->getFirstFluidNode(coords.data(), 1, grid->getStartY()); + else + neighborCoords[1] = grid->getLastFluidNode(coords.data(), 1, grid->getEndY()); } - boundaryCondition->qs.push_back(qNode); + if (grid->getPeriodicityZ() && + grid->getFieldEntry(grid->transCoordToIndex(coords[0], coords[1], neighborCoords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) { + if (neighborCoords[2] > coords[2]) + neighborCoords[2] = grid->getFirstFluidNode(coords.data(), 2, grid->getStartZ()); + else + neighborCoords[2] = grid->getLastFluidNode(coords.data(), 2, grid->getEndZ()); + } } uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2) @@ -177,7 +262,7 @@ uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, } -void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<BoundaryCondition> boundaryCondition) +void Geometry::addIndices(const std::vector<SPtr<Grid>> &grids, uint level, SPtr<BoundaryCondition> boundaryCondition) { 
auto geometryBoundaryCondition = std::dynamic_pointer_cast<GeometryBoundaryCondition>(boundaryCondition); @@ -190,7 +275,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++) { - const real q = grids[level]->getQValue(index, dir); + const real q = grids[level]->getQValue(index, dir); qNode[dir] = q; @@ -218,7 +303,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound -void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void MX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartY(); real endInner = grid[level]->getEndY(); @@ -234,7 +319,7 @@ void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond } -void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void PX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartY(); real endInner = grid[level]->getEndY(); @@ -249,7 +334,7 @@ void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond Side::addIndices(grid[level], boundaryCondition, "x", coordinateNormal, startInner, endInner, startOuter, endOuter); } -void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void MY::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartX(); real endInner = grid[level]->getEndX(); @@ -265,7 +350,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond } -void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void PY::addIndices(const 
std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartX(); real endInner = grid[level]->getEndX(); @@ -281,7 +366,7 @@ void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond } -void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void MZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartX(); real endInner = grid[level]->getEndX(); @@ -296,7 +381,7 @@ void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter); } -void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) +void PZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition) { real startInner = grid[level]->getStartX(); real endInner = grid[level]->getEndX(); @@ -307,6 +392,6 @@ void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond real coordinateNormal = grid[level]->getEndZ() - grid[level]->getDelta(); if( coordinateNormal < grid[0]->getEndZ() - grid[0]->getDelta() ) return; - + Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter); } diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h index 6df6bfccc9a39b80de3ac43d057a03945d035b34..624b3722a1c909ba26063b49565779b924d34adc 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h @@ -33,10 +33,14 @@ #ifndef SIDE_H #define SIDE_H +#include <cstddef> #include <string> #include <vector> +#include <map> +#include <array> #include "gpu/GridGenerator/global.h" 
+#include "lbm/constants/D3Q27.h" #define X_INDEX 0 #define Y_INDEX 1 @@ -59,37 +63,59 @@ enum class SideType MX, PX, MY, PY, MZ, PZ, GEOMETRY }; - - class Side { public: virtual ~Side() = default; - virtual void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) = 0; + virtual void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, + SPtr<gg::BoundaryCondition> boundaryCondition) = 0; virtual int getCoordinate() const = 0; virtual int getDirection() const = 0; virtual SideType whoAmI() const = 0; + std::array<real, 3> getNormal() const; + protected: - static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, + void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter); static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); - static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); + void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); + + virtual void correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighbors) const; + + virtual bool isAlignedWithMyNormal(const Grid *grid, int dir) const; + bool isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3>& normal) const; private: static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2); + void resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real>& qNode, int dir, const std::array<real, 3> &coordinates) const; + std::array<real, 3> getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates, + size_t 
direction) const; + bool neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const; + +protected: + const std::map<SideType, const std::array<real, 3>> normals = { + { SideType::MX, { NEGATIVE_DIR, 0.0, 0.0 } }, { SideType::PX, { POSITIVE_DIR, 0.0, 0.0 } }, + { SideType::MY, { 0.0, NEGATIVE_DIR, 0.0 } }, { SideType::PY, { 0.0, POSITIVE_DIR, 0.0 } }, + { SideType::MZ, { 0.0, 0.0, NEGATIVE_DIR } }, { SideType::PZ, { 0.0, 0.0, POSITIVE_DIR } } + }; + const std::map<SideType, size_t> sideToD3Q27 = { + { SideType::MX, vf::lbm::dir::DIR_M00 }, { SideType::PX, vf::lbm::dir::DIR_P00 }, + { SideType::MY, vf::lbm::dir::DIR_0M0 }, { SideType::PY, vf::lbm::dir::DIR_0P0 }, + { SideType::MZ, vf::lbm::dir::DIR_00M }, { SideType::PZ, vf::lbm::dir::DIR_00P } + }; }; class Geometry : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -110,7 +136,7 @@ public: class MX : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -131,7 +157,7 @@ public: class PX : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -153,7 +179,7 @@ public: class MY : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void 
addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -174,7 +200,7 @@ public: class PY : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -196,7 +222,7 @@ public: class MZ : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -217,7 +243,7 @@ public: class PZ : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..36a286a8766db4af7e109eb3f8d47add401779f9 --- /dev/null +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp @@ -0,0 +1,873 @@ +#include "Side.h" +#include "PointerDefinitions.h" +#include "gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h" +#include "grid/GridImp.h" +#include "grid/NodeValues.h" +#include "lbm/constants/D3Q27.h" +#include "gmock/gmock.h" +#include <algorithm> +#include <gtest/gtest.h> +#include <iostream> +#include <memory> +#include <stdexcept> +#include <vector> + +using namespace vf::gpu; +using namespace vf::lbm::dir; + +class SideTestSpecificSubclass : public Side +{ + 
+public: + void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index) + { + Side::setQs(grid, boundaryCondition, index); + }; + int sideDirection = POSITIVE_DIR; + int coordinateDirection = X_INDEX; + SideType mySide = SideType::PX; + +private: + void correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighbors) const override + { + } + + int getDirection() const override + { + return sideDirection; + } + + void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override + { + } + + int getCoordinate() const override + { + return coordinateDirection; + } + + SideType whoAmI() const override + { + return mySide; + } +}; + +class GridDouble : public GridImp +{ + +public: + int endDirection = -1; + + GridDouble() + { + this->distribution = DistributionHelper::getDistribution27(); + } + + void transIndexToCoords(uint index, real &x, real &y, real &z) const override + { + x = 0; + y = 0; + z = 0; + } + + real getDelta() const override + { + return 1.0; + } + + uint transCoordToIndex(const real &x, const real &y, const real &z) const override + { + return 0; + } + + char getFieldEntry(uint /*matrixIndex*/) const override + { + return STOPPER_OUT_OF_GRID_BOUNDARY; + } + + int getEndDirection() const override + { + return endDirection; + } +}; + +class BoundaryConditionSpy : public gg::BoundaryCondition +{ +public: + char getType() const override + { + return 't'; + }; + const std::vector<std::vector<real>> &getQs() + { + return this->qs; + } + void resetQVector() + { + this->qs.clear(); + } +}; + +class SideTestBC : public testing::Test +{ +protected: + SideTestSpecificSubclass side; + SPtr<GridDouble> grid = std::make_shared<GridDouble>(); + SPtr<BoundaryConditionSpy> bc = std::make_shared<BoundaryConditionSpy>(); + uint index = 0; + + std::vector<real> noBC; + + void SetUp() override + { + grid->endDirection = 26; + } +}; + 
+TEST_F(SideTestBC, setQs2D_whenSettingPX_setAllQsNormalToBC) +{ + grid->endDirection = 10; + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(11, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_PM0] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs2D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + grid->endDirection = 10; + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(11, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PM0] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPX_setAllQsNormalToPX) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + + // no previous BC on this node + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_PM0] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_PMP] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); + + // node already has BC in MX direction, but this does not change anything + + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + actualQs = bc->getQs().back(); // check the most recent entry (index 0 would still hold the first call's result if setQs appends) + + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenGeometryBCInVector_thenSetPX_throws) +{ + // do not add Geometry BC to this vector, as it has an invalid normal + grid->addBCalreadySet(SideType::GEOMETRY); + + EXPECT_THROW(side.setQs(grid, bc, index), std::out_of_range); +} + +TEST_F(SideTestBC, 
setQs3D_whenSettingPX_setAllQsNormalToBC) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_PM0] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_PMP] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PM0] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_PMP] = 0.5; + expectedQs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_PPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_PM0] = 0.5; 
+ expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_P00] = 0.5; + expectedQs[DIR_PP0] = 0.5; + expectedQs[DIR_PM0] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_PMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_P00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PM0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_P00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PM0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain) 
+{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_P00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PP0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_P00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PP0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PPP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_whenSettingMX_setAllQsNormalToBC) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_M00] = 0.5; + expectedQs[DIR_MP0] = 0.5; + expectedQs[DIR_MM0] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_MPP] = 0.5; + expectedQs[DIR_MMP] = 0.5; + expectedQs[DIR_MPM] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + 
std::vector<real> expectedQs(27, -1); + expectedQs[DIR_M00] = 0.5; + expectedQs[DIR_MM0] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_MMP] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_M00] = 0.5; + expectedQs[DIR_MP0] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_MPP] = 0.5; + expectedQs[DIR_MPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_M00] = 0.5; + expectedQs[DIR_MP0] = 0.5; + expectedQs[DIR_MM0] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_MPM] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_M00] = 0.5; + expectedQs[DIR_MP0] = 0.5; + expectedQs[DIR_MM0] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_MPP] = 0.5; + expectedQs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + 
side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_M00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MM0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_M00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MM0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::PZ); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_M00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MP0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain) +{ + side.coordinateDirection = X_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::MZ); + + side.setQs(grid, 
bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_M00] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MP0] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MPP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_whenSettingMZ_setAllQsNormalToBC) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00M] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_0PM] = 0.5; + expectedQs[DIR_0MM] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_MPM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00M] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_0PM] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_MPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00M] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_0MM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + 
+TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00M] = 0.5; + expectedQs[DIR_M0M] = 0.5; + expectedQs[DIR_0PM] = 0.5; + expectedQs[DIR_0MM] = 0.5; + expectedQs[DIR_MPM] = 0.5; + expectedQs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00M] = 0.5; + expectedQs[DIR_P0M] = 0.5; + expectedQs[DIR_0PM] = 0.5; + expectedQs[DIR_0MM] = 0.5; + expectedQs[DIR_PPM] = 0.5; + expectedQs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0PM] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + 
std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0PM] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PPM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0MM] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = NEGATIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0M] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0MM] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PMM] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_whenSettingPZ_setAllQsNormalToBC) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00P] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_0PP] = 0.5; + expectedQs[DIR_0MP] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_MPP] = 
0.5; + expectedQs[DIR_PMP] = 0.5; + expectedQs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00P] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_0PP] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_MPP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00P] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_0MP] = 0.5; + expectedQs[DIR_PMP] = 0.5; + expectedQs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQs(27, -1); + expectedQs[DIR_00P] = 0.5; + expectedQs[DIR_M0P] = 0.5; + expectedQs[DIR_0PP] = 0.5; + expectedQs[DIR_0MP] = 0.5; + expectedQs[DIR_MPP] = 0.5; + expectedQs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> 
expectedQs(27, -1); + expectedQs[DIR_00P] = 0.5; + expectedQs[DIR_P0P] = 0.5; + expectedQs[DIR_0PP] = 0.5; + expectedQs[DIR_0MP] = 0.5; + expectedQs[DIR_PPP] = 0.5; + expectedQs[DIR_PMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0PP] = 0.5; + expectedQsForTwoPreviousBCs[DIR_MPP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::MY); + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0PP] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PPP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::PX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_M0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0MP] = 0.5; + 
expectedQsForTwoPreviousBCs[DIR_MMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} + +TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain) +{ + side.coordinateDirection = Z_INDEX; + side.sideDirection = POSITIVE_DIR; + grid->addBCalreadySet(SideType::PY); + grid->addBCalreadySet(SideType::MX); + + side.setQs(grid, bc, index); + auto actualQs = bc->getQs()[0]; + + std::vector<real> expectedQsForTwoPreviousBCs(27, -1); + expectedQsForTwoPreviousBCs[DIR_00P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_P0P] = 0.5; + expectedQsForTwoPreviousBCs[DIR_0MP] = 0.5; + expectedQsForTwoPreviousBCs[DIR_PMP] = 0.5; + EXPECT_THAT(actualQs, testing::Eq(expectedQsForTwoPreviousBCs)); +} diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h index 3f28120a5d969fcc5d7b2a3402a2169ff97c0cc3..ad2ce473fb65fe4414f6da5c4caf0d3e140b7e02 100644 --- a/src/gpu/GridGenerator/grid/Grid.h +++ b/src/gpu/GridGenerator/grid/Grid.h @@ -47,6 +47,7 @@ struct Triangle; class GridInterface; class Object; class BoundingBox; +enum class SideType; class GRIDGENERATOR_EXPORT Grid { @@ -84,6 +85,8 @@ public: virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const = 0; virtual bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const = 0; + virtual bool isStopperForBC(uint index) const = 0; + virtual int *getNeighborsX() const = 0; virtual int *getNeighborsY() const = 0; virtual int *getNeighborsZ() const = 0; @@ -133,9 +136,9 @@ public: virtual void setPeriodicityY(bool periodicity) = 0; virtual void setPeriodicityZ(bool periodicity) = 0; - virtual bool getPeriodicityX() = 0; - virtual bool getPeriodicityY() = 0; - virtual bool getPeriodicityZ() = 0; + virtual bool getPeriodicityX() const = 0; + virtual bool getPeriodicityY() const = 0; + virtual bool getPeriodicityZ() const = 0; virtual void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) = 0; 
@@ -170,6 +173,11 @@ public: virtual void repairCommunicationIndices(int direction) = 0; + virtual bool nodeHasBC(uint index) const = 0; + + virtual std::vector<SideType> getBCAlreadySet() = 0; + virtual void addBCalreadySet(SideType side) = 0; + // needed for CUDA Streams virtual void findFluidNodeIndices(bool onlyBulk) = 0; virtual uint getNumberOfFluidNodes() const = 0; @@ -178,6 +186,20 @@ public: virtual void findFluidNodeIndicesBorder() = 0; virtual uint getNumberOfFluidNodesBorder() const = 0; virtual void getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const = 0; + + virtual void addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars) = 0; + virtual void addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce) = 0; + virtual void addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) = 0; + virtual void sortFluidNodeIndicesMacroVars() = 0; + virtual void sortFluidNodeIndicesApplyBodyForce() = 0; + virtual void sortFluidNodeIndicesAllFeatures() = 0; + + virtual uint getNumberOfFluidNodeIndicesMacroVars() const = 0; + virtual uint getNumberOfFluidNodeIndicesApplyBodyForce() const = 0; + virtual uint getNumberOfFluidNodeIndicesAllFeatures() const = 0; + virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars) const = 0; + virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce) const = 0; + virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures) const = 0; }; #endif diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 739aef59f76a33fa67d472a77ef258469f5e411c..f3d850384816f6690e5ffc158bbdc5e1df0ab328 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -54,6 +54,7 @@ class GridWrapper; class Transformator; class ArrowTransformator; class PolyDataWriterWrapper; +class 
TransientBCInputFileReader; class BoundingBox; class Grid; @@ -113,6 +114,15 @@ public: virtual void getPressureValues(real *rho, int *indices, int *neighborIndices, int level) const = 0; virtual void getPressureQs(real *qs[27], int level) const = 0; + virtual uint getPrecursorSize(int level) const = 0; + virtual void getPrecursorValues(uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, + real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM, + int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, + real& velocityX, real& velocityY, real& velocityZ, int level) const = 0; + + virtual void getPrecursorQs(real* qs[27], int level) const = 0; + virtual uint getGeometrySize(int level) const = 0; virtual void getGeometryIndices(int *indices, int level) const = 0; virtual void getGeometryQs(real *qs[27], int level) const = 0; @@ -136,6 +146,21 @@ public: virtual void getReceiveIndices(int *sendIndices, int direction, int level) = 0; virtual void findFluidNodes(bool splitDomain) = 0; + + virtual void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) = 0; + virtual void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) = 0; + virtual void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) = 0; + virtual void sortFluidNodeIndicesMacroVars(uint level) = 0; + virtual void sortFluidNodeIndicesApplyBodyForce(uint level) = 0; + virtual void sortFluidNodeIndicesAllFeatures(uint level) = 0; + virtual uint getNumberOfFluidNodesMacroVars(uint level) const = 0; + virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, int level) const = 0; + virtual uint getNumberOfFluidNodesApplyBodyForce(uint level) const = 0; + virtual void getFluidNodeIndicesApplyBodyForce(uint 
*fluidNodeIndicesApplyBodyForce, int level) const = 0; + virtual uint getNumberOfFluidNodesAllFeatures(uint level) const = 0; + virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, int level) const = 0; + + }; -#endif \ No newline at end of file +#endif diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index 083b9a51e0b151f49922df456e968c4b204e4af7..003e6dcd223d2bf019c83f71349a9a7bec84efdc 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -1,28 +1,28 @@ //======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ // \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ 
_____| |__| |________| \_______/ |__| |______/ (_______/ +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -// This file is part of VirtualFluids. VirtualFluids is free software: you can +// This file is part of VirtualFluids. VirtualFluids is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. -// +// // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. 
// @@ -52,6 +52,8 @@ #include "io/QLineWriter.h" #include "io/SimulationFileWriter/SimulationFileWriter.h" +#include "TransientBCSetter/TransientBCSetter.h" + #include "utilities/communication.h" #include "utilities/transformator/ArrowTransformator.h" @@ -103,28 +105,33 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma for (uint level = 0; level < getNumberOfGridLevels(); level++) { - if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) - { - boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX; - boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY; - boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ; - boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); + if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) + { + boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX; + boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY; + boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ; + boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); boundaryConditions[level]->geometryBoundaryCondition->fillSlipNormalLists(); *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n"; - } + } } } -void LevelGridBuilder::setStressBoundaryCondition( SideType sideType, - real nomalX, real normalY, real normalZ, - uint samplingOffset, real z0) +//======================================================================================= +//! \brief Set stress boundary concdition using iMEM +//! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled +//! 
\param z0 roughness length [m] +//! \param dx dx of level 0 [m] +//! +void LevelGridBuilder::setStressBoundaryCondition( SideType sideType, + real nomalX, real normalY, real normalZ, + uint samplingOffset, real z0, real dx) { for (uint level = 0; level < getNumberOfGridLevels(); level++) { - SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0); - + SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0*pow(2.0f,level)/dx); auto side = SideFactory::make(sideType); stressBoundaryCondition->side = side; @@ -171,17 +178,17 @@ void LevelGridBuilder::setVelocityGeometryBoundaryCondition(real vx, real vy, re for (uint level = 0; level < getNumberOfGridLevels(); level++) { - if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) - { - boundaryConditions[level]->geometryBoundaryCondition->vx = vx; - boundaryConditions[level]->geometryBoundaryCondition->vy = vy; - boundaryConditions[level]->geometryBoundaryCondition->vz = vz; - boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); + if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) + { + boundaryConditions[level]->geometryBoundaryCondition->vx = vx; + boundaryConditions[level]->geometryBoundaryCondition->vy = vy; + boundaryConditions[level]->geometryBoundaryCondition->vz = vz; + boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); boundaryConditions[level]->geometryBoundaryCondition->fillVelocityLists(); *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Velocity BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n"; - } + } } } @@ -223,7 +230,7 @@ void 
LevelGridBuilder::setNoSlipBoundaryCondition(SideType sideType) noSlipBoundaryCondition->fillVelocityLists(); // now effectively just a wrapper for velocityBC with zero velocity. No distinction in Gridgenerator. - boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition); + boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition); } } } @@ -234,12 +241,45 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition() for (uint level = 0; level < getNumberOfGridLevels(); level++) { - if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) - { - boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); + if (boundaryConditions[level]->geometryBoundaryCondition != nullptr) + { + boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition); *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry No-Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n"; - } + } + } +} + +void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, + real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap) +{ + if(fileLevelToGridLevelMap.empty()) + { + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Mapping precursor file levels to the corresponding grid levels" << "\n"; + + for (uint level = 0; level < getNumberOfGridLevels(); level++) + fileLevelToGridLevelMap.push_back(level); + } + else + { + if(fileLevelToGridLevelMap.size()!=getNumberOfGridLevels()) + throw std::runtime_error("In setPrecursorBoundaryCondition: fileLevelToGridLevelMap does not match with the number of levels"); + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Using user defined 
file to grid level mapping" << "\n"; + } + + for (uint level = 0; level < getNumberOfGridLevels(); level++) + { + auto reader = createReaderForCollection(fileCollection, fileLevelToGridLevelMap[level]); + SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ); + + auto side = SideFactory::make(sideType); + + precursorBoundaryCondition->side = side; + precursorBoundaryCondition->side->addIndices(grids, level, precursorBoundaryCondition); + + boundaryConditions[level]->precursorBoundaryConditions.push_back(precursorBoundaryCondition); + + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Precursor BC on level " << level << " with " << (int)precursorBoundaryCondition->indices.size() << "\n"; } } @@ -373,9 +413,9 @@ std::shared_ptr<Grid> LevelGridBuilder::getGrid(int level, int box) void LevelGridBuilder::checkLevel(int level) { if (level >= (int)grids.size()) - { + { std::cout << "wrong level input... 
return to caller\n"; - return; + return; } } @@ -386,16 +426,16 @@ void LevelGridBuilder::getDimensions(int &nx, int &ny, int &nz, const int level) nz = grids[level]->getNumberOfNodesZ(); } -void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords, - uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, +void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords, + uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, uint *geo, const int level) const { grids[level]->getNodeValues(xCoords, yCoords, zCoords, neighborX, neighborY, neighborZ, neighborNegative, geo); } -GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const -{ +GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const +{ grids[level]->getFluidNodeIndices(fluidNodeIndices); } @@ -404,9 +444,9 @@ GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndicesBorder(uint *flui grids[level]->getFluidNodeIndicesBorder(fluidNodeIndices); } -uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const +uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const { - return grids[level]->getNumberOfFluidNodes(); + return grids[level]->getNumberOfFluidNodes(); } GRIDGENERATOR_EXPORT uint LevelGridBuilder::getNumberOfFluidNodesBorder(unsigned int level) const @@ -432,7 +472,7 @@ void LevelGridBuilder::getSlipValues(real* normalX, real* normalY, real* normalZ for (uint index = 0; index < boundaryCondition->indices.size(); index++) { indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[index]) + 1; - + normalX[allIndicesCounter] = boundaryCondition->getNormalx(index); normalY[allIndicesCounter] = boundaryCondition->getNormaly(index); normalZ[allIndicesCounter] = boundaryCondition->getNormalz(index); @@ -467,9 +507,9 @@ uint LevelGridBuilder::getStressSize(int level) 
const return size; } -void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ, - real* vx, real* vy, real* vz, - real* vx1, real* vy1, real* vz1, +void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ, + real* vx, real* vy, real* vz, + real* vx1, real* vy1, real* vz1, int* indices, int* samplingIndices, int* samplingOffset, real* z0, int level) const { @@ -525,7 +565,7 @@ void LevelGridBuilder::getVelocityValues(real* vx, real* vy, real* vz, int* indi { for (uint i = 0; i < (uint)boundaryCondition->indices.size(); i++) { - indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1; + indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1; vx[allIndicesCounter] = boundaryCondition->getVx(i); vy[allIndicesCounter] = boundaryCondition->getVy(i); @@ -594,11 +634,91 @@ void LevelGridBuilder::getPressureQs(real* qs[27], int level) const } } +uint LevelGridBuilder::getPrecursorSize(int level) const +{ + uint size = 0; + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + size += uint(boundaryCondition->indices.size()); + } + return size; +} + +void LevelGridBuilder::getPrecursorValues( uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, + real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM, + int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, + real& velocityX, real& velocityY, real& velocityZ, int level) const +{ + int allIndicesCounter = 0; + int allNodesCounter = 0; + uint tmpTimeStepsBetweenReads = 0; + size_t tmpNumberOfQuantities = 0; + + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + if( tmpTimeStepsBetweenReads == 0 ) + tmpTimeStepsBetweenReads = boundaryCondition->timeStepsBetweenReads; + if( 
tmpTimeStepsBetweenReads != boundaryCondition->timeStepsBetweenReads ) + throw std::runtime_error("All precursor boundary conditions must have the same timeStepsBetweenReads value"); + auto BCreader = boundaryCondition->getReader(); + BCreader->setWritingOffset(allIndicesCounter); + reader.push_back(BCreader); + + std::vector<real> y, z; + real xTmp, yTmp, zTmp; + for(uint i = 0; i<boundaryCondition->indices.size(); i++) + { + indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) + 1; + grids[level]->transIndexToCoords(boundaryCondition->indices[i], xTmp, yTmp, zTmp); + y.push_back(yTmp); + z.push_back(zTmp); + allIndicesCounter++; + } + BCreader->fillArrays(y, z); + BCreader->getNeighbors(neighbor0PP, neighbor0PM, neighbor0MP, neighbor0MM); + BCreader->getWeights(weights0PP, weights0PM, weights0MP, weights0MM); + if(tmpNumberOfQuantities == 0) + tmpNumberOfQuantities = BCreader->getNumberOfQuantities(); + if(tmpNumberOfQuantities != BCreader->getNumberOfQuantities()) + throw std::runtime_error("All precursor files must have the same quantities."); + allNodesCounter += BCreader->getNPointsRead(); + velocityX = boundaryCondition->getVelocityX(); + velocityY = boundaryCondition->getVelocityY(); + velocityZ = boundaryCondition->getVelocityZ(); + } + numberOfPrecursorNodes = allNodesCounter; + + if (tmpTimeStepsBetweenReads == 0) + throw std::runtime_error("timeStepsBetweenReads of precursor needs to be larger than 0."); + timeStepsBetweenReads = tmpTimeStepsBetweenReads; + + if (tmpNumberOfQuantities == 0) + throw std::runtime_error("Number of quantities in precursor needs to be larger than 0."); + numberOfQuantities = tmpNumberOfQuantities; +} + +void LevelGridBuilder::getPrecursorQs(real* qs[27], int level) const +{ + int allIndicesCounter = 0; + for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions) + { + for ( uint index = 0; index < boundaryCondition->indices.size(); index++ ) + { + for (int dir = 
0; dir <= grids[level]->getEndDirection(); dir++) + { + qs[dir][allIndicesCounter] = boundaryCondition->qs[index][dir]; + } + allIndicesCounter++; + } + } +} + uint LevelGridBuilder::getGeometrySize(int level) const { if (boundaryConditions[level]->geometryBoundaryCondition) return (uint)boundaryConditions[level]->geometryBoundaryCondition->indices.size(); - + return 0; } @@ -619,9 +739,9 @@ void LevelGridBuilder::getGeometryValues(real* vx, real* vy, real* vz, int level { for (uint i = 0; i < boundaryConditions[level]->geometryBoundaryCondition->indices.size(); i++) { - vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i); - vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i); - vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i); + vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i); + vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i); + vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i); } } @@ -636,7 +756,7 @@ void LevelGridBuilder::getGeometryQs(real* qs[27], int level) const } } -void LevelGridBuilder::writeArrows(std::string fileName) const +void LevelGridBuilder::writeArrows(std::string fileName) const { QLineWriter::writeArrows(fileName, boundaryConditions[getNumberOfGridLevels() - 1]->geometryBoundaryCondition, grids[getNumberOfGridLevels() - 1]); } @@ -674,4 +794,65 @@ void LevelGridBuilder::findFluidNodes(bool splitDomain) for (uint i = 0; i < grids.size(); i++) grids[i]->findFluidNodeIndices(splitDomain); *logging::out << logging::Logger::INFO_HIGH << "Done with findFluidNodes()\n"; -} \ No newline at end of file +} + + +void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) +{ + grids[level]->addFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars); +} + +void LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) 
+{ + grids[level]->addFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce); +} + +void LevelGridBuilder::addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) +{ + grids[level]->addFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures); +} + +void LevelGridBuilder::sortFluidNodeIndicesMacroVars(uint level) +{ + grids[level]->sortFluidNodeIndicesMacroVars(); +} + +void LevelGridBuilder::sortFluidNodeIndicesApplyBodyForce(uint level) +{ + grids[level]->sortFluidNodeIndicesApplyBodyForce(); +} + +void LevelGridBuilder::sortFluidNodeIndicesAllFeatures(uint level) +{ + grids[level]->sortFluidNodeIndicesAllFeatures(); +} + +uint LevelGridBuilder::getNumberOfFluidNodesMacroVars(unsigned int level) const +{ + return grids[level]->getNumberOfFluidNodeIndicesMacroVars(); +} + +void LevelGridBuilder::getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const +{ + grids[level]->getFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars); +} + +uint LevelGridBuilder::getNumberOfFluidNodesApplyBodyForce(unsigned int level) const +{ + return grids[level]->getNumberOfFluidNodeIndicesApplyBodyForce(); +} + +void LevelGridBuilder::getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const +{ + grids[level]->getFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce); +} + +uint LevelGridBuilder::getNumberOfFluidNodesAllFeatures(unsigned int level) const +{ + return grids[level]->getNumberOfFluidNodeIndicesAllFeatures(); +} + +void LevelGridBuilder::getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const +{ + grids[level]->getFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures); +} diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index afb027fc1665ab874523bf39ec2a05518d28f7a1..2e0eaf13080c46260de2a0c845fbf784a2cc3e09 100644 --- 
a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -1,28 +1,28 @@ //======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ // \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -// This file is part of VirtualFluids. VirtualFluids is free software: you can +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. -// +// // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // @@ -38,6 +38,8 @@ #include <memory> #include <array> +#include <lbm/constants/NumericConstants.h> + #include "gpu/GridGenerator/global.h" #include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h" @@ -45,6 +47,8 @@ #include "gpu/GridGenerator/grid/GridInterface.h" #include "gpu/GridGenerator/grid/NodeValues.h" +using namespace vf::lbm::constant; + struct Vertex; class Grid; class Transformator; @@ -58,9 +62,11 @@ class SlipBoundaryCondition; class StressBoundaryCondition; class PressureBoundaryCondition; class GeometryBoundaryCondition; +class PrecursorBoundaryCondition; enum class SideType; - +class TransientBCInputFileReader; +class FileCollection; class LevelGridBuilder : public GridBuilder { @@ -75,11 +81,14 @@ public: GRIDGENERATOR_EXPORT ~LevelGridBuilder() override; GRIDGENERATOR_EXPORT void setSlipBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ); - GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, 
uint samplingOffset, real z0); + GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0, real dx); GRIDGENERATOR_EXPORT void setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz); GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho); GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z); GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType); + GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, + real velocityX=c0o1, real velocityY=c0o1, real velocityZ=c0o1, + std::vector<uint> fileLevelToGridLevelMap = {}); GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall); @@ -97,7 +106,7 @@ public: GRIDGENERATOR_EXPORT virtual void getFluidNodeIndicesBorder(uint *fluidNodeIndices, const int level) const override; GRIDGENERATOR_EXPORT virtual void getNodeValues(real *xCoords, real *yCoords, real *zCoords, - uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, + uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, uint *geo, const int level) const override; GRIDGENERATOR_EXPORT virtual void getDimensions(int &nx, int &ny, int &nz, const int level) const override; @@ -107,12 +116,12 @@ public: GRIDGENERATOR_EXPORT virtual void getSlipQs(real* qs[27], int level) const override; GRIDGENERATOR_EXPORT uint getStressSize(int level) const override; - GRIDGENERATOR_EXPORT virtual void getStressValues( real* normalX, real* normalY, real* normalZ, - real* vx, real* vy, real* vz, - real* vx1, real* vy1, real* vz1, + GRIDGENERATOR_EXPORT virtual void getStressValues( real* normalX, real* normalY, real* normalZ, + real* vx, real* vy, real* vz, + real* vx1, real* vy1, real* vz1, int* indices, int* samplingIndices, 
int* samplingOffsets, real* z0, int level) const override; GRIDGENERATOR_EXPORT virtual void getStressQs(real* qs[27], int level) const override; - + GRIDGENERATOR_EXPORT uint getVelocitySize(int level) const override; GRIDGENERATOR_EXPORT virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const override; GRIDGENERATOR_EXPORT virtual void getVelocityQs(real* qs[27], int level) const override; @@ -121,6 +130,14 @@ public: GRIDGENERATOR_EXPORT void getPressureValues(real* rho, int* indices, int* neighborIndices, int level) const override; GRIDGENERATOR_EXPORT virtual void getPressureQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT uint getPrecursorSize(int level) const override; + GRIDGENERATOR_EXPORT void getPrecursorValues( uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, + real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM, + int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, + int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, + real& velocityX, real& velocityY, real& velocityZ, int level) const override; + GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override; + GRIDGENERATOR_EXPORT virtual void getGeometryQs(real *qs[27], int level) const override; GRIDGENERATOR_EXPORT virtual uint getGeometrySize(int level) const override; GRIDGENERATOR_EXPORT virtual void getGeometryIndices(int *indices, int level) const override; @@ -133,11 +150,11 @@ public: GRIDGENERATOR_EXPORT SPtr<GeometryBoundaryCondition> getGeometryBoundaryCondition(uint level) const override; protected: - + struct BoundaryConditions { - BoundaryConditions() = default; + BoundaryConditions() = default; std::vector<SPtr<SlipBoundaryCondition>> slipBoundaryConditions; @@ -149,13 +166,15 @@ protected: std::vector<SPtr<VelocityBoundaryCondition>> noSlipBoundaryConditions; + std::vector<SPtr<PrecursorBoundaryCondition>> 
precursorBoundaryConditions; + SPtr<GeometryBoundaryCondition> geometryBoundaryCondition; }; bool geometryHasValues = false; std::vector<std::shared_ptr<Grid> > grids; std::vector<SPtr<BoundaryConditions> > boundaryConditions; - + std::array<uint, 6> communicationProcesses; void checkLevel(int level); @@ -194,7 +213,21 @@ public: // needed for CUDA Streams MultiGPU (Communication Hiding) void findFluidNodes(bool splitDomain) override; + + void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) override; + void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) override; + void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) override; + + void sortFluidNodeIndicesMacroVars(uint level) override; + void sortFluidNodeIndicesApplyBodyForce(uint level) override; + void sortFluidNodeIndicesAllFeatures(uint level) override; + + uint getNumberOfFluidNodesMacroVars(unsigned int level) const override; + void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const override; + uint getNumberOfFluidNodesApplyBodyForce(unsigned int level) const override; + void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const override; + uint getNumberOfFluidNodesAllFeatures(unsigned int level) const override; + void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const override; }; #endif - diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp index 31bbf3ddc87184846fcb01a3e6631358b6a6f864..32cf9d07da87149695a5bf548ed357be2b2f71b4 100644 --- a/src/gpu/GridGenerator/grid/GridImp.cpp +++ b/src/gpu/GridGenerator/grid/GridImp.cpp @@ -1,28 +1,28 @@ //======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| 
| | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ // \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -// This file is part of VirtualFluids. VirtualFluids is free software: you can +// This file is part of VirtualFluids. VirtualFluids is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. -// +// // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // @@ -33,7 +33,6 @@ #include "GridImp.h" #include <iostream> -#include <omp.h> #include <sstream> # include <algorithm> #include <cmath> @@ -61,8 +60,8 @@ int DIRECTIONS[DIR_END_MAX][DIMENSION]; using namespace vf::gpu; -GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level) - : object(object), +GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level) + : object(object), startX(startX), startY(startY), startZ(startZ), @@ -135,7 +134,7 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers) #pragma omp parallel for for (int index = 0; index < (int)this->size; index++) this->initalNodeToOutOfGrid(index); - + if( this->innerRegionFromFinerGrid ){ *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start setInnerBasedOnFinerGrid()\n"; this->setInnerBasedOnFinerGrid(fineGrid); @@ -147,12 +146,12 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers) *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start addOverlap()\n"; this->addOverlap(); - + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixOddCells()\n"; #pragma omp parallel for for (int index = 0; index < (int)this->size; index++) this->fixOddCell(index); - + if( enableFixRefinementIntoTheWall ) { *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixRefinementIntoWall()\n"; @@ -180,12 +179,12 @@ void 
GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers) } } } - + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start findEndOfGridStopperNodes()\n"; #pragma omp parallel for for (int index = 0; index < (int)this->size; index++) this->findEndOfGridStopperNode(index); - + *logging::out << logging::Logger::INFO_INTERMEDIATE << "Grid created: " << "from (" << this->startX << ", " << this->startY << ", " << this->startZ << ") to (" << this->endX << ", " << this->endY << ", " << this->endZ << ")\n" << "nodes: " << this->nx << " x " << this->ny << " x " << this->nz << " = " << this->size << "\n"; @@ -209,9 +208,9 @@ void GridImp::freeMemory() if( this->neighborIndexZ != nullptr ) { delete[] this->neighborIndexZ; this->neighborIndexZ = nullptr; } if( this->neighborIndexNegative != nullptr ) { delete[] this->neighborIndexNegative; this->neighborIndexNegative = nullptr; } if( this->sparseIndices != nullptr ) { delete[] this->sparseIndices; this->sparseIndices = nullptr; } - if( this->qIndices != nullptr ) { delete[] this->qIndices; this->qIndices = nullptr; } - if( this->qValues != nullptr ) { delete[] this->qValues; this->qValues = nullptr; } - if( this->qPatches != nullptr ) { delete[] this->qPatches; this->qPatches = nullptr; } + if( this->qIndices != nullptr ) { delete[] this->qIndices; this->qIndices = nullptr; } + if( this->qValues != nullptr ) { delete[] this->qValues; this->qValues = nullptr; } + if( this->qPatches != nullptr ) { delete[] this->qPatches; this->qPatches = nullptr; } field.freeMemory(); } @@ -254,7 +253,7 @@ void GridImp::discretize(Object* solidObject, char innerType, char outerType) this->sparseIndices[index] = index; if( this->getFieldEntry(index) == innerType ) continue; - + real x, y, z; this->transIndexToCoords(index, x, y, z); @@ -279,7 +278,7 @@ bool GridImp::isInside(const Cell& cell) const // | +-----+-----+-----+ | +-----+-----+-----+ // +---------+ +---------+ // 0 1 2 0 1 2 -// even even even +// even even even // odd 
odd odd // Cell GridImp::getOddCellFromIndex(uint index) const @@ -349,7 +348,7 @@ void GridImp::addOverlap() void GridImp::setOverlapTmp( uint index ) { if( this->field.is( index, INVALID_OUT_OF_GRID ) ){ - + if( this->hasNeighborOfType(index, FLUID) ){ this->field.setFieldEntry( index, OVERLAP_TMP ); } @@ -380,7 +379,7 @@ void GridImp::fixRefinementIntoWall(uint xIndex, uint yIndex, uint zIndex, int d if( this->xOddStart && ( dir == 1 || dir == -1 ) && ( xIndex % 2 == 0 && xIndex != 0 ) ) return; if( this->yOddStart && ( dir == 2 || dir == -2 ) && ( yIndex % 2 == 0 && yIndex != 0 ) ) return; if( this->zOddStart && ( dir == 3 || dir == -3 ) && ( zIndex % 2 == 0 && zIndex != 0 ) ) return; - + ////////////////////////////////////////////////////////////////////////// real dx{ 0.0 }, dy{ 0.0 }, dz{ 0.0 }; @@ -433,31 +432,31 @@ void GridImp::findStopperNode(uint index) // deprecated void GridImp::findEndOfGridStopperNode(uint index) { - if (isValidEndOfGridStopper(index)){ + if (isValidEndOfGridStopper(index)){ if( this->level != 0 ) - this->field.setFieldEntryToStopperOutOfGrid(index); + this->field.setFieldEntryToStopperOutOfGrid(index); else this->field.setFieldEntryToStopperOutOfGridBoundary(index); } - - if (isValidEndOfGridBoundaryStopper(index)) - this->field.setFieldEntryToStopperOutOfGridBoundary(index); + + if (isValidEndOfGridBoundaryStopper(index)) + this->field.setFieldEntryToStopperOutOfGridBoundary(index); } void GridImp::findSolidStopperNode(uint index) { - if (isValidSolidStopper(index)) - this->field.setFieldEntry(index, STOPPER_SOLID); + if (isValidSolidStopper(index)) + this->field.setFieldEntry(index, STOPPER_SOLID); } void GridImp::findBoundarySolidNode(uint index) { - if (shouldBeBoundarySolidNode(index)) - { - this->field.setFieldEntry(index, BC_SOLID); - this->qIndices[index] = this->numberOfSolidBoundaryNodes++; - //grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1); - } + if (shouldBeBoundarySolidNode(index)) + { 
+ this->field.setFieldEntry(index, BC_SOLID); + this->qIndices[index] = this->numberOfSolidBoundaryNodes++; + //grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1); + } } void GridImp::fixOddCell(uint index) @@ -483,9 +482,9 @@ bool GridImp::isOutSideOfGrid(Cell &cell) const bool GridImp::contains(Cell &cell, char type) const { for (const auto point : cell) { - uint index = transCoordToIndex(point.x, point.y, point.z); - if (index == INVALID_INDEX) - continue; + uint index = transCoordToIndex(point.x, point.y, point.z); + if (index == INVALID_INDEX) + continue; if (field.is(index, type)) return true; } @@ -495,8 +494,8 @@ bool GridImp::contains(Cell &cell, char type) const bool GridImp::cellContainsOnly(Cell &cell, char type) const { for (const auto point : cell) { - uint index = transCoordToIndex(point.x, point.y, point.z); - if (index == INVALID_INDEX) + uint index = transCoordToIndex(point.x, point.y, point.z); + if (index == INVALID_INDEX) return false; if (!field.is(index, type)) return false; @@ -507,8 +506,8 @@ bool GridImp::cellContainsOnly(Cell &cell, char type) const bool GridImp::cellContainsOnly(Cell &cell, char typeA, char typeB) const { for (const auto point : cell) { - uint index = transCoordToIndex(point.x, point.y, point.z); - if (index == INVALID_INDEX) + uint index = transCoordToIndex(point.x, point.y, point.z); + if (index == INVALID_INDEX) return false; if (!field.is(index, typeA) && !field.is(index, typeB)) return false; @@ -524,91 +523,91 @@ const Object * GridImp::getObject() const void GridImp::setNodeTo(Cell &cell, char type) { for (const auto point : cell) { - uint index = transCoordToIndex(point.x, point.y, point.z); - if (index == INVALID_INDEX) - continue; - field.setFieldEntry(index, type); + uint index = transCoordToIndex(point.x, point.y, point.z); + if (index == INVALID_INDEX) + continue; + field.setFieldEntry(index, type); } } void GridImp::setNodeTo(uint index, char type) { - if( index != INVALID_INDEX 
) - field.setFieldEntry(index, type); + if( index != INVALID_INDEX ) + field.setFieldEntry(index, type); } bool GridImp::isNode(uint index, char type) const { if( index != INVALID_INDEX ) - return field.is(index, type); + return field.is(index, type); throw std::runtime_error("GridImp::isNode() -> index == INVALID_INDEX not supported."); } bool GridImp::isValidEndOfGridStopper(uint index) const { - // Lenz: also includes corner stopper nodes - if (!this->field.is(index, INVALID_OUT_OF_GRID)) - return false; + // Lenz: also includes corner stopper nodes + if (!this->field.is(index, INVALID_OUT_OF_GRID)) + return false; - return hasNeighborOfType(index, FLUID); + return hasNeighborOfType(index, FLUID); } bool GridImp::isValidEndOfGridBoundaryStopper(uint index) const { - // Lenz: also includes corner stopper nodes - if (!this->field.is(index, FLUID)) - return false; + // Lenz: also includes corner stopper nodes + if (!this->field.is(index, FLUID)) + return false; - return ! hasAllNeighbors(index); + return ! 
hasAllNeighbors(index); } bool GridImp::isValidSolidStopper(uint index) const { - // Lenz: also includes corner stopper nodes - if (!this->field.is(index, INVALID_SOLID)) - return false; + // Lenz: also includes corner stopper nodes + if (!this->field.is(index, INVALID_SOLID)) + return false; - return hasNeighborOfType(index, FLUID); + return hasNeighborOfType(index, FLUID); } bool GridImp::shouldBeBoundarySolidNode(uint index) const { - if (!this->field.is(index, FLUID)) - return false; + if (!this->field.is(index, FLUID)) + return false; - return hasNeighborOfType(index, STOPPER_SOLID); + return hasNeighborOfType(index, STOPPER_SOLID); } bool GridImp::hasAllNeighbors(uint index) const { - // new version by Lenz, utilizes the range based for loop for all directions - real x, y, z; - this->transIndexToCoords(index, x, y, z); - for (const auto dir : this->distribution) { - const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta()); + // new version by Lenz, utilizes the range based for loop for all directions + real x, y, z; + this->transIndexToCoords(index, x, y, z); + for (const auto dir : this->distribution) { + const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta()); - if (neighborIndex == INVALID_INDEX) return false; - } + if (neighborIndex == INVALID_INDEX) return false; + } - return true; + return true; } bool GridImp::hasNeighborOfType(uint index, char type) const { - // new version by Lenz, utilizes the range based for loop for all directions - real x, y, z; - this->transIndexToCoords(index, x, y, z); - for (const auto dir : this->distribution) { - const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta()); + // new version by Lenz, utilizes the range based for loop for all directions + real x, y, z; + 
this->transIndexToCoords(index, x, y, z); + for (const auto dir : this->distribution) { + const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta()); - if (neighborIndex == INVALID_INDEX) continue; + if (neighborIndex == INVALID_INDEX) continue; - if (this->field.is(neighborIndex, type)) - return true; - } + if (this->field.is(neighborIndex, type)) + return true; + } - return false; + return false; } bool GridImp::nodeInNextCellIs(int index, char type) const @@ -630,13 +629,13 @@ bool GridImp::nodeInNextCellIs(int index, char type) const const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ); - const bool typeX = indexX == INVALID_INDEX ? false : this->field.is(indexX, type); - const bool typeY = indexY == INVALID_INDEX ? false : this->field.is(indexY, type); - const bool typeXY = indexXY == INVALID_INDEX ? false : this->field.is(indexXY, type); - const bool typeZ = indexZ == INVALID_INDEX ? false : this->field.is(indexZ, type); - const bool typeYZ = indexYZ == INVALID_INDEX ? false : this->field.is(indexYZ, type); - const bool typeXZ = indexXZ == INVALID_INDEX ? false : this->field.is(indexXZ, type); - const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type); + const bool typeX = indexX == INVALID_INDEX ? false : this->field.is(indexX, type); + const bool typeY = indexY == INVALID_INDEX ? false : this->field.is(indexY, type); + const bool typeXY = indexXY == INVALID_INDEX ? false : this->field.is(indexXY, type); + const bool typeZ = indexZ == INVALID_INDEX ? false : this->field.is(indexZ, type); + const bool typeYZ = indexYZ == INVALID_INDEX ? false : this->field.is(indexYZ, type); + const bool typeXZ = indexXZ == INVALID_INDEX ? false : this->field.is(indexXZ, type); + const bool typeXYZ = indexXYZ == INVALID_INDEX ? 
false : this->field.is(indexXYZ, type); return typeX || typeY || typeXY || typeZ || typeYZ || typeXZ || typeXYZ; @@ -661,13 +660,13 @@ bool GridImp::nodeInPreviousCellIs(int index, char type) const const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ); - const bool typeX = indexX == INVALID_INDEX ? false : this->field.is(indexX , type); - const bool typeY = indexY == INVALID_INDEX ? false : this->field.is(indexY , type); - const bool typeXY = indexXY == INVALID_INDEX ? false : this->field.is(indexXY , type); - const bool typeZ = indexZ == INVALID_INDEX ? false : this->field.is(indexZ , type); - const bool typeYZ = indexYZ == INVALID_INDEX ? false : this->field.is(indexYZ , type); - const bool typeXZ = indexXZ == INVALID_INDEX ? false : this->field.is(indexXZ , type); - const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type); + const bool typeX = indexX == INVALID_INDEX ? false : this->field.is(indexX , type); + const bool typeY = indexY == INVALID_INDEX ? false : this->field.is(indexY , type); + const bool typeXY = indexXY == INVALID_INDEX ? false : this->field.is(indexXY , type); + const bool typeZ = indexZ == INVALID_INDEX ? false : this->field.is(indexZ , type); + const bool typeYZ = indexYZ == INVALID_INDEX ? false : this->field.is(indexYZ , type); + const bool typeXZ = indexXZ == INVALID_INDEX ? false : this->field.is(indexXZ , type); + const bool typeXYZ = indexXYZ == INVALID_INDEX ? 
false : this->field.is(indexXYZ, type); return typeX || typeY || typeXY || typeZ || typeYZ || typeXZ || typeXYZ; @@ -678,8 +677,8 @@ bool GridImp::nodeInCellIs(Cell& cell, char type) const for (const auto node : cell) { const uint index = transCoordToIndex(node.x, node.y, node.z); - if (index == INVALID_INDEX) - continue; + if (index == INVALID_INDEX) + continue; if (field.is(index, type)) return true; } @@ -696,9 +695,9 @@ void GridImp::setCellTo(uint index, char type) for (const auto node : cell) { const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z); - if (nodeIndex == INVALID_INDEX) - continue; - this->field.setFieldEntry(nodeIndex, type); + if (nodeIndex == INVALID_INDEX) + continue; + this->field.setFieldEntry(nodeIndex, type); } } @@ -712,15 +711,21 @@ void GridImp::setNonStopperOutOfGridCellTo(uint index, char type) for (const auto node : cell) { const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z); - if (nodeIndex == INVALID_INDEX) - continue; + if (nodeIndex == INVALID_INDEX) + continue; - if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID && + if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID && this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID_BOUNDARY ) this->field.setFieldEntry(nodeIndex, type); } } +bool GridImp::nodeHasBC(uint index) const +{ + return (getFieldEntry(index) == vf::gpu::BC_PRESSURE || getFieldEntry(index) == vf::gpu::BC_VELOCITY || + getFieldEntry(index) == vf::gpu::BC_NOSLIP || getFieldEntry(index) == vf::gpu::BC_SLIP || + getFieldEntry(index) == vf::gpu::BC_STRESS); +} void GridImp::setPeriodicity(bool periodicityX, bool periodicityY, bool periodicityZ) { @@ -744,17 +749,17 @@ void GridImp::setPeriodicityZ(bool periodicity) this->periodicityZ = periodicity; } -bool GridImp::getPeriodicityX() +bool GridImp::getPeriodicityX() const { return this->periodicityX; } -bool GridImp::getPeriodicityY() +bool GridImp::getPeriodicityY() const { return this->periodicityY; } -bool 
GridImp::getPeriodicityZ() +bool GridImp::getPeriodicityZ() const { return this->periodicityZ; } @@ -770,7 +775,7 @@ uint GridImp::transCoordToIndex(const real &x, const real &y, const real &z) con const uint yIndex = getYIndex(y); const uint zIndex = getZIndex(z); - if (xIndex >= nx || yIndex >= ny || zIndex >= nz) + if (xIndex >= nx || yIndex >= ny || zIndex >= nz) return INVALID_INDEX; return xIndex + nx * (yIndex + ny * zIndex); @@ -819,20 +824,20 @@ TriangularMeshDiscretizationStrategy * GridImp::getTriangularMeshDiscretizationS uint GridImp::getNumberOfSolidBoundaryNodes() const { - return this->numberOfSolidBoundaryNodes; + return this->numberOfSolidBoundaryNodes; } void GridImp::setNumberOfSolidBoundaryNodes(uint numberOfSolidBoundaryNodes) { - if (numberOfSolidBoundaryNodes < INVALID_INDEX) - this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes; + if (numberOfSolidBoundaryNodes < INVALID_INDEX) + this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes; } real GridImp::getQValue(const uint index, const uint dir) const { - const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index]; + const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index]; - return this->qValues[qIndex]; + return this->qValues[qIndex]; } uint GridImp::getQPatch(const uint index) const @@ -858,7 +863,7 @@ void GridImp::findSparseIndices(SPtr<Grid> finerGrid) { *logging::out << logging::Logger::INFO_INTERMEDIATE << "Find sparse indices..."; auto fineGrid = std::static_pointer_cast<GridImp>(finerGrid); - + this->updateSparseIndices(); #pragma omp parallel for @@ -906,7 +911,7 @@ void GridImp::updateSparseIndices() sparseSize = size - removedNodes; } -void GridImp::findFluidNodeIndices(bool splitDomain) +void GridImp::findFluidNodeIndices(bool splitDomain) { // find sparse index of all fluid nodes this->fluidNodeIndices.clear(); @@ -935,7 +940,7 @@ void GridImp::findFluidNodeIndicesBorder() { // resize fluidNodeIndicesBorder (for 
better performance in copy operation) size_t newSize = 0; for (CommunicationIndices& ci : this->communicationIndices) - newSize += ci.sendIndices.size(); + newSize += ci.sendIndices.size(); this->fluidNodeIndicesBorder.reserve(newSize); // copy all send indices to fluidNodeIndicesBorder @@ -968,7 +973,7 @@ void GridImp::setNeighborIndices(uint index) this->setStopperNeighborCoords(index); return; } - + if (this->sparseIndices[index] == -1) return; @@ -1002,9 +1007,9 @@ void GridImp::setStopperNeighborCoords(uint index) if (vf::Math::lessEqual(z + delta, endZ + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y, z + delta))) neighborIndexZ[index] = getSparseIndex(x, y, z + delta); - if (vf::Math::greaterEqual(x - delta, endX) && - vf::Math::greaterEqual(y - delta, endY) && - vf::Math::greaterEqual(z - delta, endZ) && + if (vf::Math::greaterEqual(x - delta, endX) && + vf::Math::greaterEqual(y - delta, endY) && + vf::Math::greaterEqual(z - delta, endZ) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x - delta, y - delta, z - delta))) { neighborIndexNegative[index] = getSparseIndex(x - delta, y - delta, z - delta); @@ -1035,7 +1040,7 @@ real GridImp::getNeighborCoord(bool periodicity, real startCoord, real coords[3] return coords[direction] + delta; } - + return coords[direction] + delta; } @@ -1061,7 +1066,7 @@ real GridImp::getNegativeNeighborCoord(bool periodicity, real startCoord, real c return getLastFluidNode(coords, direction, startCoord); } - + return coords[direction] - delta; } @@ -1155,7 +1160,7 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks if( lbmOrGks == LBM ) tmpSubDomainBox.extend(this->delta); - if (!tmpSubDomainBox.isInside(x, y, z) + if (!tmpSubDomainBox.isInside(x, y, z) && ( this->getFieldEntry(index) == FLUID || this->getFieldEntry(index) == FLUID_CFC || this->getFieldEntry(index) == FLUID_CFF || @@ -1184,13 +1189,13 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> 
subDomainBox, LbmOrGks lbmOrGks void GridImp::findGridInterfaceCF(uint index, GridImp& finerGrid, LbmOrGks lbmOrGks) { - if (lbmOrGks == LBM) - { - gridInterface->findInterfaceCF (index, this, &finerGrid); - gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid); - } - else if (lbmOrGks == GKS) - gridInterface->findInterfaceCF_GKS(index, this, &finerGrid); + if (lbmOrGks == LBM) + { + gridInterface->findInterfaceCF (index, this, &finerGrid); + gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid); + } + else if (lbmOrGks == GKS) + gridInterface->findInterfaceCF_GKS(index, this, &finerGrid); } void GridImp::findGridInterfaceFC(uint index, GridImp& finerGrid) @@ -1217,16 +1222,16 @@ void GridImp::mesh(Object* object) if (triangularMesh) triangularMeshDiscretizationStrategy->discretize(triangularMesh, this, INVALID_SOLID, FLUID); else - //new method for geometric primitives (not cell based) to be implemented + //new method for geometric primitives (not cell based) to be implemented this->discretize(object, INVALID_SOLID, FLUID); this->closeNeedleCells(); - #pragma omp parallel for + #pragma omp parallel for for (int index = 0; index < (int)this->size; index++) this->findSolidStopperNode(index); - //#pragma omp parallel for + //#pragma omp parallel for for (int index = 0; index < (int)this->size; index++) { this->findBoundarySolidNode(index); } @@ -1359,7 +1364,7 @@ void GridImp::findQs(Object* object) //TODO: enable qs for primitive objects findQsPrimitive(object); } -void GridImp::allocateQs() +void GridImp::allocateQs() { this->qPatches = new uint[this->getNumberOfSolidBoundaryNodes()]; @@ -1379,8 +1384,8 @@ void GridImp::findQs(TriangularMesh &triangularMesh) if( this->qComputationStage == qComputationStageType::ComputeQs ) allocateQs(); - - + + #pragma omp parallel for for (int i = 0; i < triangularMesh.size; i++) this->findQs(triangularMesh.triangles[i]); @@ -1406,15 +1411,15 @@ void GridImp::findQs(Triangle &triangle) //if 
(!field.isFluid(index)) // continue; - if( index == INVALID_INDEX ) continue; + if( index == INVALID_INDEX ) continue; const Vertex point(x, y, z); if( this->qComputationStage == qComputationStageType::ComputeQs ){ if(this->field.is(index, BC_SOLID)) { - calculateQs(index, point, triangle); - } + calculateQs(index, point, triangle); + } } else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes ) { @@ -1449,14 +1454,14 @@ void GridImp::findQsPrimitive(Object * object) real x,y,z; this->transIndexToCoords(index,x,y,z); - + const Vertex point(x, y, z); if( this->qComputationStage == qComputationStageType::ComputeQs ){ if(this->field.is(index, BC_SOLID)) { - calculateQs(index, point, object); - } + calculateQs(index, point, object); + } } else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes ) { @@ -1477,66 +1482,66 @@ void GridImp::calculateQs(const uint index, const Vertex &point, Object* object) { Vertex pointOnTriangle, direction; - real subdistance; - int error; - for (int i = distribution.dir_start; i <= distribution.dir_end; i++) - { - direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), + real subdistance; + int error; + for (int i = distribution.dir_start; i <= distribution.dir_end; i++) + { + direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), real(distribution.dirs[i * DIMENSION + 1]), - real(distribution.dirs[i * DIMENSION + 2]) ); + real(distribution.dirs[i * DIMENSION + 2]) ); - uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, - point.y + direction.y * this->delta, - point.z + direction.z * this->delta); + uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, + point.y + direction.y * this->delta, + point.z + direction.z * this->delta); - if (neighborIndex == INVALID_INDEX) continue; + if (neighborIndex == INVALID_INDEX) continue; - error = object->getIntersection(point, direction, pointOnTriangle, subdistance); + error 
= object->getIntersection(point, direction, pointOnTriangle, subdistance); - subdistance /= this->delta; + subdistance /= this->delta; - if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) - { - if ( -0.5 > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] || + if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) + { + if ( -0.5 > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] || subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ) - { + { + + this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance; - this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance; - this->qPatches[ this->qIndices[index] ] = 0; - } - } - } + } + } + } } bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex &point, Object* object) const { Vertex pointOnTriangle, direction; - real subdistance; - int error; - for (int i = distribution.dir_start; i <= distribution.dir_end; i++) - { - direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), + real subdistance; + int error; + for (int i = distribution.dir_start; i <= distribution.dir_end; i++) + { + direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), real(distribution.dirs[i * DIMENSION + 1]), - real(distribution.dirs[i * DIMENSION + 2]) ); + real(distribution.dirs[i * DIMENSION + 2]) ); - uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, - point.y + direction.y * this->delta, - point.z + direction.z * this->delta); + uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, + point.y + direction.y * this->delta, + point.z + direction.z * this->delta); - if (neighborIndex == INVALID_INDEX) continue; + if (neighborIndex == INVALID_INDEX) continue; - error = object->getIntersection(point, direction, 
pointOnTriangle, subdistance); + error = object->getIntersection(point, direction, pointOnTriangle, subdistance); - subdistance /= this->delta; + subdistance /= this->delta; - if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) - { - return true; - } - } + if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) + { + return true; + } + } return false; } @@ -1565,7 +1570,7 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance); - subdistance /= this->delta; + subdistance /= this->delta; if (error == 0 && subdistance < 1.0 && subdistance > 0.0) { @@ -1577,81 +1582,80 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const void GridImp::calculateQs(const uint index, const Vertex &point, const Triangle &triangle) const { - Vertex pointOnTriangle, direction; - real subdistance; - int error; - for (int i = distribution.dir_start; i <= distribution.dir_end; i++) - { + Vertex pointOnTriangle, direction; + real subdistance; + int error; + for (int i = distribution.dir_start; i <= distribution.dir_end; i++) + { #if defined(__CUDA_ARCH__) - direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]); + direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]); #else - direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), + direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), real(distribution.dirs[i * DIMENSION + 1]), - real(distribution.dirs[i * DIMENSION + 2]) ); + real(distribution.dirs[i * DIMENSION + 2]) ); #endif - uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, - point.y + direction.y * this->delta, - point.z + direction.z * this->delta); + uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, + point.y + direction.y * 
this->delta, + point.z + direction.z * this->delta); - if (neighborIndex == INVALID_INDEX) continue; + if (neighborIndex == INVALID_INDEX) continue; - error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance); + error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance); - subdistance /= this->delta; + subdistance /= this->delta; - if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) - { - if ( -0.5 > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] || + if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) + { + if ( -0.5 > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] || subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ) - { - this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance; + { + this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance; this->qPatches[ this->qIndices[index] ] = triangle.patchIndex; - } - } - } + } + } + } } bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex & point, const Triangle & triangle) const { - Vertex pointOnTriangle, direction; - real subdistance; - int error; - for (int i = distribution.dir_start; i <= distribution.dir_end; i++) - { + Vertex pointOnTriangle, direction; + real subdistance; + int error; + for (int i = distribution.dir_start; i <= distribution.dir_end; i++) + { #if defined(__CUDA_ARCH__) - direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]); + direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]); #else - direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]), + direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]), real(distribution.dirs[i * DIMENSION + 1]), - real(distribution.dirs[i * DIMENSION + 2])); + real(distribution.dirs[i * 
DIMENSION + 2])); #endif - uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, - point.y + direction.y * this->delta, - point.z + direction.z * this->delta); - if (neighborIndex == INVALID_INDEX) continue; + uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta, + point.y + direction.y * this->delta, + point.z + direction.z * this->delta); + if (neighborIndex == INVALID_INDEX) continue; - error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance); + error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance); - subdistance /= this->delta; + subdistance /= this->delta; - if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) - { - return true; - } - } + if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0)) + { + return true; + } + } return false; } void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) { for( uint index = 0; index < this->size; index++ ){ - real x, y, z; this->transIndexToCoords(index, x, y, z); - + if( this->getFieldEntry(index) == INVALID_OUT_OF_GRID || this->getFieldEntry(index) == INVALID_SOLID || this->getFieldEntry(index) == INVALID_COARSE_UNDER_FINE || @@ -1660,7 +1664,6 @@ void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_OUT_OF_GRID_BOUNDARY ) continue; if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_SOLID ) continue; - if( direction == CommunicationDirections::MX ) findCommunicationIndex( index, x, subDomainBox->minX, direction); if( direction == CommunicationDirections::PX ) findCommunicationIndex( index, x, subDomainBox->maxX, direction); if( direction == CommunicationDirections::MY ) findCommunicationIndex( index, y, subDomainBox->minY, direction); @@ -1672,16 +1675,13 @@ void 
GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai void GridImp::findCommunicationIndex( uint index, real coordinate, real limit, int direction ){ // negative direction get a negative sign - real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 ); - + real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 ); - if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta) { - this->communicationIndices[direction].receiveIndices.push_back(index); - } + if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta) + this->communicationIndices[direction].receiveIndices.push_back(index); - if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta) { - this->communicationIndices[direction].sendIndices.push_back(index); - } + if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta) + this->communicationIndices[direction].sendIndices.push_back(index); } bool GridImp::isSendNode(int index) const @@ -1727,14 +1727,14 @@ uint GridImp::getReceiveIndex(int direction, uint index) void GridImp::repairCommunicationIndices(int direction) { - this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(), - this->communicationIndices[direction+1].sendIndices.begin(), + this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(), + this->communicationIndices[direction+1].sendIndices.begin(), this->communicationIndices[direction+1].sendIndices.end() ); - this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(), - this->communicationIndices[direction].receiveIndices.begin(), + this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(), + this->communicationIndices[direction].receiveIndices.begin(), this->communicationIndices[direction].receiveIndices.end() 
); this->communicationIndices[direction].receiveIndices = this->communicationIndices[direction+1].receiveIndices; @@ -1839,19 +1839,19 @@ real GridImp::getMaximumOnNodes(const real &maxExact, const real &decimalStart, return maxNode; } -uint GridImp::getXIndex(real x) const -{ - return std::lround((x - startX) / delta); +uint GridImp::getXIndex(real x) const +{ + return std::lround((x - startX) / delta); } uint GridImp::getYIndex(real y) const -{ - return std::lround((y - startY) / delta); +{ + return std::lround((y - startY) / delta); } uint GridImp::getZIndex(real z) const -{ - return std::lround((z - startZ) / delta); +{ + return std::lround((z - startZ) / delta); } real GridImp::getDelta() const @@ -1866,11 +1866,11 @@ uint GridImp::getSize() const uint GridImp::getSparseSize() const { - return this->sparseSize; + return this->sparseSize; } -uint GridImp::getNumberOfFluidNodes() const { - return (uint)this->fluidNodeIndices.size(); +uint GridImp::getNumberOfFluidNodes() const { + return (uint)this->fluidNodeIndices.size(); } Field GridImp::getField() const @@ -2063,23 +2063,147 @@ void GridImp::getNodeValues(real *xCoords, real *yCoords, real *zCoords, uint *n } } -void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const -{ +void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const +{ for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndices.size(); nodeNumber++) fluidNodeIndices[nodeNumber] = this->fluidNodeIndices[nodeNumber]; } -uint GridImp::getNumberOfFluidNodesBorder() const -{ - return (uint)this->fluidNodeIndicesBorder.size(); +uint GridImp::getNumberOfFluidNodesBorder() const +{ + return (uint)this->fluidNodeIndicesBorder.size(); } -void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const +void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const { for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndicesBorder.size(); nodeNumber++) fluidNodeIndicesBorder[nodeNumber] = 
this->fluidNodeIndicesBorder[nodeNumber]; } +void GridImp::addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars) +{ + size_t newSize = this->fluidNodeIndicesMacroVars.size()+_fluidNodeIndicesMacroVars.size(); + this->fluidNodeIndicesMacroVars.reserve(newSize); + std::copy(_fluidNodeIndicesMacroVars.begin(), _fluidNodeIndicesMacroVars.end(), std::back_inserter(this->fluidNodeIndicesMacroVars)); +} + +void GridImp::addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce) +{ + + size_t newSize = this->fluidNodeIndicesApplyBodyForce.size()+_fluidNodeIndicesApplyBodyForce.size(); + this->fluidNodeIndicesApplyBodyForce.reserve(newSize); + std::copy(_fluidNodeIndicesApplyBodyForce.begin(), _fluidNodeIndicesApplyBodyForce.end(), std::back_inserter(this->fluidNodeIndicesApplyBodyForce)); +} + +void GridImp::addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) +{ + + size_t newSize = this->fluidNodeIndicesAllFeatures.size()+_fluidNodeIndicesAllFeatures.size(); + this->fluidNodeIndicesAllFeatures.reserve(newSize); + std::copy(_fluidNodeIndicesAllFeatures.begin(), _fluidNodeIndicesAllFeatures.end(), std::back_inserter(this->fluidNodeIndicesAllFeatures)); +} + +void GridImp::sortFluidNodeIndicesMacroVars() +{ + if(this->fluidNodeIndicesMacroVars.size()>0) + { + sort(this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end()); + // Remove duplicates + this->fluidNodeIndicesMacroVars.erase( unique( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end() ), this->fluidNodeIndicesMacroVars.end() ); + + // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars + if(this->fluidNodeIndicesAllFeatures.size()>0) + { + this->fluidNodeIndicesMacroVars.erase( std::remove_if( this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(), + [&](auto x){return 
binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), + this->fluidNodeIndicesMacroVars.end() + ); + } + + // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices + this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), + [&](auto x){return binary_search(fluidNodeIndicesMacroVars.begin(),fluidNodeIndicesMacroVars.end(),x);} ), + this->fluidNodeIndices.end() + ); + } +} + +void GridImp::sortFluidNodeIndicesApplyBodyForce() +{ + if(this->fluidNodeIndicesApplyBodyForce.size()>0) + { + sort(this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end()); + // Remove duplicates + this->fluidNodeIndicesApplyBodyForce.erase( unique( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end() ), this->fluidNodeIndicesApplyBodyForce.end() ); + + // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars + if(this->fluidNodeIndicesAllFeatures.size()>0) + { + this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(), + [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), + this->fluidNodeIndicesApplyBodyForce.end() + ); + } + + // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices + this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), + [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ), + this->fluidNodeIndices.end() + ); + } +} + +void GridImp::sortFluidNodeIndicesAllFeatures() +{ + if(this->fluidNodeIndicesAllFeatures.size()>0) + { + sort(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end()); + // Remove duplicates + this->fluidNodeIndicesAllFeatures.erase( unique( 
this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() ); + // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices + this->fluidNodeIndices.erase( std::remove_if( this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), + [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ), + this->fluidNodeIndices.end() + ); + } +} + +uint GridImp::getNumberOfFluidNodeIndicesMacroVars() const { + return (uint)this->fluidNodeIndicesMacroVars.size(); +} + +uint GridImp::getNumberOfFluidNodeIndicesApplyBodyForce() const { + return (uint)this->fluidNodeIndicesApplyBodyForce.size(); +} + +uint GridImp::getNumberOfFluidNodeIndicesAllFeatures() const { + return (uint)this->fluidNodeIndicesAllFeatures.size(); +} + +void GridImp::getFluidNodeIndicesMacroVars(uint *_fluidNodeIndicesMacroVars) const +{ + std::copy(fluidNodeIndicesMacroVars.begin(), fluidNodeIndicesMacroVars.end(), _fluidNodeIndicesMacroVars); +} +void GridImp::getFluidNodeIndicesApplyBodyForce(uint *_fluidNodeIndicesApplyBodyForce) const +{ + std::copy(fluidNodeIndicesApplyBodyForce.begin(), fluidNodeIndicesApplyBodyForce.end(), _fluidNodeIndicesApplyBodyForce); +} +void GridImp::getFluidNodeIndicesAllFeatures(uint *_fluidNodeIndicesAllFeatures) const +{ + std::copy(fluidNodeIndicesAllFeatures.begin(), fluidNodeIndicesAllFeatures.end(), _fluidNodeIndicesAllFeatures); +} + + +std::vector<SideType> GridImp::getBCAlreadySet() { + return this->bcAlreadySet; +} + +void GridImp::addBCalreadySet(SideType side) +{ + this->bcAlreadySet.push_back(side); +} + + void GridImp::print() const { printf("min: (%2.4f, %2.4f, %2.4f), max: (%2.4f, %2.4f, %2.4f), size: %d, delta: %2.4f\n", startX, startY, startZ, @@ -2087,3 +2211,10 @@ void GridImp::print() const if(this->gridInterface) this->gridInterface->print(); } + +bool GridImp::isStopperForBC(uint index) const +{ + return 
(this->getFieldEntry(index) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY || + this->getFieldEntry(index) == vf::gpu::STOPPER_OUT_OF_GRID || + this->getFieldEntry(index) == vf::gpu::STOPPER_SOLID); +} diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index edb5ca916bf68dcf992ea214dcddb2dc43810352..2cd322ebed78daaf135ad97b881923ca5831bbcd 100644 --- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -34,6 +34,7 @@ #define GRID_IMP_H #include <array> +#include <vector> #include "Core/LbmOrGks.h" @@ -52,6 +53,7 @@ class Object; class BoundingBox; class TriangularMeshDiscretizationStrategy; + #ifdef __GNUC__ #ifndef __clang__ #pragma push @@ -76,7 +78,7 @@ protected: public: static SPtr<GridImp> makeShared(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level); - virtual ~GridImp() = default; + ~GridImp() override = default; private: void initalNumberOfNodesAndSize(); @@ -92,6 +94,7 @@ private: bool nodeInPreviousCellIs(int index, char type) const; bool nodeInCellIs(Cell& cell, char type) const override; + uint getXIndex(real x) const; uint getYIndex(real y) const; uint getZIndex(real z) const; @@ -115,8 +118,11 @@ private: int *sparseIndices; - std::vector<uint> fluidNodeIndices; - std::vector<uint> fluidNodeIndicesBorder; + std::vector<uint> fluidNodeIndices; // run on CollisionTemplate::Default + std::vector<uint> fluidNodeIndicesBorder; // run on subdomain border nodes (CollisionTemplate::SubDomainBorder) + std::vector<uint> fluidNodeIndicesMacroVars; // run on CollisionTemplate::MacroVars + std::vector<uint> fluidNodeIndicesApplyBodyForce; // run on CollisionTemplate::ApplyBodyForce + std::vector<uint> fluidNodeIndicesAllFeatures; // run on CollisionTemplate::AllFeatures uint *qIndices; //maps from matrix index to qIndex real *qValues; @@ -132,6 +138,8 @@ private: bool enableFixRefinementIntoTheWall; + 
std::vector<SideType> bcAlreadySet; + protected: Field field; int *neighborIndexX, *neighborIndexY, *neighborIndexZ, *neighborIndexNegative; @@ -146,9 +154,9 @@ public: void setPeriodicityY(bool periodicity) override; void setPeriodicityZ(bool periodicity) override; - bool getPeriodicityX() override; - bool getPeriodicityY() override; - bool getPeriodicityZ() override; + bool getPeriodicityX() const override; + bool getPeriodicityY() const override; + bool getPeriodicityZ() const override; void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) override; @@ -182,6 +190,9 @@ public: void setNumberOfLayers(uint numberOfLayers) override; + std::vector<SideType> getBCAlreadySet() override; + void addBCalreadySet(SideType side) override; + public: Distribution distribution; @@ -216,6 +227,7 @@ public: bool nodeInNextCellIs(int index, char type) const; bool hasAllNeighbors(uint index) const; bool hasNeighborOfType(uint index, char type) const; + bool nodeHasBC(uint index) const override; bool cellContainsOnly(Cell &cell, char type) const; bool cellContainsOnly(Cell &cell, char typeA, char typeB) const; @@ -256,6 +268,8 @@ public: static void getGridInterface(uint *gridInterfaceList, const uint *oldGridInterfaceList, uint size); bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const override; + + bool isStopperForBC(uint index) const override; int *getNeighborsX() const override; int* getNeighborsY() const override; @@ -273,7 +287,7 @@ public: void print() const; public: - virtual void findSparseIndices(SPtr<Grid> fineGrid) override; + void findSparseIndices(SPtr<Grid> fineGrid) override; void findForGridInterfaceNewIndices(SPtr<GridImp> fineGrid); void updateSparseIndices(); @@ -364,6 +378,19 @@ public: uint getNumberOfFluidNodesBorder() const override; void getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const override; + void addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMacroVars) override; + void 
addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce) override; + void addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) override; + void sortFluidNodeIndicesMacroVars() override; + void sortFluidNodeIndicesApplyBodyForce() override; + void sortFluidNodeIndicesAllFeatures() override; + + uint getNumberOfFluidNodeIndicesMacroVars() const override; + uint getNumberOfFluidNodeIndicesApplyBodyForce() const override; + uint getNumberOfFluidNodeIndicesAllFeatures() const override; + void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars) const override; + void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce) const override; + void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures) const override; public: struct CommunicationIndices { diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp index 23fb0f4e7f3e16702e9cb2459606986af1032e49..0238434dc87b453dc21164577d8abd4ce1819793 100644 --- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp +++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp @@ -417,10 +417,10 @@ void SimulationFileWriter::writeGridInterfaceToFile(SPtr<GridBuilder> builder, u } } -void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, std::ofstream& coarseFile, uint* coarse, std::ofstream& fineFile, uint* fine) +void SimulationFileWriter::writeGridInterfaceToFile(uint numberOfNodes, std::ofstream &coarseFile, uint *coarse, + std::ofstream &fineFile, uint *fine) { - for (uint index = 0; index < numberOfNodes; index++) - { + for (uint index = 0; index < numberOfNodes; index++) { coarseFile << coarse[index] << " \n"; fineFile << fine[index] << " \n"; } @@ -428,17 +428,15 @@ void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, st fineFile << "\n"; } -void 
SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream & offsetFile, real* offset_X, real* offset_Y, real* offset_Z) +void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream &offsetFile, real *offset_X, + real *offset_Y, real *offset_Z) { - for (uint index = 0; index < numberOfNodes; index++) - { + for (uint index = 0; index < numberOfNodes; index++) { offsetFile << offset_X[index] << " " << offset_Y[index] << " " << offset_Z[index] << " \n"; } offsetFile << "\n"; } - - /*#################################################################################*/ /*---------------------------------private methods---------------------------------*/ /*---------------------------------------------------------------------------------*/ diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index 759528e5346ba8d9899cb90eb64503b20a44c4fc..ed647cb406bca23ef90667b7d17171c7b3f46283 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -8,7 +8,7 @@ if(MSVC) set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix endif() -vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda) +vf_add_library(PUBLIC_LINK basics lbm PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda) #SET(TPN_WIN32 "/EHsc") #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp index f8f5c42b835a1a4ba55e378e624230bbb43dc05a..e3f344231dc9d5e19c09f7ce1fde7d31f1770232 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp @@ -17,11 +17,11 @@ void alloc2ndMoments(Parameter* para, CudaMemoryManager* 
cudaMemoryManager) void init2ndMoments(Parameter* para) { - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { ////////////////////////////////////////////////////////////////////////// //init host arrays - for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { para->getParH(lev)->kxyFromfcNEQ[pos] = 0.0; para->getParH(lev)->kyzFromfcNEQ[pos] = 0.0; @@ -116,7 +116,7 @@ void init3rdMoments(Parameter* para) { ////////////////////////////////////////////////////////////////////////// //init host arrays - for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { para->getParH(lev)->CUMbbb[pos] = 0.0; para->getParH(lev)->CUMabc[pos] = 0.0; @@ -198,7 +198,7 @@ void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager) void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager) { - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { ////////////////////////////////////////////////////////////////////////// //allocation (device-memory + host-memory) @@ -211,11 +211,11 @@ void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManag void initHigherOrderMoments(Parameter* para) { - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { ////////////////////////////////////////////////////////////////////////// //init host arrays - for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { para->getParH(lev)->CUMcbb[pos] = 0.0; para->getParH(lev)->CUMbcb[pos] = 0.0; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp 
b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp index 77db571f7f10e0ea0bff827400270dd074d4e666..80a667f91976b745b619fed5d5763b5429a6559c 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp @@ -11,16 +11,16 @@ void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager) { - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { cudaMemoryManager->cudaAllocMedianOut(lev); - for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { - para->getParH(lev)->vx_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->vy_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->vz_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->rho_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0; + para->getParH(lev)->vx_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->vy_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->vz_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->rho_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0; } } } @@ -31,15 +31,15 @@ void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager) void calcMedian(Parameter* para, uint tdiff) { - for (int lev=para->getCoarse(); lev <= para->getFine(); lev++) + for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { - for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { - para->getParH(lev)->vx_SP_Med_Out[i] = para->getParH(lev)->vx_SP_Med[i] / (real)tdiff; - para->getParH(lev)->vy_SP_Med_Out[i] = para->getParH(lev)->vy_SP_Med[i] / (real)tdiff; - para->getParH(lev)->vz_SP_Med_Out[i] = para->getParH(lev)->vz_SP_Med[i] / (real)tdiff; - para->getParH(lev)->rho_SP_Med_Out[i] = para->getParH(lev)->rho_SP_Med[i] / (real)tdiff; - 
para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i]/ (real)tdiff; + para->getParH(lev)->vx_SP_Med_Out[pos] = para->getParH(lev)->vx_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->vy_SP_Med_Out[pos] = para->getParH(lev)->vy_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->vz_SP_Med_Out[pos] = para->getParH(lev)->vz_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->rho_SP_Med_Out[pos] = para->getParH(lev)->rho_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos]/ (real)tdiff; } } } @@ -75,14 +75,14 @@ void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager) for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { cudaMemoryManager->cudaAllocMedianOutAD(lev); - for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { - para->getParH(lev)->vx_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->vy_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->vz_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->rho_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0; - para->getParH(lev)->Conc_Med_Out[i] = (real)0.0; + para->getParH(lev)->vx_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->vy_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->vz_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->rho_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0; + para->getParH(lev)->Conc_Med_Out[pos] = (real)0.0; } } } @@ -95,14 +95,14 @@ void calcMedianAD(Parameter* para, uint tdiff) { for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { - for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++) + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { - para->getParH(lev)->vx_SP_Med_Out[i] = para->getParH(lev)->vx_SP_Med[i] / (real)tdiff; - para->getParH(lev)->vy_SP_Med_Out[i] = 
para->getParH(lev)->vy_SP_Med[i] / (real)tdiff; - para->getParH(lev)->vz_SP_Med_Out[i] = para->getParH(lev)->vz_SP_Med[i] / (real)tdiff; - para->getParH(lev)->rho_SP_Med_Out[i] = para->getParH(lev)->rho_SP_Med[i] / (real)tdiff; - para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i] / (real)tdiff; - para->getParH(lev)->Conc_Med_Out[i] = para->getParH(lev)->Conc_Med[i] / (real)tdiff; + para->getParH(lev)->vx_SP_Med_Out[pos] = para->getParH(lev)->vx_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->vy_SP_Med_Out[pos] = para->getParH(lev)->vy_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->vz_SP_Med_Out[pos] = para->getParH(lev)->vz_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->rho_SP_Med_Out[pos] = para->getParH(lev)->rho_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos] / (real)tdiff; + para->getParH(lev)->Conc_Med_Out[pos] = para->getParH(lev)->Conc_Med[pos] / (real)tdiff; } } } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp index e91fb6f5c232bd98073a1c930149693f8af4b078..9572252965e1c619702370f8b9a3756bf035035e 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp @@ -25,32 +25,32 @@ void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryM for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) { cudaMemoryManager->cudaCopyTurbulenceIntensityDH(lev, para->getParH(lev)->numberOfNodes); - for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++) { + for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) { // mean velocity - para->getParH(lev)->vx_mean[i] = para->getParH(lev)->vx_mean[i] / (real)tdiff; - para->getParH(lev)->vy_mean[i] = para->getParH(lev)->vy_mean[i] / (real)tdiff; - para->getParH(lev)->vz_mean[i] = 
para->getParH(lev)->vz_mean[i] / (real)tdiff; + para->getParH(lev)->vx_mean[pos] = para->getParH(lev)->vx_mean[pos] / (real)tdiff; + para->getParH(lev)->vy_mean[pos] = para->getParH(lev)->vy_mean[pos] / (real)tdiff; + para->getParH(lev)->vz_mean[pos] = para->getParH(lev)->vz_mean[pos] / (real)tdiff; // fluctuations - para->getParH(lev)->vxx[i] = para->getParH(lev)->vxx[i] / (real)tdiff; - para->getParH(lev)->vyy[i] = para->getParH(lev)->vyy[i] / (real)tdiff; - para->getParH(lev)->vzz[i] = para->getParH(lev)->vzz[i] / (real)tdiff; - para->getParH(lev)->vxy[i] = para->getParH(lev)->vxy[i] / (real)tdiff; - para->getParH(lev)->vxz[i] = para->getParH(lev)->vxz[i] / (real)tdiff; - para->getParH(lev)->vyz[i] = para->getParH(lev)->vyz[i] / (real)tdiff; - - para->getParH(lev)->vxx[i] = - para->getParH(lev)->vxx[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vx_mean[i]; - para->getParH(lev)->vyy[i] = - para->getParH(lev)->vyy[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vy_mean[i]; - para->getParH(lev)->vzz[i] = - para->getParH(lev)->vzz[i] - para->getParH(lev)->vz_mean[i] * para->getParH(lev)->vz_mean[i]; - para->getParH(lev)->vxy[i] = - para->getParH(lev)->vxy[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vy_mean[i]; - para->getParH(lev)->vxz[i] = - para->getParH(lev)->vxz[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vz_mean[i]; - para->getParH(lev)->vyz[i] = - para->getParH(lev)->vyz[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vz_mean[i]; + para->getParH(lev)->vxx[pos] = para->getParH(lev)->vxx[pos] / (real)tdiff; + para->getParH(lev)->vyy[pos] = para->getParH(lev)->vyy[pos] / (real)tdiff; + para->getParH(lev)->vzz[pos] = para->getParH(lev)->vzz[pos] / (real)tdiff; + para->getParH(lev)->vxy[pos] = para->getParH(lev)->vxy[pos] / (real)tdiff; + para->getParH(lev)->vxz[pos] = para->getParH(lev)->vxz[pos] / (real)tdiff; + para->getParH(lev)->vyz[pos] = para->getParH(lev)->vyz[pos] / (real)tdiff; + + 
para->getParH(lev)->vxx[pos] = + para->getParH(lev)->vxx[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vx_mean[pos]; + para->getParH(lev)->vyy[pos] = + para->getParH(lev)->vyy[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vy_mean[pos]; + para->getParH(lev)->vzz[pos] = + para->getParH(lev)->vzz[pos] - para->getParH(lev)->vz_mean[pos] * para->getParH(lev)->vz_mean[pos]; + para->getParH(lev)->vxy[pos] = + para->getParH(lev)->vxy[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vy_mean[pos]; + para->getParH(lev)->vxz[pos] = + para->getParH(lev)->vxz[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vz_mean[pos]; + para->getParH(lev)->vyz[pos] = + para->getParH(lev)->vyz[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vz_mean[pos]; } } } @@ -146,7 +146,7 @@ void writeAllTiDatafToFile(Parameter *para, uint timestep) } } -void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data, +void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data, std::vector<std::string> &datanames) { //////////////////////////////////////////////////////////////////////// @@ -169,10 +169,10 @@ void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std:: ostr << std::endl; //////////////////////////////////////////////////////////////////////// // fill file with data - for (int i = 0; i < sizeOfTiArray; i++) { - ostr << i; + for (size_t pos = 0; pos < sizeOfTiArray; pos++) { + ostr << pos; for (auto dataset : data) - ostr << "\t" << dataset[i]; + ostr << "\t" << dataset[pos]; ostr << std::endl; } //////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h index f70973eb5921a17c3229a026623de2a0ef9f3ce4..a76c2d0dde99ad9fb3fd38137b6c72e5c3f5a6c3 100644 --- 
a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h @@ -18,7 +18,7 @@ void writeVeloFluctuationToFile(Parameter *para, uint timeste); void writeVeloMeansToFile(Parameter *para, uint timestep); void writeAllTiDatafToFile(Parameter *para, uint timestep); -void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data, +void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data, std::vector<std::string> &datanames); #endif diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp index 4a14d19c10936f84379f332ef24f081f0ebb0cb7..49543f37df7fb54290f4ab6c09edb8d10c0b67be 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp @@ -39,8 +39,14 @@ void CollisionAndExchange_noStreams_indexKernel::operator()(UpdateGrid27 *update //! //! 1. run collision //! - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, -1); + for( CollisionTemplate tag: para->getParH(level)->allocatedBulkFluidNodeTags ) + { + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->taggedFluidNodeIndices[tag], + para->getParD(level)->numberOfTaggedFluidNodes[tag], + tag, + CudaStreamIndex::Legacy); + } //! 2. exchange information between GPUs updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false); @@ -61,28 +67,35 @@ void CollisionAndExchange_noStreams_oldKernel::operator()(UpdateGrid27 *updateGr void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t) { - int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); - int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); - //! 
\details steps: //! - //! 1. run collision for nodes which are at the border of the gpus/processes - //! - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndicesBorder, - para->getParD(level)->numberOfFluidNodesBorder, borderStreamIndex); + //! 1. run collision for nodes which are at the border of the gpus/processes, running with WriteMacroVars in case probes sample on these nodes + //! + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], + para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], + CollisionTemplate::WriteMacroVars, + CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! - updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex); + updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); - - //! 3. launch the collision kernel for bulk nodes - //! - para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - updateGrid->collisionUsingIndices(level, t, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); - + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); + + //! 3. launch the collision kernel for bulk nodes. This includes nodes with \param tag Default, WriteMacroVars, ApplyBodyForce, + //! or AllFeatures. 
All assigned tags are listed in \param allocatedBulkFluidNodeTags during initialization in Simulation::init + + para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk); + + for( CollisionTemplate tag: para->getParH(level)->allocatedBulkFluidNodeTags ) + { + updateGrid->collisionUsingIndices( level, t, + para->getParD(level)->taggedFluidNodeIndices[tag], + para->getParD(level)->numberOfTaggedFluidNodes[tag], + tag, + CudaStreamIndex::Bulk); + } //! 4. exchange information between GPUs - updateGrid->exchangeMultiGPU(level, borderStreamIndex); + updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp index d62e8fee24dad1cde7ccd2044a5a5f9573f7ff82..cc1d2eb748b01835b46f5fc69f47ed3ddc17a28d 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp @@ -53,7 +53,7 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan { ////////////////////////////////////////////////////////////////////// //measure the velocity - int numberOfElements = para->getParH(lev)->numberOfNodes; + unsigned long long numberOfElements = para->getParH(lev)->numberOfNodes; if (numberOfElements > 0) { CalcMacCompSP27(para->getParD(lev)->velocityX, @@ -74,11 +74,11 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan cudaMemoryManager->cudaCopyPrint(lev); // para->cudaCopyForceVelo(i,numberOfElements); ////////////////////////////////////////////////////////////////// - for (int j = 0; j < numberOfElements; j++) + for (size_t pos = 0; pos < numberOfElements; pos++) { - tempVeloX += (double)para->getParH(lev)->velocityX[j]; - tempVeloY += (double)para->getParH(lev)->velocityY[j]; - tempVeloZ += (double)para->getParH(lev)->velocityZ[j]; + tempVeloX += (double)para->getParH(lev)->velocityX[pos]; + 
tempVeloY += (double)para->getParH(lev)->velocityY[pos]; + tempVeloZ += (double)para->getParH(lev)->velocityZ[pos]; } tempVeloX /= (double)numberOfElements; tempVeloY /= (double)numberOfElements; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp index cd74216e1fbe7b718c72046ace4b7d2e7cf451fe..b8ca4e9c2020e17cd0192267ac5d931b510afc3a 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp @@ -38,67 +38,62 @@ void NoRefinement::operator()(UpdateGrid27 *updateGrid, Parameter *para, int lev void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level) { - int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); - int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); - //! \details steps: //! //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes //! - updateGrid->fineToCoarse(level, ¶->getParD(level)->intFCBorder, para->getParD(level)->offFC, borderStreamIndex); + updateGrid->fineToCoarse(level, ¶->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! - updateGrid->prepareExchangeMultiGPUAfterFtoC(level, borderStreamIndex); + updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine) //! 
- para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, bulkStreamIndex); - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, bulkStreamIndex); + para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder); //! 4. exchange information between GPUs (only nodes which are part of the interpolation) //! - updateGrid->exchangeMultiGPUAfterFtoC(level, borderStreamIndex); + updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder); // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, borderStreamIndex); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder); cudaDeviceSynchronize(); } -void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level){ - int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); - int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); - +void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level) +{ //! \details steps: //! //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! 
- updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, borderStreamIndex); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder); //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished //! - updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex); + updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); + para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder); //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine) //! - para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, bulkStreamIndex); - updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, bulkStreamIndex); + para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder); //! 4. exchange information between GPUs (all nodes) //! - updateGrid->exchangeMultiGPU(level, borderStreamIndex); + updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder); // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes //! 
- updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, borderStreamIndex); + updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder); cudaDeviceSynchronize(); } @@ -109,14 +104,14 @@ void RefinementAndExchange_noStreams_exchangeInterface::operator()(UpdateGrid27 //! //! 1. interpolation fine to coarse //! - updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy); //! 2. exchange information between GPUs (only nodes which are part of the interpolation) //! updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, true); //! 3. interpolation coarse to fine - updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1); + updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy); } void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level) @@ -125,14 +120,14 @@ void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 * //! //! 1. interpolation fine to coarse //! - updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy); //! 2. exchange information between GPUs (all nodes) //! updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false); //! 3. 
interpolation coarse to fine - updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1); + updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy); } void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level) @@ -141,7 +136,7 @@ void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para //! //! 1. interpolation fine to coarse //! - updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, -1); + updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy); //! 2. interpolation coarse to fine - updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, -1); + updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy); } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 296ab819c5538a6b6d6a6827b5c28cbc475af838..4136614dfbfc9e0d2fc1bf7f4b01624f94eabb6f 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -22,13 +22,17 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) updateGrid(level + 1, t); } + ////////////////////////////////////////////////////////////////////////// + + interactWithProbes(level, t); + ////////////////////////////////////////////////////////////////////////// collision(this, para.get(), level, t); ////////////////////////////////////////////////////////////////////////// - postCollisionBC(level); + postCollisionBC(level, t); ////////////////////////////////////////////////////////////////////////// @@ -47,13 +51,14 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) ////////////////////////////////////////////////////////////////////////// if( level != para->getFine() ) - 
{ + { refinement(this, para.get(), level); } + ////////////////////////////////////////////////////////////////////////// + interactWithActuators(level, t); - interactWithProbes(level, t); } void UpdateGrid27::collisionAllNodes(int level, unsigned int t) @@ -71,15 +76,16 @@ void UpdateGrid27::collisionAllNodes(int level, unsigned int t) collisionAdvectionDiffusion(level); } -void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream) +void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *taggedFluidNodeIndices, uint numberOfTaggedFluidNodes, CollisionTemplate collisionTemplate, CudaStreamIndex stream) { - if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0) - kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream); + if (taggedFluidNodeIndices != nullptr && numberOfTaggedFluidNodes != 0) + kernels.at(level)->runOnIndices(taggedFluidNodeIndices, numberOfTaggedFluidNodes, collisionTemplate, stream); else - std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not definded" + std::cout << "In collision: fluidNodeIndices or numberOfFluidNodes not defined" << std::endl; ////////////////////////////////////////////////////////////////////////// + //! 
\todo: AD collision and porousMedia should be called separately, not in collisionUsingIndices if (para->getSimulatePorousMedia()) collisionPorousMedia(level); @@ -118,21 +124,21 @@ void UpdateGrid27::collisionAdvectionDiffusion(int level) this->adKernelManager->runADcollisionKernel(level); } -void UpdateGrid27::prepareExchangeMultiGPU(int level, int streamIndex) +void UpdateGrid27::prepareExchangeMultiGPU(int level, CudaStreamIndex streamIndex) { prepareExchangeCollDataXGPU27AllNodes(para.get(), level, streamIndex); prepareExchangeCollDataYGPU27AllNodes(para.get(), level, streamIndex); prepareExchangeCollDataZGPU27AllNodes(para.get(), level, streamIndex); } -void UpdateGrid27::prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex) +void UpdateGrid27::prepareExchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex) { prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, streamIndex); prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, streamIndex); prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, streamIndex); } -void UpdateGrid27::exchangeMultiGPU(int level, int streamIndex) +void UpdateGrid27::exchangeMultiGPU(int level, CudaStreamIndex streamIndex) { ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition @@ -168,30 +174,30 @@ void UpdateGrid27::exchangeMultiGPU_noStreams_withPrepare(int level, bool useRed // 3D domain decomposition if (useReducedComm) { // X - prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, -1); - exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, -1); + prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); // Y - 
prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, -1); - exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, -1); + prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); // Z - prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, -1); - exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, -1); + prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, CudaStreamIndex::Legacy); } else { // X - prepareExchangeCollDataXGPU27AllNodes(para.get(), level, -1); - exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, -1); + prepareExchangeCollDataXGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); // Y - prepareExchangeCollDataYGPU27AllNodes(para.get(), level, -1); - exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, -1); + prepareExchangeCollDataYGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + 
scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); // Z - prepareExchangeCollDataZGPU27AllNodes(para.get(), level, -1); - exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1); - scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, -1); + prepareExchangeCollDataZGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); + exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, CudaStreamIndex::Legacy); + scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, CudaStreamIndex::Legacy); } ////////////////////////////////////////////////////////////////////////// @@ -204,7 +210,7 @@ void UpdateGrid27::exchangeMultiGPU_noStreams_withPrepare(int level, bool useRed exchangePostCollDataADZGPU27(para.get(), comm, cudaMemoryManager.get(), level); } } -void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex) +void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex) { ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition @@ -227,9 +233,10 @@ void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex) } } -void UpdateGrid27::postCollisionBC(int level) +void UpdateGrid27::postCollisionBC(int level, uint t) { ////////////////////////////////////////////////////////////////////////// + // G E O M E T R Y // V E L O C I T Y (I N F L O W) this->bcKernelManager->runVelocityBCKernelPost(level); @@ -257,6 +264,10 @@ void UpdateGrid27::postCollisionBC(int level) // P R E S S U R E this->bcKernelManager->runPressureBCKernelPost(level); + ////////////////////////////////////////////////////////////////////////// + // P R E C U R S O R + this->bcKernelManager->runPrecursorBCKernelPost(level, t, cudaMemoryManager.get()); + ////////////////////////////////////////////////////////////////////////// // A D V E C T I O N D I F F U S I O N if (para->getDiffOn()) @@ -317,13 +328,12 @@ void 
UpdateGrid27::preCollisionBC(int level, unsigned int t) ////////////////////////////////////////////////////////////////////////////////// } -void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, - int streamIndex) +void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex) { gridScalingKernelManager->runFineToCoarseKernelLB(level, icellFC, offFC, streamIndex); if (para->getDiffOn()) { - if (streamIndex != -1) { + if (para->getStreamManager()->streamIsRegistered(streamIndex)) { printf("fineToCoarse Advection Diffusion not implemented"); // TODO return; } @@ -331,14 +341,13 @@ void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC & } } -void UpdateGrid27::coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, - int streamIndex) +void UpdateGrid27::coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex) { this->gridScalingKernelManager->runCoarseToFineKernelLB(level, icellCF, offCF, streamIndex); if (para->getDiffOn()) { - if (streamIndex != -1){ + if(para->getStreamManager()->streamIsRegistered(streamIndex)){ printf("CoarseToFineWithStream Advection Diffusion not implemented"); // TODO return; } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 8110923bf066412e2bb09ffa1f10efe3ddc983c7..8ce2cf5bfd72f9f53cdb35bc92502ee9ca0d3ad8 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -4,6 +4,7 @@ #include "LBM/LB.h" #include "GPU/GPU_Interface.h" #include "Parameter/Parameter.h" +#include "Parameter/CudaStreamManager.h" #include "GPU/CudaMemoryManager.h" #include "Communication/Communicator.h" #include "Calculation/PorousMedia.h" @@ -15,7 +16,6 @@ class Kernel; class BoundaryConditionFactory; class GridScalingFactory; class TurbulenceModelFactory; - class 
UpdateGrid27; using CollisionStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level, unsigned int t)>; using RefinementStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level)>; @@ -31,21 +31,21 @@ public: private: void collisionAllNodes(int level, unsigned int t); - void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1); + void collisionUsingIndices(int level, unsigned int t, uint *taggedFluidNodeIndices = nullptr, uint numberOfTaggedFluidNodes = 0, CollisionTemplate collisionTemplate = CollisionTemplate::Default, CudaStreamIndex streamIndex=CudaStreamIndex::Legacy); void collisionAdvectionDiffusion(int level); - void postCollisionBC(int level); + void postCollisionBC(int level, unsigned int t); void preCollisionBC(int level, unsigned int t); void collisionPorousMedia(int level); - void fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, int streamIndex); - void coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, int streamIndex); + void fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex); + void coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex); - void prepareExchangeMultiGPU(int level, int streamIndex); - void prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex); + void prepareExchangeMultiGPU(int level, CudaStreamIndex streamIndex); + void prepareExchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex); - void exchangeMultiGPU(int level, int streamIndex); - void exchangeMultiGPUAfterFtoC(int level, int streamIndex); + void exchangeMultiGPU(int level, CudaStreamIndex streamIndex); + void exchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex); void exchangeMultiGPU_noStreams_withPrepare(int level, bool useReducedComm); void swapBetweenEvenAndOddTimestep(int level); @@ -60,6 +60,7 
@@ private: friend class CollisionAndExchange_noStreams_indexKernel; friend class CollisionAndExchange_noStreams_oldKernel; friend class CollisionAndExchange_streams; + friend class CollisionAndExchange_noStreams_withReadWriteFlags; RefinementStrategy refinement; friend class RefinementAndExchange_streams_exchangeInterface; diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 36c250401e0775b3abcc7d25c0f89fde0556631e..00a7b45668e2050467f3d1122455dc74d0ad4f1c 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -11,12 +11,12 @@ using namespace vf::lbm::dir; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // 3D domain decomposition: functions used by all directions //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex, +void collectNodesInSendBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighbor, unsigned int numberOfSendProcessNeighbors) { - cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); + for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) { GetSendFsPostDev27(para->getParD(level)->distributions.f[0], (*sendProcessNeighbor)[i].f[0], @@ -32,11 +32,11 @@ void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex, } } -void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex, +void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, unsigned int numberOfRecvProcessNeighbors) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); for (unsigned int i = 0; i < numberOfRecvProcessNeighbors; i++) { SetRecvFsPostDev27(para->getParD(level)->distributions.f[0], (*recvProcessNeighborDev)[i].f[0], @@ -105,22 +105,22 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // X //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborX, (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } -void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { 
collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCX, (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, + exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, &para->getParD(level)->sendProcessNeighborX, &para->getParD(level)->recvProcessNeighborX, &para->getParH(level)->sendProcessNeighborX, @@ -128,40 +128,40 @@ void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm } void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, + exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCX, &para->getParD(level)->recvProcessNeighborsAfterFtoCX, &para->getParH(level)->sendProcessNeighborsAfterFtoCX, &para->getParH(level)->recvProcessNeighborsAfterFtoCX); } -void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex) +void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { - scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborX, + scatterNodesFromRecvBufferGPU(para, level, streamIndex,&para->getParD(level)->recvProcessNeighborX, (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } -void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { - 
scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCX, + scatterNodesFromRecvBufferGPU(para, level, streamIndex,&para->getParD(level)->recvProcessNeighborsAfterFtoCX, (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))); } -void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, - int streamIndex, +void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, + int level, CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! \details steps: //! 1. copy data from device to host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - cudaMemoryManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! 2. start non-blocking receive (MPI) @@ -181,7 +181,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //! 7. 
copy received data from host to device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) - cudaMemoryManager->cudaCopyProcessNeighborXFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborXFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -189,22 +189,22 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborY, (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } -void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { collectNodesInSendBufferGPU(para, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCY, (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataYGPU27(para, comm, 
cudaMemoryManager, level, streamIndex, + exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex, &para->getParD(level)->sendProcessNeighborY, &para->getParD(level)->recvProcessNeighborY, &para->getParH(level)->sendProcessNeighborY, @@ -212,38 +212,39 @@ void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm } void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex, + exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex, &para->getParD(level)->sendProcessNeighborsAfterFtoCY, &para->getParD(level)->recvProcessNeighborsAfterFtoCY, &para->getParH(level)->sendProcessNeighborsAfterFtoCY, &para->getParH(level)->recvProcessNeighborsAfterFtoCY); } -void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex) +void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborY, (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } -void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { scatterNodesFromRecvBufferGPU(para, level, streamIndex, &para->getParD(level)->recvProcessNeighborsAfterFtoCY, (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))); } void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, - int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, + CudaStreamIndex streamIndex, + std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, 
std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) - cudaMemoryManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborYFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost); @@ -276,7 +277,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Host to Device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - cudaMemoryManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborYFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } @@ -285,61 +286,62 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Z 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { collectNodesInSendBufferGPU(para, level, streamIndex, ¶->getParD(level)->sendProcessNeighborZ, (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))); } -void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { collectNodesInSendBufferGPU(para, level, streamIndex, ¶->getParD(level)->sendProcessNeighborsAfterFtoCZ, (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))); } void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, + exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, ¶->getParD(level)->sendProcessNeighborZ, ¶->getParD(level)->recvProcessNeighborZ, ¶->getParH(level)->sendProcessNeighborZ, ¶->getParH(level)->recvProcessNeighborZ); } void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex) + int level, CudaStreamIndex streamIndex) { - exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, + exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex, ¶->getParD(level)->sendProcessNeighborsAfterFtoCZ, ¶->getParD(level)->recvProcessNeighborsAfterFtoCZ, ¶->getParH(level)->sendProcessNeighborsAfterFtoCZ, ¶->getParH(level)->recvProcessNeighborsAfterFtoCZ); } -void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter 
*para, int level, int streamIndex) +void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex) { scatterNodesFromRecvBufferGPU(para, level, streamIndex, ¶->getParD(level)->recvProcessNeighborZ, (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))); } -void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex) +void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex) { scatterNodesFromRecvBufferGPU(para, level, streamIndex, ¶->getParD(level)->recvProcessNeighborsAfterFtoCZ, (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, - int streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, +void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, + CudaStreamIndex streamIndex, + std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost) { - cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) - cudaMemoryManager->cudaCopyProcessNeighborZFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborZFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// startNonBlockingMpiReceive((unsigned int)(*sendProcessNeighborHost).size(), comm, recvProcessNeighborHost); ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -386,7 +388,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe // copy Host to Device for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - cudaMemoryManager->cudaCopyProcessNeighborZFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs, streamIndex); + cudaMemoryManager->cudaCopyProcessNeighborZFsHD(level, i, (*recvProcessNeighborDev)[i].memsizeFs); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index ec930ebbc06554e948204b74e79e0e25b85f57b5..8302ffdc47bfa012c47df00f90c2491039f4eaee 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -6,6 +6,7 @@ #include "GPU/GPU_Interface.h" #include "LBM/LB.h" #include "Parameter/Parameter.h" +#include 
"Parameter/CudaStreamManager.h" //! \file ExchangeData27.h //! \ingroup GPU @@ -14,9 +15,9 @@ ////////////////////////////////////////////////////////////////////////// // 1D domain decomposition -void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, +void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level); -void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, +void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level); ////////////////////////////////////////////////////////////////////////// // 3D domain decomposition @@ -24,13 +25,13 @@ void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, Cud // functions used for all directions //! \brief Collect the send nodes in a buffer on the gpu -void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex, - std::vector<ProcessNeighbor27> *sendProcessNeighbor, - unsigned int numberOfSendProcessNeighbors); +void collectNodesInSendBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, + std::vector<ProcessNeighbor27> *sendProcessNeighbor, + unsigned int numberOfSendProcessNeighbors); //! \brief Distribute the receive nodes from the buffer on the gpu -void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex, - std::vector<ProcessNeighbor27> *recvProcessNeighborDev, - unsigned int numberOfRecvProcessNeighbors); +void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, + std::vector<ProcessNeighbor27> *recvProcessNeighborDev, + unsigned int numberOfRecvProcessNeighbors); //! \brief Copy nodes which are part of the communication in multiple directions //! \details The nodes are copied from the receive buffer in one direction to the send buffer in another direction. 
The //! copy operation is conducted on the cpu. @@ -49,21 +50,20 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu //! \details Needed to exchange all nodes, used in the communication after collision step -void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex); +void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine. //! See [master thesis of Anna Wellmann] -void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex); +void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); //! \brief Exchange routine in x direction for simulations on multiple gpus //! \details Send and receive the nodes from the communication buffers on the gpus. //! \param Communicator is needed for the communication between the processes with mpi //! \param CudaMemoryManager is needed for moving the data between host and device -//! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers //! 
to the send and receive arrays, both on the device and the host void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex, + int level, CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, @@ -71,59 +71,59 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe //! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes //! \details Used in the communication after collision step void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); //! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two //! interpolation processes on refined grids //! \details Only exchange nodes which are part of the interpolation process on //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and //! before the interpolation coarse to fine. See [master thesis of Anna Wellmann] void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu //! \details Needed to exchange all nodes, used in the communication after collision step -void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex); +void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); //! 
\brief Distribute the receive nodes (x direction) from the buffer on the gpu //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine. //! See [master thesis of Anna Wellmann] -void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex); +void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); ////////////////////////////////////////////////////////////////////////// // y -void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex); -void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex); +void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); +void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex, + int level,CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHos); void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); -void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex); -void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int 
streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); +void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); +void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); // z -void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex); -void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex); +void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); +void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, - int level, int streamIndex, + int level, CudaStreamIndex streamIndex, std::vector<ProcessNeighbor27> *sendProcessNeighborDev, std::vector<ProcessNeighbor27> *recvProcessNeighborDev, std::vector<ProcessNeighbor27> *sendProcessNeighborHost, std::vector<ProcessNeighbor27> *recvProcessNeighborHost); void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, - CudaMemoryManager *cudaMemoryManager, int level, int streamIndex); + CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex); -void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex); -void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex); +void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex); +void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex); 
////////////////////////////////////////////////////////////////////////// // 3D domain decomposition convection diffusion diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp index e197fb5c28611e77406b30ab39aa6af2f54b9ef5..3b511264e9c7edc80bbe367cac4a9b6d8725674b 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp @@ -19,34 +19,29 @@ std::shared_ptr<GridProvider> GridProvider::makeGridReader(FILEFORMAT format, st return std::shared_ptr<GridProvider>(new GridReader(format, para, cudaMemoryManager)); } -void GridProvider::setNumberOfNodes(const int numberOfNodes, const int level) const +void GridProvider::setNumberOfNodes(uint numberOfNodes, int level) const { - para->getParH(level)->numberOfNodes = numberOfNodes; - para->getParD(level)->numberOfNodes = numberOfNodes; - para->getParH(level)->mem_size_real_SP = sizeof(real) * para->getParH(level)->numberOfNodes; - para->getParH(level)->mem_size_int_SP = sizeof(uint) * para->getParH(level)->numberOfNodes; - para->getParD(level)->mem_size_real_SP = sizeof(real) * para->getParD(level)->numberOfNodes; - para->getParD(level)->mem_size_int_SP = sizeof(uint) * para->getParD(level)->numberOfNodes; + para->getParH(level)->numberOfNodes = (unsigned long long)numberOfNodes; + para->getParD(level)->numberOfNodes = (unsigned long long)numberOfNodes; + para->getParH(level)->memSizeRealLBnodes = sizeof(real) * para->getParH(level)->numberOfNodes; + para->getParD(level)->memSizeRealLBnodes = sizeof(real) * para->getParD(level)->numberOfNodes; + para->getParH(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParH(level)->numberOfNodes; + para->getParD(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParD(level)->numberOfNodes; } -void GridProvider::setNumberOfFluidNodes(const int numberOfNodes, 
const int level) const +void GridProvider::setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const { - para->getParH(level)->numberOfFluidNodes = numberOfNodes; - para->getParD(level)->numberOfFluidNodes = numberOfNodes; + para->getParH(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes; + para->getParD(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes; } -void GridProvider::setNumberOfFluidNodesBorder(const int numberOfNodes, const int level) const { - para->getParH(level)->numberOfFluidNodesBorder = numberOfNodes; - para->getParD(level)->numberOfFluidNodesBorder = numberOfNodes; -} - -void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level) const +void GridProvider::setInitalNodeValues(uint numberOfNodes, int level) const { - for (int j = 1; j <= numberOfNodes; j++) + for (uint pos = 1; pos <= numberOfNodes; pos++) { - const real coordX = para->getParH(level)->coordinateX[j]; - const real coordY = para->getParH(level)->coordinateY[j]; - const real coordZ = para->getParH(level)->coordinateZ[j]; + const real coordX = para->getParH(level)->coordinateX[pos]; + const real coordY = para->getParH(level)->coordinateY[pos]; + const real coordZ = para->getParH(level)->coordinateZ[pos]; real rho, vx, vy, vz; @@ -63,40 +58,40 @@ void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level) vz = real(0.0); } - para->getParH(level)->rho[j] = rho; - para->getParH(level)->velocityX[j] = vx; - para->getParH(level)->velocityY[j] = vy; - para->getParH(level)->velocityZ[j] = vz; + para->getParH(level)->rho[pos] = rho; + para->getParH(level)->velocityX[pos] = vx; + para->getParH(level)->velocityY[pos] = vy; + para->getParH(level)->velocityZ[pos] = vz; ////////////////////////////////////////////////////////////////////////// if (para->getCalcMedian()) { - para->getParH(level)->vx_SP_Med[j] = 0.0f; - para->getParH(level)->vy_SP_Med[j] = 0.0f; - para->getParH(level)->vz_SP_Med[j] = 0.0f; - 
para->getParH(level)->rho_SP_Med[j] = 0.0f; - para->getParH(level)->press_SP_Med[j] = 0.0f; + para->getParH(level)->vx_SP_Med[pos] = 0.0f; + para->getParH(level)->vy_SP_Med[pos] = 0.0f; + para->getParH(level)->vz_SP_Med[pos] = 0.0f; + para->getParH(level)->rho_SP_Med[pos] = 0.0f; + para->getParH(level)->press_SP_Med[pos] = 0.0f; } if (para->getUseWale()) { - para->getParH(level)->turbViscosity[j] = 0.0f; + para->getParH(level)->turbViscosity[pos] = 0.0f; //Debug - para->getParH(level)->gSij[j] = 0.0f; - para->getParH(level)->gSDij[j] = 0.0f; - para->getParH(level)->gDxvx[j] = 0.0f; - para->getParH(level)->gDyvx[j] = 0.0f; - para->getParH(level)->gDzvx[j] = 0.0f; - para->getParH(level)->gDxvy[j] = 0.0f; - para->getParH(level)->gDyvy[j] = 0.0f; - para->getParH(level)->gDzvy[j] = 0.0f; - para->getParH(level)->gDxvz[j] = 0.0f; - para->getParH(level)->gDyvz[j] = 0.0f; - para->getParH(level)->gDzvz[j] = 0.0f; + para->getParH(level)->gSij[pos] = 0.0f; + para->getParH(level)->gSDij[pos] = 0.0f; + para->getParH(level)->gDxvx[pos] = 0.0f; + para->getParH(level)->gDyvx[pos] = 0.0f; + para->getParH(level)->gDzvx[pos] = 0.0f; + para->getParH(level)->gDxvy[pos] = 0.0f; + para->getParH(level)->gDyvy[pos] = 0.0f; + para->getParH(level)->gDzvy[pos] = 0.0f; + para->getParH(level)->gDxvz[pos] = 0.0f; + para->getParH(level)->gDyvz[pos] = 0.0f; + para->getParH(level)->gDzvz[pos] = 0.0f; } if (para->getIsBodyForce()) { - para->getParH(level)->forceX_SP[j] = 0.0f; - para->getParH(level)->forceY_SP[j] = 0.0f; - para->getParH(level)->forceZ_SP[j] = 0.0f; + para->getParH(level)->forceX_SP[pos] = 0.0f; + para->getParH(level)->forceY_SP[pos] = 0.0f; + para->getParH(level)->forceZ_SP[pos] = 0.0f; } } diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h index 5fc5826735643ec748da169160e782004d7e5fb7..007db1e0d8e27b3810aa38c089bae8069bbe5813 100644 --- 
a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h @@ -5,7 +5,7 @@ #include <vector> #include <memory> - +#include "LBM/LB.h" #include "PointerDefinitions.h" #include "VirtualFluids_GPU_export.h" #include "gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h" @@ -24,34 +24,35 @@ public: static std::shared_ptr<GridProvider> makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator); static std::shared_ptr<GridProvider> makeGridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager); - virtual void allocArrays_CoordNeighborGeo() = 0; - virtual void allocArrays_BoundaryValues() = 0; - virtual void allocArrays_BoundaryQs() = 0; + virtual void allocArrays_CoordNeighborGeo() = 0; + virtual void allocArrays_BoundaryValues() = 0; + virtual void allocArrays_BoundaryQs() = 0; virtual void allocArrays_OffsetScale() = 0; - virtual void allocArrays_fluidNodeIndices() = 0; - virtual void allocArrays_fluidNodeIndicesBorder() = 0; + virtual void allocArrays_taggedFluidNodes() = 0; + + virtual void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) = 0; + virtual void sortFluidNodeTags() = 0; - virtual void setDimensions() = 0; - virtual void setBoundingBox() = 0; - virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0; + virtual void setDimensions() = 0; + virtual void setBoundingBox() = 0; + virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0; virtual void allocAndCopyForcing(); virtual void allocAndCopyQuadricLimiters(); 
virtual void freeMemoryOnHost(); virtual void cudaCopyDataToHost(int level); - virtual ~GridProvider() = default; + virtual ~GridProvider() = default; virtual void initalGridInformations() = 0; protected: - void setNumberOfNodes(const int numberOfNodes, const int level) const; - void setNumberOfFluidNodes(const int numberOfNodes, const int level) const; - void setNumberOfFluidNodesBorder(const int numberOfNodes, const int level) const; - virtual void setInitalNodeValues(const int numberOfNodes, const int level) const; - - void setPressSizePerLevel(int level, int sizePerLevel) const; - void setVelocitySizePerLevel(int level, int sizePerLevel) const; - void setOutflowSizePerLevel(int level, int sizePerLevel) const; + void setNumberOfNodes(uint numberOfNodes, int level) const; + void setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const; + virtual void setInitalNodeValues(uint numberOfNodes, int level) const; + + void setPressSizePerLevel(int level, int sizePerLevel) const; + void setVelocitySizePerLevel(int level, int sizePerLevel) const; + void setOutflowSizePerLevel(int level, int sizePerLevel) const; std::shared_ptr<Parameter> para; std::shared_ptr<CudaMemoryManager> cudaMemoryManager; diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp index fa432a1d9c3922b88e93588548db74083275ef1e..a1c8554cc4e262e9f1eca4204aed4ffcfd4c3a87 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp @@ -47,7 +47,7 @@ bool GridReader::getBinaer() void rearrangeGeometry(Parameter* para, int lev) { - for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++) + for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) { if (para->getParH(lev)->typeOfGridNode[index] == 
GEO_FLUID_OLD) { @@ -74,11 +74,11 @@ void GridReader::allocArrays_CoordNeighborGeo() uint numberOfNodesGlobal = 0; std::cout << "Number of Nodes: " << std::endl; - for (uint level = 0; level <= maxLevel; level++) - { - int numberOfNodesPerLevel = coordX.getSize(level) + 1; - numberOfNodesGlobal += numberOfNodesPerLevel; - std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl; + for (uint level = 0; level <= maxLevel; level++) + { + const uint numberOfNodesPerLevel = coordX.getSize(level) + 1; + numberOfNodesGlobal += numberOfNodesPerLevel; + std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl; setNumberOfNodes(numberOfNodesPerLevel, level); @@ -130,9 +130,9 @@ void GridReader::allocArrays_BoundaryValues() for (uint i = 0; i < channelBoundaryConditions.size(); i++) { - if ( this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); } - else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); } - else if (this->channelBoundaryConditions[i] == "outflow") { setOutflowValues(i); } + if ( this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); } + else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); } + else if (this->channelBoundaryConditions[i] == "outflow") { setOutflowValues(i); } } setVelocityValues(); @@ -218,16 +218,20 @@ void GridReader::allocArrays_OffsetScale() std::cout << "-----Ende OffsetScale------" << std::endl; } -void GridReader::allocArrays_fluidNodeIndices() { +void GridReader::allocArrays_taggedFluidNodes() { std::cout << "GridReader::allocArrays_fluidNodeIndices not implemented" << std::endl; // TODO } -void GridReader::allocArrays_fluidNodeIndicesBorder() { - std::cout << "GridReader::allocArrays_fluidNodeIndicesBorder not implemented" << std::endl; +void GridReader::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level){ + std::cout << 
"GridReader::tagFluidNodeIndices not implemented" << std::endl; // TODO } +void GridReader::sortFluidNodeTags(){ + std::cout << "GridReader::sortFluidNodeTags not implemented" << std::endl; + // TODO +} void GridReader::setPressureValues(int channelSide) const { @@ -281,23 +285,23 @@ void GridReader::fillVelocityVectors(int channelSide) delete[] veloX_ValuesPerSide; delete[] veloY_ValuesPerSide; delete[] veloZ_ValuesPerSide; - } - } + } + } } -void GridReader::setVelocityValues() { +void GridReader::setVelocityValues() { for (int level = 0; level < (int)(velocityX_BCvalues.size()); level++) { - - int sizePerLevel = (int) velocityX_BCvalues[level].size(); + + int sizePerLevel = (int) velocityX_BCvalues[level].size(); std::cout << "complete size velocity level " << level << " : " << sizePerLevel << std::endl; setVelocitySizePerLevel(level, sizePerLevel); - - if (sizePerLevel > 1) { + + if (sizePerLevel > 1) { cudaMemoryManager->cudaAllocVeloBC(level); setVelocity(level, sizePerLevel); - cudaMemoryManager->cudaCopyVeloBC(level); + cudaMemoryManager->cudaCopyVeloBC(level); } } } @@ -668,8 +672,8 @@ void GridReader::modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned /*------------------------------------------------------------------------------------------------*/ /*---------------------------------------private q methods----------------------------------------*/ /*------------------------------------------------------------------------------------------------*/ -void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, - std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const +void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, + std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const { boundaryQ->setValuesInVector(Qs, level); boundaryQ->setIndexInVector(index, level); @@ -685,7 +689,7 @@ void 
GridReader::copyVectorsToQStruct(std::vector<std::vector<real>> &Qs, for (int direction = 0; direction < para->getD3Qxx(); direction++) { for (size_t indexQ = 0; indexQ < sizeOfValues; indexQ++) { - qTemp.q27[direction][indexQ] = Qs[direction][indexQ]; + qTemp.q27[direction][indexQ] = Qs[direction][indexQ]; } } @@ -847,46 +851,46 @@ void GridReader::setBoundingBox() void GridReader::initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string boundaryCondition) { - std::vector<unsigned int>neighVec; - std::vector<unsigned int>indexVec; - - int counter = 0; - - for(unsigned int i=0; i<neighX->getLevel();i++) { - if(boundaryCondition =="periodic_y"){ - neighVec = neighY->getVec(i); - } - else if(boundaryCondition =="periodic_x"){ - neighVec = neighX->getVec(i); - } - else if(boundaryCondition =="periodic_z"){ - neighVec = neighZ->getVec(i); - } - else { - std::cout << "wrong String in periodicValue" << std::endl; - exit(1); - } + std::vector<unsigned int>neighVec; + std::vector<unsigned int>indexVec; - for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) { - if(periodV[i][0][counter] != 0) { - neighVec[*it]=periodV[i][0][counter]; - } + int counter = 0; - counter++; - } + for(unsigned int i=0; i<neighX->getLevel();i++) { + if(boundaryCondition =="periodic_y"){ + neighVec = neighY->getVec(i); + } + else if(boundaryCondition =="periodic_x"){ + neighVec = neighX->getVec(i); + } + else if(boundaryCondition =="periodic_z"){ + neighVec = neighZ->getVec(i); + } + else { + std::cout << "wrong String in periodicValue" << std::endl; + exit(1); + } + for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) { + if(periodV[i][0][counter] != 0) { + neighVec[*it]=periodV[i][0][counter]; + } - if(boundaryCondition =="periodic_y"){ - neighY->setVec(i, neighVec); - } - else if(boundaryCondition 
=="periodic_x"){ - neighX->setVec(i, neighVec); - } - else if(boundaryCondition =="periodic_z"){ - neighZ->setVec(i, neighVec); - } + counter++; + } - } + + if(boundaryCondition =="periodic_y"){ + neighY->setVec(i, neighVec); + } + else if(boundaryCondition =="periodic_x"){ + neighX->setVec(i, neighVec); + } + else if(boundaryCondition =="periodic_z"){ + neighZ->setVec(i, neighVec); + } + + } } void GridReader::makeReader(std::shared_ptr<Parameter> para) @@ -917,9 +921,9 @@ void GridReader::makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, st void GridReader::setChannelBoundaryCondition() { - for (std::size_t i = 0; i < channelDirections.size(); i++) - { - this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition(); - std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl; - } -} \ No newline at end of file + for (std::size_t i = 0; i < channelDirections.size(); i++) + { + this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition(); + std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl; + } +} diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h index 18efb6a7885191312ea4e2fbb22eb45162ab1de1..041d2c3ce94592f792c5a850eebd14c07f4db1b4 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h @@ -3,9 +3,9 @@ #include "../GridProvider.h" -#include <vector> -#include <string> #include <memory> +#include <string> +#include <vector> #include "LBM/LB.h" @@ -16,15 +16,14 @@ class BoundaryValues; class BoundaryQs; class CoordNeighborGeoV; -class VIRTUALFLUIDS_GPU_EXPORT GridReader - : public GridProvider +class VIRTUALFLUIDS_GPU_EXPORT GridReader : public GridProvider { private: - bool binaer; - 
std::vector<std::string> channelDirections; - std::vector<std::string> channelBoundaryConditions; - std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB; - std::vector<std::shared_ptr<BoundaryValues> > BC_Values; + bool binaer; + std::vector<std::string> channelDirections; + std::vector<std::string> channelBoundaryConditions; + std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB; + std::vector<std::shared_ptr<BoundaryValues>> BC_Values; std::vector<std::vector<real>> velocityX_BCvalues, velocityY_BCvalues, velocityZ_BCvalues; std::vector<std::vector<std::vector<real>>> velocityQs; @@ -34,57 +33,62 @@ private: std::vector<std::vector<real>> outflowBCvalues; public: - GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager); - ~GridReader(); - void allocArrays_CoordNeighborGeo() override; - void allocArrays_BoundaryValues() override; + GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, + std::shared_ptr<CudaMemoryManager> cudaMemoryManager); + ~GridReader() override; + void allocArrays_CoordNeighborGeo() override; + void allocArrays_BoundaryValues() override; void allocArrays_OffsetScale() override; - void allocArrays_fluidNodeIndices() override; - void allocArrays_fluidNodeIndicesBorder() override; + void allocArrays_taggedFluidNodes() override; - void initalValuesDomainDecompostion(int level); + void tagFluidNodeIndices(const std::vector<uint> &taggedFluidNodeIndices, CollisionTemplate tag, uint level) override; - void setChannelBoundaryCondition(); + void sortFluidNodeTags() override; - void allocArrays_BoundaryQs() override; - bool getBinaer(); - void setDimensions() override; - void setBoundingBox() override; - void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) override; + void initalValuesDomainDecompostion(int level); + + void 
setChannelBoundaryCondition(); + + void allocArrays_BoundaryQs() override; + bool getBinaer(); + void setDimensions() override; + void setBoundingBox() override; + void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int>>> periodV, + std::vector<std::vector<unsigned int>> periodIndex, std::string way) override; private: - void makeReader(std::shared_ptr<Parameter> para); - void makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, std::shared_ptr<Parameter> para); + void makeReader(std::shared_ptr<Parameter> para); + void makeReader(std::vector<std::shared_ptr<BoundaryQs>> &BC_Qs, std::shared_ptr<Parameter> para); - void setPressureValues(int channelSide) const; - void setPressRhoBC(int sizePerLevel, int level, int channelSide) const; + void setPressureValues(int channelSide) const; + void setPressRhoBC(int sizePerLevel, int level, int channelSide) const; - void fillVelocityVectors(int channelSide); + void fillVelocityVectors(int channelSide); void setVelocityValues(); - void setVelocity(int level, int sizePerLevel) const; + void setVelocity(int level, int sizePerLevel) const; - void setOutflowValues(int channelSide) const; - void setOutflow(int level, int sizePerLevel, int channelSide) const; + void setOutflowValues(int channelSide) const; + void setOutflow(int level, int sizePerLevel, int channelSide) const; - - //void fillVelocityQVectors(int channelSide); + // void fillVelocityQVectors(int channelSide); void setPressQs(std::shared_ptr<BoundaryQs> boundaryQ) const; - void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ); - void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const; - void setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const; - void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const; - void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ); + void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const; + void 
setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const; + void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const; + void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; - void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, + void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; void copyVectorsToQStruct(std::vector<std::vector<real>> &Qs, std::vector<int> &index, QforBoundaryConditions &Q) const; void initalQStruct(QforBoundaryConditions &Q, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; - void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; - void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; - void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; - void setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const; - bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + void setQ27Size(QforBoundaryConditions &Q, real *QQ, unsigned int sizeQ) const; + bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const; + public: void initalGridInformations() override; }; diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 7f61b4357276f38d8fde71489dcf60348b402941..38a7eef7e356e2f2da4c1a819d8375035a37313a 100644 --- 
a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -1,5 +1,6 @@ #include "GridGenerator.h" +#include "LBM/LB.h" #include "Parameter/Parameter.h" #include "GridGenerator/grid/GridBuilder/GridBuilder.h" #include "GPU/CudaMemoryManager.h" @@ -10,19 +11,24 @@ #include <algorithm> #include "utilities/math/Math.h" #include "Output/QDebugWriter.hpp" +#include "GridGenerator/TransientBCSetter/TransientBCSetter.h" #include "utilities/communication.h" #include "Communication/Communicator.h" +#include <logger/Logger.h> + using namespace vf::lbm::dir; -GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator): - mpiProcessID(communicator.getPID()), builder(builder) +GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, + std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator &communicator) + : mpiProcessID(communicator.getPID()), builder(builder) { this->para = para; this->cudaMemoryManager = cudaMemoryManager; this->indexRearrangement = std::make_unique<IndexRearrangementForStreams>(para, builder, communicator); - this->interpolationGrouper = std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder); + this->interpolationGrouper = + std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder); } GridGenerator::~GridGenerator() = default; @@ -55,15 +61,15 @@ void GridGenerator::allocArrays_CoordNeighborGeo() std::cout << "Number of Level: " << numberOfLevels << std::endl; int numberOfNodesGlobal = 0; std::cout << "Number of Nodes: " << std::endl; - - for (uint level = 0; level < numberOfLevels; level++) + + for (uint level = 0; level < numberOfLevels; 
level++) { - const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1; + const uint numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1; numberOfNodesGlobal += numberOfNodesPerLevel; std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl; - + setNumberOfNodes(numberOfNodesPerLevel, level); - + cudaMemoryManager->cudaAllocCoord(level); cudaMemoryManager->cudaAllocSP(level); //cudaMemoryManager->cudaAllocF3SP(level); @@ -71,7 +77,7 @@ void GridGenerator::allocArrays_CoordNeighborGeo() if(para->getUseTurbulentViscosity()) cudaMemoryManager->cudaAllocTurbulentViscosity(level); - + if(para->getIsBodyForce()) cudaMemoryManager->cudaAllocBodyForce(level); @@ -100,28 +106,104 @@ void GridGenerator::allocArrays_CoordNeighborGeo() std::cout << "-----finish Coord, Neighbor, Geo------" << std::endl; } -void GridGenerator::allocArrays_fluidNodeIndices() { - for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { - setNumberOfFluidNodes(builder->getNumberOfFluidNodes(level), level); - cudaMemoryManager->cudaAllocFluidNodeIndices(level); - builder->getFluidNodeIndices(para->getParH(level)->fluidNodeIndices, level); - cudaMemoryManager->cudaCopyFluidNodeIndices(level); - } +void GridGenerator::allocArrays_taggedFluidNodes() { + + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) + { + for ( CollisionTemplate tag: all_CollisionTemplate ) + { //TODO: Need to add CollisionTemplate to GridBuilder to allow as argument and get rid of indivual get funtions for fluid node indices... 
and clean up this mess + switch(tag) + { + case CollisionTemplate::Default: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodes(level), CollisionTemplate::Default, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::Default, level); + builder->getFluidNodeIndices(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::Default], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::Default, level); + if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0) + para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag); + break; + case CollisionTemplate::SubDomainBorder: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::SubDomainBorder, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level); + builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level); + break; + case CollisionTemplate::WriteMacroVars: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesMacroVars(level), CollisionTemplate::WriteMacroVars, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::WriteMacroVars, level); + builder->getFluidNodeIndicesMacroVars(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::WriteMacroVars], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::WriteMacroVars, level); + if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0) + para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag); + break; + case CollisionTemplate::ApplyBodyForce: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesApplyBodyForce(level), CollisionTemplate::ApplyBodyForce, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::ApplyBodyForce, 
level); + builder->getFluidNodeIndicesApplyBodyForce(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::ApplyBodyForce], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::ApplyBodyForce, level); + if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0) + para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag); + break; + case CollisionTemplate::AllFeatures: + this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesAllFeatures(level), CollisionTemplate::AllFeatures, level); + cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::AllFeatures, level); + builder->getFluidNodeIndicesAllFeatures(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::AllFeatures], level); + cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::AllFeatures, level); + if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0) + para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag); + break; + default: + break; + } + } + VF_LOG_INFO("Number of tagged nodes on level {}:", level); + VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}", + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default], + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::WriteMacroVars], + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::ApplyBodyForce], + para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures] ); + } } -void GridGenerator::allocArrays_fluidNodeIndicesBorder() { - for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { - setNumberOfFluidNodesBorder(builder->getNumberOfFluidNodesBorder(level), level); - cudaMemoryManager->cudaAllocFluidNodeIndicesBorder(level); - builder->getFluidNodeIndicesBorder(para->getParH(level)->fluidNodeIndicesBorder, level); - 
cudaMemoryManager->cudaCopyFluidNodeIndicesBorder(level); +void GridGenerator::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) { + switch(tag) + { + case CollisionTemplate::WriteMacroVars: + builder->addFluidNodeIndicesMacroVars( taggedFluidNodeIndices, level ); + break; + case CollisionTemplate::ApplyBodyForce: + builder->addFluidNodeIndicesApplyBodyForce( taggedFluidNodeIndices, level ); + break; + case CollisionTemplate::AllFeatures: + builder->addFluidNodeIndicesAllFeatures( taggedFluidNodeIndices, level ); + break; + case CollisionTemplate::Default: + case CollisionTemplate::SubDomainBorder: + throw std::runtime_error("Cannot tag fluid nodes as Default or SubDomainBorder!"); + default: + throw std::runtime_error("Tagging fluid nodes with invald tag!"); + break; + } + +} + +void GridGenerator::sortFluidNodeTags() { + VF_LOG_INFO("Start sorting tagged fluid nodes..."); + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) + { + builder->sortFluidNodeIndicesAllFeatures(level); //has to be called first! 
+ builder->sortFluidNodeIndicesMacroVars(level); + builder->sortFluidNodeIndicesApplyBodyForce(level); + } + VF_LOG_INFO("done."); } void GridGenerator::allocArrays_BoundaryValues() { std::cout << "------read BoundaryValues------" << std::endl; - int blocks = 0; + int blocks; for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { const auto numberOfPressureValues = int(builder->getPressureSize(level)); @@ -129,6 +211,7 @@ void GridGenerator::allocArrays_BoundaryValues() //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// para->getParH(level)->pressureBC.numberOfBCnodes = 0; + para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor(); if (numberOfPressureValues > 1) { blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1; @@ -148,12 +231,12 @@ void GridGenerator::allocArrays_BoundaryValues() //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// para->getParH(level)->slipBC.numberOfBCnodes = 0; - if (numberOfSlipValues > 1) - { + if (numberOfSlipValues > 1) { blocks = (numberOfSlipValues / para->getParH(level)->numberofthreads) + 1; para->getParH(level)->slipBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads; cudaMemoryManager->cudaAllocSlipBC(level); - builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY, para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level); + builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY, + para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level); cudaMemoryManager->cudaCopySlipBC(level); } para->getParD(level)->slipBC.numberOfBCnodes = para->getParH(level)->slipBC.numberOfBCnodes; @@ -173,11 +256,11 @@ void 
GridGenerator::allocArrays_BoundaryValues() para->getParH(level)->stressBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads; cudaMemoryManager->cudaAllocStressBC(level); cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor()); - builder->getStressValues( para->getParH(level)->stressBC.normalX, para->getParH(level)->stressBC.normalY, para->getParH(level)->stressBC.normalZ, + builder->getStressValues( para->getParH(level)->stressBC.normalX, para->getParH(level)->stressBC.normalY, para->getParH(level)->stressBC.normalZ, para->getParH(level)->stressBC.Vx, para->getParH(level)->stressBC.Vy, para->getParH(level)->stressBC.Vz, para->getParH(level)->stressBC.Vx1, para->getParH(level)->stressBC.Vy1, para->getParH(level)->stressBC.Vz1, - para->getParH(level)->stressBC.k, para->getParH(level)->stressBC.kN, - para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0, + para->getParH(level)->stressBC.k, para->getParH(level)->stressBC.kN, + para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0, level); cudaMemoryManager->cudaCopyStressBC(level); @@ -187,7 +270,7 @@ void GridGenerator::allocArrays_BoundaryValues() para->getParH(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx(); para->getParD(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx(); } - + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { const auto numberOfVelocityValues = int(builder->getVelocitySize(level)); @@ -204,7 +287,8 @@ void GridGenerator::allocArrays_BoundaryValues() cudaMemoryManager->cudaAllocVeloBC(level); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy, para->getParH(level)->velocityBC.Vz, 
para->getParH(level)->velocityBC.k, level); + builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy, + para->getParH(level)->velocityBC.Vz, para->getParH(level)->velocityBC.k, level); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -242,6 +326,100 @@ void GridGenerator::allocArrays_BoundaryValues() para->getParD(level)->numberOfVeloBCnodesRead = para->getParH(level)->velocityBC.numberOfBCnodes * para->getD3Qxx(); } + for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) { + const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level)); + *logging::out << logging::Logger::INFO_INTERMEDIATE << "size precursor level " << level << " : " << numberOfPrecursorValues << "\n"; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1; + para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads; + para->getParD(level)->precursorBC.sizeQ = para->getParH(level)->precursorBC.sizeQ; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParH(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues; + para->getParD(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues; + para->getParH(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx(); + para->getParD(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx(); + + if (numberOfPrecursorValues > 1) + { + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + 
cudaMemoryManager->cudaAllocPrecursorBC(level); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + builder->getPrecursorValues( + para->getParH(level)->precursorBC.planeNeighbor0PP, para->getParH(level)->precursorBC.planeNeighbor0PM, + para->getParH(level)->precursorBC.planeNeighbor0MP, para->getParH(level)->precursorBC.planeNeighbor0MM, + para->getParH(level)->precursorBC.weights0PP, para->getParH(level)->precursorBC.weights0PM, + para->getParH(level)->precursorBC.weights0MP, para->getParH(level)->precursorBC.weights0MM, + para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes, + para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads, + para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ, + level); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + para->getParD(level)->precursorBC.numberOfPrecursorNodes = para->getParH(level)->precursorBC.numberOfPrecursorNodes; + para->getParD(level)->precursorBC.numberOfQuantities = para->getParH(level)->precursorBC.numberOfQuantities; + para->getParD(level)->precursorBC.timeStepsBetweenReads = para->getParH(level)->precursorBC.timeStepsBetweenReads; + para->getParD(level)->precursorBC.velocityX = para->getParH(level)->precursorBC.velocityX; + para->getParD(level)->precursorBC.velocityY = para->getParH(level)->precursorBC.velocityY; + para->getParD(level)->precursorBC.velocityZ = para->getParH(level)->precursorBC.velocityZ; + + for(auto reader : para->getParH(level)->transientBCInputFileReader) + { + if(reader->getNumberOfQuantities() != para->getParD(level)->precursorBC.numberOfQuantities) + throw 
std::runtime_error( + "Number of quantities in reader and number of quantities needed for precursor don't match!"); + } + + cudaMemoryManager->cudaCopyPrecursorBC(level); + cudaMemoryManager->cudaAllocPrecursorData(level); + + // read first timestep of precursor into next and copy to next on device + for(auto reader : para->getParH(level)->transientBCInputFileReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 0); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + //switch next with last pointers + real* tmp = para->getParD(level)->precursorBC.last; + para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + //read second timestep of precursor into next and copy next to device + real nextTime = para->getParD(level)->precursorBC.timeStepsBetweenReads*pow(2,-((real)level))*para->getTimeRatio(); + for(auto reader : para->getParH(level)->transientBCInputFileReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + para->getParD(level)->precursorBC.nPrecursorReads = 1; + + + //switch next with current pointers + tmp = para->getParD(level)->precursorBC.current; + para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + //start usual cycle of loading, i.e. 
read velocities of timestep after current and copy asynchronously to device + for(auto reader : para->getParH(level)->transientBCInputFileReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 2*nextTime); + } + + cudaMemoryManager->cudaCopyPrecursorData(level); + + para->getParD(level)->precursorBC.nPrecursorReads = 2; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // advection - diffusion stuff + if (para->getDiffOn()==true){ + throw std::runtime_error(" Advection Diffusion not implemented for Precursor!"); + } + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + } + if (builder->hasGeometryValues()) { @@ -303,7 +481,7 @@ void GridGenerator::initalValuesDomainDecompostion() if (para->getNumprocs() < 2) return; if ((para->getNumprocs() > 1) /*&& (procNeighborsSendX.size() == procNeighborsRecvX.size())*/) { - + // direction has to be changed in case of periodic BCs and multiple sub domains std::vector<int> fillOrder = { 0, 1, 2, 3, 4, 5 }; @@ -383,7 +561,7 @@ void GridGenerator::initalValuesDomainDecompostion() builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborX[indexProcessNeighbor].index, direction, level); if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC) - indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction); + indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction); //////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyProcessNeighborXIndex(level, indexProcessNeighbor); 
//////////////////////////////////////////////////////////////////////////////////////// @@ -446,7 +624,7 @@ void GridGenerator::initalValuesDomainDecompostion() //////////////////////////////////////////////////////////////////////////////////////// // malloc on host and device cudaMemoryManager->cudaAllocProcessNeighborY(level, indexProcessNeighbor); - //////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////// // init index arrays builder->getSendIndices(para->getParH(level)->sendProcessNeighborY[indexProcessNeighbor].index, direction, level); builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborY[indexProcessNeighbor].index, direction, @@ -465,7 +643,7 @@ void GridGenerator::initalValuesDomainDecompostion() if (tempSend > 0) { int indexProcessNeighbor = (int)para->getParH(level)->sendProcessNeighborZ.size(); - + para->getParH(level)->sendProcessNeighborZ.emplace_back(); para->getParD(level)->sendProcessNeighborZ.emplace_back(); para->getParH(level)->recvProcessNeighborZ.emplace_back(); @@ -755,9 +933,9 @@ void GridGenerator::allocArrays_BoundaryQs() //preprocessing real* QQ = para->getParH(i)->pressureBC.q27[0]; unsigned int sizeQ = para->getParH(i)->pressureBC.numberOfBCnodes; - QforBoundaryConditions Q; + QforBoundaryConditions &Q = para->getParH(i)->pressureBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); - + builder->getPressureQs(Q.q27, i); @@ -802,9 +980,9 @@ void GridGenerator::allocArrays_BoundaryQs() //preprocessing real* QQ = para->getParH(i)->slipBC.q27[0]; unsigned int sizeQ = para->getParH(i)->slipBC.numberOfBCnodes; - QforBoundaryConditions Q; + QforBoundaryConditions &Q = para->getParH(i)->slipBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); - + builder->getSlipQs(Q.q27, i); 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopySlipBC(i); @@ -822,9 +1000,9 @@ void GridGenerator::allocArrays_BoundaryQs() //preprocessing real* QQ = para->getParH(i)->stressBC.q27[0]; unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes; - QforBoundaryConditions Q; + QforBoundaryConditions &Q = para->getParH(i)->stressBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); - + builder->getStressQs(Q.q27, i); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyStressBC(i); @@ -842,7 +1020,7 @@ void GridGenerator::allocArrays_BoundaryQs() //preprocessing real* QQ = para->getParH(i)->velocityBC.q27[0]; unsigned int sizeQ = para->getParH(i)->velocityBC.numberOfBCnodes; - QforBoundaryConditions Q; + QforBoundaryConditions &Q = para->getParH(i)->velocityBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); builder->getVelocityQs(Q.q27, i); @@ -874,6 +1052,50 @@ void GridGenerator::allocArrays_BoundaryQs() } } + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { + const auto numberOfPrecursorNodes = int(builder->getPrecursorSize(i)); + if (numberOfPrecursorNodes > 0) + { + std::cout << "size velocity level " << i << " : " << numberOfPrecursorNodes << std::endl; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //preprocessing + real* QQ = para->getParH(i)->precursorBC.q27[0]; + unsigned int sizeQ = para->getParH(i)->precursorBC.numberOfBCnodes; + QforBoundaryConditions Q; + getPointersToBoundaryConditions(Q, QQ, sizeQ); + + builder->getPrecursorQs(Q.q27, i); + + if (para->getDiffOn()) { + throw std::runtime_error("Advection diffusion not implemented for Precursor!"); + 
////////////////////////////////////////////////////////////////////////// + // para->getParH(i)->TempVel.kTemp = numberOfVelocityNodes; + // para->getParD(i)->TempVel.kTemp = numberOfVelocityNodes; + // std::cout << "Groesse TempVel.kTemp = " << para->getParH(i)->TempPress.kTemp << std::endl; + // std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl; + // std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl; + // ////////////////////////////////////////////////////////////////////////// + // cudaMemoryManager->cudaAllocTempVeloBC(i); + // //cout << "nach alloc " << std::endl; + // ////////////////////////////////////////////////////////////////////////// + // for (int m = 0; m < numberOfVelocityNodes; m++) + // { + // para->getParH(i)->TempVel.temp[m] = para->getTemperatureInit(); + // para->getParH(i)->TempVel.tempPulse[m] = para->getTemperatureBC(); + // para->getParH(i)->TempVel.velo[m] = para->getVelocity(); + // para->getParH(i)->TempVel.k[m] = para->getParH(i)->Qinflow.k[m]; + // } + // ////////////////////////////////////////////////////////////////////////// + // //cout << "vor copy " << std::endl; + // cudaMemoryManager->cudaCopyTempVeloBCHD(i); + // //cout << "nach copy " << std::endl; + ////////////////////////////////////////////////////////////////////////// + } + cudaMemoryManager->cudaCopyPrecursorBC(i); + } + } + + for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) { const int numberOfGeometryNodes = builder->getGeometrySize(i); @@ -898,7 +1120,7 @@ void GridGenerator::allocArrays_BoundaryQs() //preprocessing real* QQ = para->getParH(i)->geometryBC.q27[0]; unsigned int sizeQ = para->getParH(i)->geometryBC.numberOfBCnodes; - QforBoundaryConditions Q; + QforBoundaryConditions &Q = para->getParH(i)->geometryBC; getPointersToBoundaryConditions(Q, QQ, sizeQ); ////////////////////////////////////////////////////////////////// @@ -948,7 +1170,7 @@ void GridGenerator::allocArrays_BoundaryQs() void 
GridGenerator::allocArrays_OffsetScale() { - for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++) + for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++) { const uint numberOfNodesPerLevelCF = builder->getNumberOfNodesCF(level); const uint numberOfNodesPerLevelFC = builder->getNumberOfNodesFC(level); @@ -987,7 +1209,7 @@ void GridGenerator::allocArrays_OffsetScale() builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level); builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level); builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level); - + if (para->getUseStreams() || para->getNumprocs() > 1) { // split fine-to-coarse indices into border and bulk interpolationGrouper->splitFineToCoarseIntoBorderAndBulk(level); @@ -1060,8 +1282,8 @@ std::string GridGenerator::verifyNeighborIndices(int level) const int wrongNeighbors = 0; int stopperNodes = 0; - for (uint index = 0; index < para->getParH(level)->numberOfNodes; index++) - oss << verifyNeighborIndex(level, index, invalidNodes, stopperNodes, wrongNeighbors); + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) + oss << verifyNeighborIndex(level, (int)index, invalidNodes, stopperNodes, wrongNeighbors); oss << "invalid nodes found: " << invalidNodes << "\n"; @@ -1090,7 +1312,7 @@ std::string GridGenerator::verifyNeighborIndex(int level, int index , int &inval //std::cout << para->getParH(level)->coordinateX[1] << ", " << para->getParH(level)->coordinateY[1] << ", " << para->getParH(level)->coordinateZ[1] << std::endl; //std::cout << para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] << ", " << 
para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] << ", " << para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] << std::endl; - + real maxX = para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] - delta; real maxY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] - delta; real maxZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] - delta; @@ -1131,8 +1353,8 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int if (!neighborValid) { oss << "NeighborX invalid from: (" << x << ", " << y << ", " << z << "), index: " << index << ", " - << direction << " neighborIndex: " << neighborIndex << - ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ << + << direction << " neighborIndex: " << neighborIndex << + ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ << "), expected neighborCoords : (" << neighborX << ", " << neighborY << ", " << neighborZ << ")\n"; numberOfWrongNeihgbors++; } @@ -1140,31 +1362,31 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int } void GridGenerator::getPointersToBoundaryConditions(QforBoundaryConditions& boundaryConditionStruct, real* subgridDistances, const unsigned int numberOfBCnodes){ - boundaryConditionStruct.q27[DIR_P00] = &subgridDistances[DIR_P00 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_M00] = &subgridDistances[DIR_M00 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_00P] = &subgridDistances[DIR_00P * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_00M] = &subgridDistances[DIR_00M * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PP0] = &subgridDistances[DIR_PP0 * 
numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000* numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfBCnodes]; - boundaryConditionStruct.q27[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfBCnodes]; -} \ No newline at end of file + boundaryConditionStruct.q27[DIR_P00] = &subgridDistances[DIR_P00 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_M00] = &subgridDistances[DIR_M00 * numberOfBCnodes]; + 
boundaryConditionStruct.q27[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_00P] = &subgridDistances[DIR_00P * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_00M] = &subgridDistances[DIR_00M * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000 * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfBCnodes]; 
+ boundaryConditionStruct.q27[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfBCnodes]; + boundaryConditionStruct.q27[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfBCnodes]; +} diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h index d2f56e1df4ee5658c61b8e8a3e94a820d1a4f2f1..c97ed02a64da1d5fafa18150c75d149f96484d44 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h @@ -40,6 +40,7 @@ #include "LBM/LB.h" + class Parameter; class GridBuilder; class IndexRearrangementForStreams; @@ -75,8 +76,10 @@ public: //! \brief allocates and initialized the sub-grid distances at the boundary conditions void allocArrays_BoundaryQs() override; void allocArrays_OffsetScale() override; - void allocArrays_fluidNodeIndices() override; - void allocArrays_fluidNodeIndicesBorder() override; + void allocArrays_taggedFluidNodes() override; + + void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) override; + void sortFluidNodeTags() override; virtual void setDimensions() override; virtual void setBoundingBox() override; diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp index bff054eb174a0f5fa34119deedde6f1c9733d83c..b1c398638cff1ec1b6d52f59f8e773183e270331 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp +++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.cpp @@ -35,6 +35,11 @@ void BoundaryConditionFactory::setStressBoundaryCondition(const StressBC boundar this->stressBoundaryCondition = boundaryConditionType; } +void BoundaryConditionFactory::setPrecursorBoundaryCondition(const PrecursorBC boundaryConditionType) +{ + 
this->precursorBoundaryCondition = boundaryConditionType; +} + boundaryCondition BoundaryConditionFactory::getVelocityBoundaryConditionPost(bool isGeometryBC) const { const VelocityBC &boundaryCondition = @@ -132,6 +137,22 @@ boundaryCondition BoundaryConditionFactory::getPressureBoundaryConditionPre() co case PressureBC::OutflowNonReflective: return QPressNoRhoDev27; break; + case PressureBC::OutflowNonReflectivePressureCorrection: + return QPressZeroRhoOutflowDev27; + default: + return nullptr; + } +} + +precursorBoundaryConditionFunc BoundaryConditionFactory::getPrecursorBoundaryConditionPost() const +{ + switch (this->precursorBoundaryCondition) { + case PrecursorBC::VelocityPrecursor: + return QPrecursorDevCompZeroPress; + break; + case PrecursorBC::DistributionsPrecursor: + return PrecursorDevDistributions; + break; default: return nullptr; } diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h index 9d6872c4847be72dff4be7137b774c8082e39e34..c6877cbfeffe5b32c0c2d336e46b02d68cd946a3 100644 --- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h +++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h @@ -42,11 +42,13 @@ #include "Parameter/Parameter.h" #include "gpu/GridGenerator/grid/BoundaryConditions/Side.h" + struct LBMSimulationParameter; class Parameter; using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>; using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>; +using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real timeRatio, real velocityRatio)>; class BoundaryConditionFactory { @@ -109,6 +111,8 @@ public: PressureNonEquilibriumCompressible, //! 
- OutflowNonReflective = outflow boundary condition, should be combined with VelocityAndPressureCompressible OutflowNonReflective, + //! - OutflowNonReflectivePressureCorrection = like OutflowNonReflective, but also reduces pressure overshoot + OutflowNonReflectivePressureCorrection, //! - NotSpecified = the user did not set a boundary condition NotSpecified }; @@ -128,11 +132,21 @@ public: // enum class OutflowBoundaryCondition {}; // TODO: // https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16 + enum class PrecursorBC { + //! - VelocityPrecursor + VelocityPrecursor, + //! - DistributionsPrecursor + DistributionsPrecursor, + //! - NotSpecified = the user did not set a boundary condition + NotSpecified + }; + void setVelocityBoundaryCondition(const BoundaryConditionFactory::VelocityBC boundaryConditionType); void setNoSlipBoundaryCondition(const BoundaryConditionFactory::NoSlipBC boundaryConditionType); void setSlipBoundaryCondition(const BoundaryConditionFactory::SlipBC boundaryConditionType); void setPressureBoundaryCondition(const BoundaryConditionFactory::PressureBC boundaryConditionType); void setStressBoundaryCondition(const BoundaryConditionFactory::StressBC boundaryConditionType); + void setPrecursorBoundaryCondition(const BoundaryConditionFactory::PrecursorBC boundaryConditionType); //! \brief set a boundary condition for the geometry //! param boundaryConditionType: a velocity, no-slip or slip boundary condition //! 
\details suggestions for boundaryConditionType: @@ -152,6 +166,8 @@ public: [[nodiscard]] boundaryCondition getSlipBoundaryConditionPost(bool isGeometryBC = false) const; [[nodiscard]] boundaryCondition getPressureBoundaryConditionPre() const; [[nodiscard]] boundaryCondition getGeometryBoundaryConditionPost() const; + [[nodiscard]] precursorBoundaryConditionFunc getPrecursorBoundaryConditionPost() const; + [[nodiscard]] boundaryConditionWithParameter getStressBoundaryConditionPost() const; @@ -162,6 +178,7 @@ private: PressureBC pressureBoundaryCondition = PressureBC::NotSpecified; std::variant<VelocityBC, NoSlipBC, SlipBC> geometryBoundaryCondition = NoSlipBC::NoSlipImplicitBounceBack; StressBC stressBoundaryCondition = StressBC::NotSpecified; + PrecursorBC precursorBoundaryCondition = PrecursorBC::NotSpecified; // OutflowBoundaryConditon outflowBC // TODO: https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16 }; diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu index 8f54358e04063c9063c873caf02a86e76bb7f936..04f6afe4cf9ebd99dc293ded16f55a56f0d77036 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu @@ -74,7 +74,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( uint* neighborZ, real* distributions, real* distributionsAD, - int size_Mat, + unsigned long long numberOfLBnodes, real* forces, bool isEvenTimestep) { @@ -100,7 +100,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( ////////////////////////////////////////////////////////////////////////// // run for all indices in size_Mat and fluid nodes - if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) + if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID)) { ////////////////////////////////////////////////////////////////////////// //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -109,125 +109,125 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( Distributions27 dist; if (isEvenTimestep) { - dist.f[DIR_P00 ] = &distributions[DIR_P00 *size_Mat]; - dist.f[DIR_M00 ] = &distributions[DIR_M00 *size_Mat]; - dist.f[DIR_0P0 ] = &distributions[DIR_0P0 *size_Mat]; - dist.f[DIR_0M0 ] = &distributions[DIR_0M0 *size_Mat]; - dist.f[DIR_00P ] = &distributions[DIR_00P *size_Mat]; - dist.f[DIR_00M ] = &distributions[DIR_00M *size_Mat]; - dist.f[DIR_PP0 ] = &distributions[DIR_PP0 *size_Mat]; - dist.f[DIR_MM0 ] = &distributions[DIR_MM0 *size_Mat]; - dist.f[DIR_PM0 ] = &distributions[DIR_PM0 *size_Mat]; - dist.f[DIR_MP0 ] = &distributions[DIR_MP0 *size_Mat]; - dist.f[DIR_P0P ] = &distributions[DIR_P0P *size_Mat]; - dist.f[DIR_M0M ] = &distributions[DIR_M0M *size_Mat]; - dist.f[DIR_P0M ] = &distributions[DIR_P0M *size_Mat]; - dist.f[DIR_M0P ] = &distributions[DIR_M0P *size_Mat]; - dist.f[DIR_0PP ] = &distributions[DIR_0PP *size_Mat]; - dist.f[DIR_0MM ] = &distributions[DIR_0MM *size_Mat]; - dist.f[DIR_0PM ] = &distributions[DIR_0PM *size_Mat]; - dist.f[DIR_0MP ] = &distributions[DIR_0MP *size_Mat]; - dist.f[DIR_000] = &distributions[DIR_000*size_Mat]; - dist.f[DIR_PPP ] = &distributions[DIR_PPP *size_Mat]; - dist.f[DIR_MMP ] = &distributions[DIR_MMP *size_Mat]; - dist.f[DIR_PMP ] = &distributions[DIR_PMP *size_Mat]; - dist.f[DIR_MPP ] = &distributions[DIR_MPP *size_Mat]; - dist.f[DIR_PPM ] = &distributions[DIR_PPM *size_Mat]; - dist.f[DIR_MMM ] = &distributions[DIR_MMM *size_Mat]; - dist.f[DIR_PMM ] = &distributions[DIR_PMM *size_Mat]; - dist.f[DIR_MPM ] = &distributions[DIR_MPM *size_Mat]; + dist.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes]; + dist.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes]; + dist.f[DIR_0P0] = &distributions[DIR_0P0 * 
numberOfLBnodes]; + dist.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes]; + dist.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes]; + dist.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes]; + dist.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes]; + dist.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes]; + dist.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes]; + dist.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes]; + dist.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes]; + dist.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes]; + dist.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes]; + dist.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes]; + dist.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes]; + dist.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes]; + dist.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes]; + dist.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes]; + dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + dist.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes]; + dist.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes]; + dist.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes]; + dist.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes]; + dist.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes]; + dist.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes]; + dist.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes]; + dist.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes]; } else { - dist.f[DIR_M00 ] = &distributions[DIR_P00 *size_Mat]; - dist.f[DIR_P00 ] = &distributions[DIR_M00 *size_Mat]; - dist.f[DIR_0M0 ] = &distributions[DIR_0P0 *size_Mat]; - dist.f[DIR_0P0 ] = &distributions[DIR_0M0 *size_Mat]; - dist.f[DIR_00M ] = &distributions[DIR_00P *size_Mat]; - dist.f[DIR_00P ] = &distributions[DIR_00M *size_Mat]; - dist.f[DIR_MM0 ] = &distributions[DIR_PP0 *size_Mat]; - dist.f[DIR_PP0 ] = &distributions[DIR_MM0 *size_Mat]; - 
dist.f[DIR_MP0 ] = &distributions[DIR_PM0 *size_Mat]; - dist.f[DIR_PM0 ] = &distributions[DIR_MP0 *size_Mat]; - dist.f[DIR_M0M ] = &distributions[DIR_P0P *size_Mat]; - dist.f[DIR_P0P ] = &distributions[DIR_M0M *size_Mat]; - dist.f[DIR_M0P ] = &distributions[DIR_P0M *size_Mat]; - dist.f[DIR_P0M ] = &distributions[DIR_M0P *size_Mat]; - dist.f[DIR_0MM ] = &distributions[DIR_0PP *size_Mat]; - dist.f[DIR_0PP ] = &distributions[DIR_0MM *size_Mat]; - dist.f[DIR_0MP ] = &distributions[DIR_0PM *size_Mat]; - dist.f[DIR_0PM ] = &distributions[DIR_0MP *size_Mat]; - dist.f[DIR_000] = &distributions[DIR_000*size_Mat]; - dist.f[DIR_MMM ] = &distributions[DIR_PPP *size_Mat]; - dist.f[DIR_PPM ] = &distributions[DIR_MMP *size_Mat]; - dist.f[DIR_MPM ] = &distributions[DIR_PMP *size_Mat]; - dist.f[DIR_PMM ] = &distributions[DIR_MPP *size_Mat]; - dist.f[DIR_MMP ] = &distributions[DIR_PPM *size_Mat]; - dist.f[DIR_PPP ] = &distributions[DIR_MMM *size_Mat]; - dist.f[DIR_MPP ] = &distributions[DIR_PMM *size_Mat]; - dist.f[DIR_PMP ] = &distributions[DIR_MPM *size_Mat]; + dist.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes]; + dist.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes]; + dist.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes]; + dist.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes]; + dist.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes]; + dist.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes]; + dist.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes]; + dist.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes]; + dist.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes]; + dist.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes]; + dist.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes]; + dist.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes]; + dist.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes]; + dist.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes]; + dist.f[DIR_0MM] = &distributions[DIR_0PP * 
numberOfLBnodes]; + dist.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes]; + dist.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes]; + dist.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes]; + dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + dist.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes]; + dist.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes]; + dist.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes]; + dist.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes]; + dist.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes]; + dist.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes]; + dist.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes]; + dist.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// Distributions27 distAD; if (isEvenTimestep) { - distAD.f[DIR_P00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_M00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0PP ] = &distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0MP *size_Mat]; - 
distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat]; + distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * 
numberOfLBnodes]; + distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } else { - distAD.f[DIR_M00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_P00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0PP ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0MP *size_Mat]; - distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat]; + distAD.f[DIR_M00] 
= &distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// //! 
- Set neighbor indices (necessary for indirect addressing) @@ -241,63 +241,63 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( //////////////////////////////////////////////////////////////////////////////////// //! - Set local distributions Fluid //! - real fcbb = (dist.f[DIR_P00 ])[k]; - real fabb = (dist.f[DIR_M00 ])[kw]; - real fbcb = (dist.f[DIR_0P0 ])[k]; - real fbab = (dist.f[DIR_0M0 ])[ks]; - real fbbc = (dist.f[DIR_00P ])[k]; - real fbba = (dist.f[DIR_00M ])[kb]; - real fccb = (dist.f[DIR_PP0 ])[k]; - real faab = (dist.f[DIR_MM0 ])[ksw]; - real fcab = (dist.f[DIR_PM0 ])[ks]; - real facb = (dist.f[DIR_MP0 ])[kw]; - real fcbc = (dist.f[DIR_P0P ])[k]; - real faba = (dist.f[DIR_M0M ])[kbw]; - real fcba = (dist.f[DIR_P0M ])[kb]; - real fabc = (dist.f[DIR_M0P ])[kw]; - real fbcc = (dist.f[DIR_0PP ])[k]; - real fbaa = (dist.f[DIR_0MM ])[kbs]; - real fbca = (dist.f[DIR_0PM ])[kb]; - real fbac = (dist.f[DIR_0MP ])[ks]; + real fcbb = (dist.f[DIR_P00])[k]; + real fabb = (dist.f[DIR_M00])[kw]; + real fbcb = (dist.f[DIR_0P0])[k]; + real fbab = (dist.f[DIR_0M0])[ks]; + real fbbc = (dist.f[DIR_00P])[k]; + real fbba = (dist.f[DIR_00M])[kb]; + real fccb = (dist.f[DIR_PP0])[k]; + real faab = (dist.f[DIR_MM0])[ksw]; + real fcab = (dist.f[DIR_PM0])[ks]; + real facb = (dist.f[DIR_MP0])[kw]; + real fcbc = (dist.f[DIR_P0P])[k]; + real faba = (dist.f[DIR_M0M])[kbw]; + real fcba = (dist.f[DIR_P0M])[kb]; + real fabc = (dist.f[DIR_M0P])[kw]; + real fbcc = (dist.f[DIR_0PP])[k]; + real fbaa = (dist.f[DIR_0MM])[kbs]; + real fbca = (dist.f[DIR_0PM])[kb]; + real fbac = (dist.f[DIR_0MP])[ks]; real fbbb = (dist.f[DIR_000])[k]; - real fccc = (dist.f[DIR_PPP ])[k]; - real faac = (dist.f[DIR_MMP ])[ksw]; - real fcac = (dist.f[DIR_PMP ])[ks]; - real facc = (dist.f[DIR_MPP ])[kw]; - real fcca = (dist.f[DIR_PPM ])[kb]; - real faaa = (dist.f[DIR_MMM ])[kbsw]; - real fcaa = (dist.f[DIR_PMM ])[kbs]; - real faca = (dist.f[DIR_MPM ])[kbw]; + real fccc = 
(dist.f[DIR_PPP])[k]; + real faac = (dist.f[DIR_MMP])[ksw]; + real fcac = (dist.f[DIR_PMP])[ks]; + real facc = (dist.f[DIR_MPP])[kw]; + real fcca = (dist.f[DIR_PPM])[kb]; + real faaa = (dist.f[DIR_MMM])[kbsw]; + real fcaa = (dist.f[DIR_PMM])[kbs]; + real faca = (dist.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// //! - Set local distributions Advection Diffusion //! - real mfcbb = (distAD.f[DIR_P00 ])[k]; - real mfabb = (distAD.f[DIR_M00 ])[kw]; - real mfbcb = (distAD.f[DIR_0P0 ])[k]; - real mfbab = (distAD.f[DIR_0M0 ])[ks]; - real mfbbc = (distAD.f[DIR_00P ])[k]; - real mfbba = (distAD.f[DIR_00M ])[kb]; - real mfccb = (distAD.f[DIR_PP0 ])[k]; - real mfaab = (distAD.f[DIR_MM0 ])[ksw]; - real mfcab = (distAD.f[DIR_PM0 ])[ks]; - real mfacb = (distAD.f[DIR_MP0 ])[kw]; - real mfcbc = (distAD.f[DIR_P0P ])[k]; - real mfaba = (distAD.f[DIR_M0M ])[kbw]; - real mfcba = (distAD.f[DIR_P0M ])[kb]; - real mfabc = (distAD.f[DIR_M0P ])[kw]; - real mfbcc = (distAD.f[DIR_0PP ])[k]; - real mfbaa = (distAD.f[DIR_0MM ])[kbs]; - real mfbca = (distAD.f[DIR_0PM ])[kb]; - real mfbac = (distAD.f[DIR_0MP ])[ks]; + real mfcbb = (distAD.f[DIR_P00])[k]; + real mfabb = (distAD.f[DIR_M00])[kw]; + real mfbcb = (distAD.f[DIR_0P0])[k]; + real mfbab = (distAD.f[DIR_0M0])[ks]; + real mfbbc = (distAD.f[DIR_00P])[k]; + real mfbba = (distAD.f[DIR_00M])[kb]; + real mfccb = (distAD.f[DIR_PP0])[k]; + real mfaab = (distAD.f[DIR_MM0])[ksw]; + real mfcab = (distAD.f[DIR_PM0])[ks]; + real mfacb = (distAD.f[DIR_MP0])[kw]; + real mfcbc = (distAD.f[DIR_P0P])[k]; + real mfaba = (distAD.f[DIR_M0M])[kbw]; + real mfcba = (distAD.f[DIR_P0M])[kb]; + real mfabc = (distAD.f[DIR_M0P])[kw]; + real mfbcc = (distAD.f[DIR_0PP])[k]; + real mfbaa = (distAD.f[DIR_0MM])[kbs]; + real mfbca = (distAD.f[DIR_0PM])[kb]; + real mfbac = (distAD.f[DIR_0MP])[ks]; real mfbbb = (distAD.f[DIR_000])[k]; - real mfccc = (distAD.f[DIR_PPP ])[k]; - real mfaac = (distAD.f[DIR_MMP ])[ksw]; - 
real mfcac = (distAD.f[DIR_PMP ])[ks]; - real mfacc = (distAD.f[DIR_MPP ])[kw]; - real mfcca = (distAD.f[DIR_PPM ])[kb]; - real mfaaa = (distAD.f[DIR_MMM ])[kbsw]; - real mfcaa = (distAD.f[DIR_PMM ])[kbs]; - real mfaca = (distAD.f[DIR_MPM ])[kbw]; + real mfccc = (distAD.f[DIR_PPP])[k]; + real mfaac = (distAD.f[DIR_MMP])[ksw]; + real mfcac = (distAD.f[DIR_PMP])[ks]; + real mfacc = (distAD.f[DIR_MPP])[kw]; + real mfcca = (distAD.f[DIR_PPM])[kb]; + real mfaaa = (distAD.f[DIR_MMM])[kbsw]; + real mfcaa = (distAD.f[DIR_PMM])[kbs]; + real mfaca = (distAD.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> @@ -503,33 +503,33 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( //! stored arrays dependent on timestep is based on the esoteric twist algorithm //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
- (distAD.f[DIR_P00 ])[k ] = mfabb; - (distAD.f[DIR_M00 ])[kw ] = mfcbb; - (distAD.f[DIR_0P0 ])[k ] = mfbab; - (distAD.f[DIR_0M0 ])[ks ] = mfbcb; - (distAD.f[DIR_00P ])[k ] = mfbba; - (distAD.f[DIR_00M ])[kb ] = mfbbc; - (distAD.f[DIR_PP0 ])[k ] = mfaab; - (distAD.f[DIR_MM0 ])[ksw ] = mfccb; - (distAD.f[DIR_PM0 ])[ks ] = mfacb; - (distAD.f[DIR_MP0 ])[kw ] = mfcab; - (distAD.f[DIR_P0P ])[k ] = mfaba; - (distAD.f[DIR_M0M ])[kbw ] = mfcbc; - (distAD.f[DIR_P0M ])[kb ] = mfabc; - (distAD.f[DIR_M0P ])[kw ] = mfcba; - (distAD.f[DIR_0PP ])[k ] = mfbaa; - (distAD.f[DIR_0MM ])[kbs ] = mfbcc; - (distAD.f[DIR_0PM ])[kb ] = mfbac; - (distAD.f[DIR_0MP ])[ks ] = mfbca; + (distAD.f[DIR_P00])[k ] = mfabb; + (distAD.f[DIR_M00])[kw ] = mfcbb; + (distAD.f[DIR_0P0])[k ] = mfbab; + (distAD.f[DIR_0M0])[ks ] = mfbcb; + (distAD.f[DIR_00P])[k ] = mfbba; + (distAD.f[DIR_00M])[kb ] = mfbbc; + (distAD.f[DIR_PP0])[k ] = mfaab; + (distAD.f[DIR_MM0])[ksw ] = mfccb; + (distAD.f[DIR_PM0])[ks ] = mfacb; + (distAD.f[DIR_MP0])[kw ] = mfcab; + (distAD.f[DIR_P0P])[k ] = mfaba; + (distAD.f[DIR_M0M])[kbw ] = mfcbc; + (distAD.f[DIR_P0M])[kb ] = mfabc; + (distAD.f[DIR_M0P])[kw ] = mfcba; + (distAD.f[DIR_0PP])[k ] = mfbaa; + (distAD.f[DIR_0MM])[kbs ] = mfbcc; + (distAD.f[DIR_0PM])[kb ] = mfbac; + (distAD.f[DIR_0MP])[ks ] = mfbca; (distAD.f[DIR_000])[k ] = mfbbb; - (distAD.f[DIR_PPP ])[k ] = mfaaa; - (distAD.f[DIR_PMP ])[ks ] = mfaca; - (distAD.f[DIR_PPM ])[kb ] = mfaac; - (distAD.f[DIR_PMM ])[kbs ] = mfacc; - (distAD.f[DIR_MPP ])[kw ] = mfcaa; - (distAD.f[DIR_MMP ])[ksw ] = mfcca; - (distAD.f[DIR_MPM ])[kbw ] = mfcac; - (distAD.f[DIR_MMM ])[kbsw] = mfccc; + (distAD.f[DIR_PPP])[k ] = mfaaa; + (distAD.f[DIR_PMP])[ks ] = mfaca; + (distAD.f[DIR_PPM])[kb ] = mfaac; + (distAD.f[DIR_PMM])[kbs ] = mfacc; + (distAD.f[DIR_MPP])[kw ] = mfcaa; + (distAD.f[DIR_MMP])[ksw ] = mfcca; + (distAD.f[DIR_MPM])[kbw ] = mfcac; + (distAD.f[DIR_MMM])[kbsw] = mfccc; } } 
//////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu index ecf98a7494a0a5e1c81c1040917e941f066605e6..116ce20389985e0efa650598108224b2e3e25221 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu @@ -20,91 +20,91 @@ __global__ void QADPress7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = 
&DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM 
*size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; 
+ D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -128,24 +128,24 @@ __global__ void QADPress7( real* DD, // *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, // *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - //q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - //q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + //q_dirNW 
= &QQ[DIR_MP0 * numberOfBCnodes]; + //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + //q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -188,32 +188,32 @@ __global__ void QADPress7( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = 
(D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// /*real drho*/; //real vx1_Inflow = zero; @@ -293,23 +293,23 @@ __global__ void QADPress7( real* DD, //pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -461,131 +461,131 @@ __global__ void QADPress27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if 
(isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = 
&DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = 
&DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * 
numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -606,24 +606,24 @@ __global__ void QADPress27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + 
q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -663,33 +663,33 @@ __global__ void QADPress27( real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne 
]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, /*drho, feq,*/ q; //drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -715,33 +715,33 @@ __global__ void QADPress27( real* DD, vx2 = OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); vx3 = OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - real f27_W = (D27.f[DIR_P00 ])[ke ]; - real f27_E = (D27.f[DIR_M00 ])[kw ]; - real f27_S = (D27.f[DIR_0P0 ])[kn ]; - real f27_N = (D27.f[DIR_0M0 ])[ks ]; - real f27_B = (D27.f[DIR_00P ])[kt ]; - real f27_T = (D27.f[DIR_00M ])[kb ]; - real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - real f27_BW = (D27.f[DIR_P0P ])[kte ]; - real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - real f27_BN = (D27.f[DIR_0MP ])[kts ]; + real f27_W = (D27.f[DIR_P00])[ke ]; + real f27_E = (D27.f[DIR_M00])[kw ]; + real f27_S = 
(D27.f[DIR_0P0])[kn ]; + real f27_N = (D27.f[DIR_0M0])[ks ]; + real f27_B = (D27.f[DIR_00P])[kt ]; + real f27_T = (D27.f[DIR_00M])[kb ]; + real f27_SW = (D27.f[DIR_PP0])[kne ]; + real f27_NE = (D27.f[DIR_MM0])[ksw ]; + real f27_NW = (D27.f[DIR_PM0])[kse ]; + real f27_SE = (D27.f[DIR_MP0])[knw ]; + real f27_BW = (D27.f[DIR_P0P])[kte ]; + real f27_TE = (D27.f[DIR_M0M])[kbw ]; + real f27_TW = (D27.f[DIR_P0M])[kbe ]; + real f27_BE = (D27.f[DIR_M0P])[ktw ]; + real f27_BS = (D27.f[DIR_0PP])[ktn ]; + real f27_TN = (D27.f[DIR_0MM])[kbs ]; + real f27_TS = (D27.f[DIR_0PM])[kbn ]; + real f27_BN = (D27.f[DIR_0MP])[kts ]; real f27_ZERO = (D27.f[DIR_000])[kzero]; - real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -849,86 +849,86 @@ __global__ void QADPress27( real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 
] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * 
numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * 
numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test //(D.f[DIR_000])[k]=c1o10; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]=(c2o1*feqW27_N -(f27_S 
*(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B *q)/(q+c1o1); - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn 
]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]=(c2o1*feqW27_N -(f27_S *(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B *q)/(q+c1o1); + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse 
]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1); q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1); q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1); @@ -989,132 +989,132 @@ __global__ 
void QADPressNEQNeighbor27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep ) { Distributions27 D; if (isEvenTimestep == true) { - D.f[DIR_P00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = 
&DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = 
&DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep == true) { - D27.f[DIR_P00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = 
&DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0M0 
*size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; 
+ D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1345,33 +1345,33 @@ __global__ void QADPressNEQNeighbor27( unsigned int kNbsw = neighborZ[kNsw]; //////////////////////////////////////////////////////////////////////////////// //update distributions at neighbor nodes - (D27.f[DIR_P00 ])[kNe ] = f27_W ; - (D27.f[DIR_M00 ])[kNw ] = f27_E ; - (D27.f[DIR_0P0 ])[kNn ] = f27_S ; - (D27.f[DIR_0M0 ])[kNs ] = f27_N ; - (D27.f[DIR_00P ])[kNt ] = f27_B ; - (D27.f[DIR_00M ])[kNb ] = f27_T ; - (D27.f[DIR_PP0 ])[kNne ] = f27_SW ; - (D27.f[DIR_MM0 ])[kNsw ] = f27_NE ; - (D27.f[DIR_PM0 ])[kNse ] = f27_NW ; - (D27.f[DIR_MP0 ])[kNnw ] = f27_SE ; - (D27.f[DIR_P0P ])[kNte ] = f27_BW ; - (D27.f[DIR_M0M ])[kNbw ] = f27_TE ; - (D27.f[DIR_P0M ])[kNbe ] = f27_TW ; - (D27.f[DIR_M0P ])[kNtw ] = f27_BE ; - (D27.f[DIR_0PP ])[kNtn ] = f27_BS ; - (D27.f[DIR_0MM ])[kNbs ] = f27_TN ; - (D27.f[DIR_0PM ])[kNbn ] = f27_TS ; - (D27.f[DIR_0MP ])[kNts ] = f27_BN ; + (D27.f[DIR_P00])[kNe ] = f27_W ; + (D27.f[DIR_M00])[kNw ] = f27_E ; + (D27.f[DIR_0P0])[kNn ] = f27_S ; + (D27.f[DIR_0M0])[kNs ] = f27_N ; + (D27.f[DIR_00P])[kNt ] = f27_B ; + (D27.f[DIR_00M])[kNb ] = f27_T ; + (D27.f[DIR_PP0])[kNne ] = f27_SW ; + (D27.f[DIR_MM0])[kNsw ] = f27_NE ; + (D27.f[DIR_PM0])[kNse ] = f27_NW ; + (D27.f[DIR_MP0])[kNnw ] = f27_SE ; + (D27.f[DIR_P0P])[kNte ] = f27_BW ; + (D27.f[DIR_M0M])[kNbw ] = f27_TE ; + (D27.f[DIR_P0M])[kNbe ] = f27_TW ; + (D27.f[DIR_M0P])[kNtw 
] = f27_BE ; + (D27.f[DIR_0PP])[kNtn ] = f27_BS ; + (D27.f[DIR_0MM])[kNbs ] = f27_TN ; + (D27.f[DIR_0PM])[kNbn ] = f27_TS ; + (D27.f[DIR_0MP])[kNts ] = f27_BN ; (D27.f[DIR_000])[kNzero] = f27_ZERO; - (D27.f[DIR_PPP ])[kNtne ] = f27_BSW ; - (D27.f[DIR_MMP ])[kNtsw ] = f27_BNE ; - (D27.f[DIR_PMP ])[kNtse ] = f27_BNW ; - (D27.f[DIR_MPP ])[kNtnw ] = f27_BSE ; - (D27.f[DIR_PPM ])[kNbne ] = f27_TSW ; - (D27.f[DIR_MMM ])[kNbsw ] = f27_TNE ; - (D27.f[DIR_PMM ])[kNbse ] = f27_TNW ; - (D27.f[DIR_MPM ])[kNbnw ] = f27_TSE ; + (D27.f[DIR_PPP])[kNtne ] = f27_BSW ; + (D27.f[DIR_MMP])[kNtsw ] = f27_BNE ; + (D27.f[DIR_PMP])[kNtse ] = f27_BNW ; + (D27.f[DIR_MPP])[kNtnw ] = f27_BSE ; + (D27.f[DIR_PPM])[kNbne ] = f27_TSW ; + (D27.f[DIR_MMM])[kNbsw ] = f27_TNE ; + (D27.f[DIR_PMM])[kNbse ] = f27_TNW ; + (D27.f[DIR_MPM])[kNbnw ] = f27_TSE ; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1427,91 +1427,91 @@ __global__ void QADVel7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP 
*size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = 
&DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + 
D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -1531,12 +1531,12 @@ __global__ void QADVel7( real* DD, ////////////////////////////////////////////////////////////////////////////////// real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB;//, - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; ////////////////////////////////////////////////////////////////////////////////// 
//index unsigned int KQK = k_Q[k]; @@ -1571,32 +1571,32 @@ __global__ void QADVel7( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// /*real drho*/; 
real vx1_Inflow = c0o1; @@ -1676,23 +1676,23 @@ __global__ void QADVel7( real* DD, //pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1844,131 +1844,131 @@ __global__ void QADVel27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M 
*size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = 
&DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + 
D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP 
*size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ 
-1989,24 +1989,24 @@ __global__ void QADVel27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -2046,33 +2046,33 @@ __global__ void QADVel27(real* DD, unsigned int ktne = KQK; unsigned 
int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = 
(D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, /*drho, feq,*/ q; ////drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -2098,33 +2098,33 @@ __global__ void QADVel27(real* DD, vx2 = OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); vx3 = OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - //real f27_W = (D27.f[DIR_P00 ])[ke ]; - //real f27_E = (D27.f[DIR_M00 ])[kw ]; - //real f27_S = (D27.f[DIR_0P0 ])[kn ]; - //real f27_N = (D27.f[DIR_0M0 ])[ks ]; - //real f27_B = (D27.f[DIR_00P ])[kt ]; - //real f27_T = (D27.f[DIR_00M ])[kb ]; - //real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - //real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - //real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - //real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - //real f27_BW = (D27.f[DIR_P0P ])[kte ]; - //real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - //real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - //real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - //real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - //real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - //real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - //real f27_BN = (D27.f[DIR_0MP ])[kts ]; + //real f27_W = (D27.f[DIR_P00])[ke ]; + //real f27_E = (D27.f[DIR_M00])[kw ]; + //real f27_S = (D27.f[DIR_0P0])[kn ]; + //real f27_N = (D27.f[DIR_0M0])[ks ]; + //real f27_B = (D27.f[DIR_00P])[kt ]; + //real f27_T = (D27.f[DIR_00M])[kb ]; + //real f27_SW = (D27.f[DIR_PP0])[kne ]; + //real f27_NE = (D27.f[DIR_MM0])[ksw ]; + //real f27_NW = (D27.f[DIR_PM0])[kse ]; + //real f27_SE = (D27.f[DIR_MP0])[knw ]; + //real f27_BW = (D27.f[DIR_P0P])[kte ]; + //real f27_TE = (D27.f[DIR_M0M])[kbw ]; + //real f27_TW = (D27.f[DIR_P0M])[kbe ]; + //real f27_BE = (D27.f[DIR_M0P])[ktw ]; + 
//real f27_BS = (D27.f[DIR_0PP])[ktn ]; + //real f27_TN = (D27.f[DIR_0MM])[kbs ]; + //real f27_TS = (D27.f[DIR_0PM])[kbn ]; + //real f27_BN = (D27.f[DIR_0MP])[kts ]; //real f27_ZERO = (D27.f[DIR_000])[kzero]; - //real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - //real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - //real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - //real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - //real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - //real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - //real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - //real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + //real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + //real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + //real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + //real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + //real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + //real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + //real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + //real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -2233,63 +2233,63 @@ __global__ void QADVel27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = 
&DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = 
&DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * 
numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -2299,24 +2299,24 @@ __global__ void QADVel27(real* DD, //Test //(D.f[DIR_000])[k]=c1o10; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //(D27.f[DIR_M00 ])[kw ]= four; - //(D27.f[DIR_P00 ])[ke ]= four; - //(D27.f[DIR_0M0 ])[ks ]= four; - //(D27.f[DIR_0P0 ])[kn ]= four; - //(D27.f[DIR_00M ])[kb ]= four; - //(D27.f[DIR_00P ])[kt ]= four; - //(D27.f[DIR_MM0 ])[ksw ]= four; - //(D27.f[DIR_PP0 ])[kne ]= four; - //(D27.f[DIR_MP0 ])[knw ]= four; - //(D27.f[DIR_PM0 ])[kse ]= four; - //(D27.f[DIR_M0M ])[kbw ]= four; - //(D27.f[DIR_P0P ])[kte ]= four; - //(D27.f[DIR_M0P ])[ktw ]= four; - //(D27.f[DIR_P0M ])[kbe ]= four; - //(D27.f[DIR_0MM ])[kbs ]= four; - //(D27.f[DIR_0PP ])[ktn ]= four; - //(D27.f[DIR_0MP ])[kts ]= four; - //(D27.f[DIR_0PM ])[kbn ]= four; + //(D27.f[DIR_M00])[kw ]= four; + //(D27.f[DIR_P00])[ke ]= four; + //(D27.f[DIR_0M0])[ks ]= four; + //(D27.f[DIR_0P0])[kn ]= four; + //(D27.f[DIR_00M])[kb ]= four; + //(D27.f[DIR_00P])[kt ]= four; + //(D27.f[DIR_MM0])[ksw ]= four; + //(D27.f[DIR_PP0])[kne ]= four; + //(D27.f[DIR_MP0])[knw ]= four; + 
//(D27.f[DIR_PM0])[kse ]= four; + //(D27.f[DIR_M0M])[kbw ]= four; + //(D27.f[DIR_P0P])[kte ]= four; + //(D27.f[DIR_M0P])[ktw ]= four; + //(D27.f[DIR_P0M])[kbe ]= four; + //(D27.f[DIR_0MM])[kbs ]= four; + //(D27.f[DIR_0PP])[ktn ]= four; + //(D27.f[DIR_0MP])[kts ]= four; + //(D27.f[DIR_0PM])[kbn ]= four; //(D27.f[DIR_MMM])[kbsw]= four; //(D27.f[DIR_PPP])[ktne]= four; //(D27.f[DIR_MMP])[ktsw]= four; @@ -2325,24 +2325,24 @@ __global__ void QADVel27(real* DD, //(D27.f[DIR_PMP])[ktse]= four; //(D27.f[DIR_MPP])[ktnw]= four; //(D27.f[DIR_PMM])[kbse]= four; - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54 * TempD; - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54 * TempD; - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; - q = q_dirTN[k]; if (q>=c0o1 && 
q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54 * TempD; - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54 * TempD; + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54 * TempD; + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; + q = q_dirBN[k]; if 
(q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54 * TempD; + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD; q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD; q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD; @@ -2351,24 +2351,24 @@ __global__ void QADVel27(real* DD, q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD; q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD; q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD; - //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00 ])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); - //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00 ])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); - //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0 ])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); - //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0 ])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); - //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M ])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); - //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P ])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); - //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); - //q = q_dirSW[k]; if 
(q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); - //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); - //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); - //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); - //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); - //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); - //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); - //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); - //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); - //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); - //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); + //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); + //q = q_dirW[k]; if (q>=zero && 
q<=one) (D27.f[DIR_P00])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); + //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); + //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); + //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); + //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); + //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); + //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); + //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); + //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); + //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); + //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); + //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); + //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE 
-(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); + //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); + //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); + //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); + //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one); //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one); //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one); @@ -2431,91 +2431,91 @@ __global__ void QAD7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M 
*size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = 
&DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -2539,24 +2539,24 @@ __global__ void QAD7( real* DD, // *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, // *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - //q_dirNW = 
&QQ[DIR_MP0 * numberOfBCnodes]; - //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - //q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + //q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + //q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -2599,32 +2599,32 @@ __global__ void QAD7( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; 
- f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3/*, drho*/; //drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -2696,23 +2696,23 @@ __global__ void QAD7( real* DD, //pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = 
&DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2864,131 +2864,131 @@ __global__ void QADDirichlet27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM 
*size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = 
&DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - 
D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * 
numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * 
numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -3009,24 +3009,24 @@ __global__ void QADDirichlet27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * 
numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -3066,33 +3066,33 @@ __global__ void QADDirichlet27( unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real 
f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, /*drho, feq,*/ q; ////drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -3118,33 +3118,33 @@ __global__ void QADDirichlet27( vx2 = OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); vx3 = OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); 
//////////////////////////////////////////////////////////////////////////////// - real f27_W = (D27.f[DIR_P00 ])[ke ]; - real f27_E = (D27.f[DIR_M00 ])[kw ]; - real f27_S = (D27.f[DIR_0P0 ])[kn ]; - real f27_N = (D27.f[DIR_0M0 ])[ks ]; - real f27_B = (D27.f[DIR_00P ])[kt ]; - real f27_T = (D27.f[DIR_00M ])[kb ]; - real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - real f27_BW = (D27.f[DIR_P0P ])[kte ]; - real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - real f27_BN = (D27.f[DIR_0MP ])[kts ]; + real f27_W = (D27.f[DIR_P00])[ke ]; + real f27_E = (D27.f[DIR_M00])[kw ]; + real f27_S = (D27.f[DIR_0P0])[kn ]; + real f27_N = (D27.f[DIR_0M0])[ks ]; + real f27_B = (D27.f[DIR_00P])[kt ]; + real f27_T = (D27.f[DIR_00M])[kb ]; + real f27_SW = (D27.f[DIR_PP0])[kne ]; + real f27_NE = (D27.f[DIR_MM0])[ksw ]; + real f27_NW = (D27.f[DIR_PM0])[kse ]; + real f27_SE = (D27.f[DIR_MP0])[knw ]; + real f27_BW = (D27.f[DIR_P0P])[kte ]; + real f27_TE = (D27.f[DIR_M0M])[kbw ]; + real f27_TW = (D27.f[DIR_P0M])[kbe ]; + real f27_BE = (D27.f[DIR_M0P])[ktw ]; + real f27_BS = (D27.f[DIR_0PP])[ktn ]; + real f27_TN = (D27.f[DIR_0MM])[kbs ]; + real f27_TS = (D27.f[DIR_0PM])[kbn ]; + real f27_BN = (D27.f[DIR_0MP])[kts ]; real f27_ZERO = (D27.f[DIR_000])[kzero]; - real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + real f27_BNW 
= (D27.f[DIR_PMP])[ktse ]; + real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -3220,86 +3220,86 @@ __global__ void QADDirichlet27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * 
numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - 
D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test //(D.f[DIR_000])[k]=0.1f; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[ ke ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); - q = q_dirW[ kw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); - q = q_dirN[ kn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); - q = q_dirS[ ks ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]=(c2o1*feqW27_N -(f27_S *(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); - q = q_dirT[ kt ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B *q)/(q+c1o1); - q = q_dirB[ kb ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); - q = q_dirNE[ kne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); - q = q_dirSW[ ksw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); - q = q_dirSE[ kse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); - q = q_dirNW[ knw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW 
*(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); - q = q_dirTE[ kte ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); - q = q_dirBW[ kbw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); - q = q_dirBE[ kbe ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); - q = q_dirTW[ ktw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); - q = q_dirTN[ ktn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); - q = q_dirBS[ kbs ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); - q = q_dirBN[ kbn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); - q = q_dirTS[ kts ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); + q = q_dirE[ ke ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); + q = q_dirW[ kw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); + q = q_dirN[ kn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); + q = q_dirS[ ks ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn 
]=(c2o1*feqW27_N -(f27_S *(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); + q = q_dirT[ kt ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B *q)/(q+c1o1); + q = q_dirB[ kb ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); + q = q_dirNE[ kne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); + q = q_dirSW[ ksw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); + q = q_dirSE[ kse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); + q = q_dirNW[ knw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); + q = q_dirTE[ kte ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); + q = q_dirBW[ kbw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); + q = q_dirBE[ kbe ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); + q = q_dirTW[ ktw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); + q = q_dirTN[ ktn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); + q = 
q_dirBS[ kbs ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); + q = q_dirBN[ kbn ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); + q = q_dirTS[ kts ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); q = q_dirTNE[ktne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1); q = q_dirBSW[kbsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1); q = q_dirBNE[kbne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1); @@ -3308,24 +3308,24 @@ __global__ void QADDirichlet27( q = q_dirBNW[kbnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1); q = q_dirBSE[kbse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1); q = q_dirTNW[ktnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1); - //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00 ])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); - //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00 ])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); - //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0 ])[ks ]=(two*feqW27_S 
-(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); - //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0 ])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); - //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M ])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); - //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P ])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); - //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); - //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); - //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); - //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); - //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); - //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); - //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); - //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); - //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN 
*(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); - //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); - //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); - //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); + //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); + //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); + //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); + //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); + //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); + //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); + //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); + //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); + //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE 
*(q-one))/(omegaD-one)+f27_NW *q)/(q+one); + //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); + //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); + //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); + //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); + //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); + //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); + //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); + //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); + //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one); //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one); //q = q_dirBNE[k]; if (q>=zero && q<=one) 
(D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one); @@ -3389,131 +3389,131 @@ __global__ void QADBB27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //Distributions27 D; //if (isEvenTimestep==true) //{ - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = 
&DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; //} //else //{ - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = 
&DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; //} Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] 
= &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * 
numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * 
numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -3534,24 +3534,24 @@ __global__ void QADBB27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * 
numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -3591,33 +3591,33 @@ __global__ void QADBB27( real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - //real f_W = (D.f[DIR_P00 ])[ke ]; - //real f_E = (D.f[DIR_M00 ])[kw ]; - //real f_S = (D.f[DIR_0P0 ])[kn ]; - //real f_N = (D.f[DIR_0M0 ])[ks ]; - //real f_B = (D.f[DIR_00P ])[kt ]; - //real f_T = (D.f[DIR_00M ])[kb ]; - //real f_SW = (D.f[DIR_PP0 ])[kne ]; - //real f_NE = (D.f[DIR_MM0 ])[ksw ]; - //real f_NW = (D.f[DIR_PM0 ])[kse ]; - //real f_SE = (D.f[DIR_MP0 ])[knw ]; - //real f_BW = (D.f[DIR_P0P ])[kte ]; - //real f_TE = (D.f[DIR_M0M ])[kbw ]; - //real f_TW = (D.f[DIR_P0M ])[kbe ]; - //real f_BE = (D.f[DIR_M0P ])[ktw ]; - //real f_BS = (D.f[DIR_0PP ])[ktn ]; - //real f_TN = (D.f[DIR_0MM ])[kbs ]; - //real f_TS = (D.f[DIR_0PM ])[kbn ]; - //real f_BN = (D.f[DIR_0MP ])[kts ]; + //real f_W = (D.f[DIR_P00])[ke ]; + //real f_E = (D.f[DIR_M00])[kw ]; + //real f_S = (D.f[DIR_0P0])[kn ]; + //real f_N = (D.f[DIR_0M0])[ks ]; + //real f_B = (D.f[DIR_00P])[kt ]; + //real f_T = (D.f[DIR_00M])[kb ]; + //real f_SW = (D.f[DIR_PP0])[kne ]; + //real f_NE = (D.f[DIR_MM0])[ksw ]; + //real f_NW = (D.f[DIR_PM0])[kse ]; + //real f_SE = 
(D.f[DIR_MP0])[knw ]; + //real f_BW = (D.f[DIR_P0P])[kte ]; + //real f_TE = (D.f[DIR_M0M])[kbw ]; + //real f_TW = (D.f[DIR_P0M])[kbe ]; + //real f_BE = (D.f[DIR_M0P])[ktw ]; + //real f_BS = (D.f[DIR_0PP])[ktn ]; + //real f_TN = (D.f[DIR_0MM])[kbs ]; + //real f_TS = (D.f[DIR_0PM])[kbn ]; + //real f_BN = (D.f[DIR_0MP])[kts ]; //real f_ZERO = (D.f[DIR_000])[kzero]; - //real f_BSW = (D.f[DIR_PPP ])[ktne ]; - //real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //real f_BNW = (D.f[DIR_PMP ])[ktse ]; - //real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //real f_TSW = (D.f[DIR_PPM ])[kbne ]; - //real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //real f_TNW = (D.f[DIR_PMM ])[kbse ]; - //real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + //real f_BSW = (D.f[DIR_PPP])[ktne ]; + //real f_BNE = (D.f[DIR_MMP])[ktsw ]; + //real f_BNW = (D.f[DIR_PMP])[ktse ]; + //real f_BSE = (D.f[DIR_MPP])[ktnw ]; + //real f_TSW = (D.f[DIR_PPM])[kbne ]; + //real f_TNE = (D.f[DIR_MMM])[kbsw ]; + //real f_TNW = (D.f[DIR_PMM])[kbse ]; + //real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //real vx1, vx2, vx3, /*drho, feq,*/ q; real q; @@ -3644,33 +3644,33 @@ __global__ void QADBB27( real* DD, //vx2 = OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); //vx3 = OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - real f27_W = (D27.f[DIR_P00 ])[ke ]; - real f27_E = (D27.f[DIR_M00 ])[kw ]; - real f27_S = (D27.f[DIR_0P0 ])[kn ]; - real f27_N = (D27.f[DIR_0M0 ])[ks ]; - real f27_B = (D27.f[DIR_00P ])[kt ]; - real f27_T = (D27.f[DIR_00M ])[kb ]; - real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - real f27_BW = (D27.f[DIR_P0P ])[kte ]; - real f27_TE 
= (D27.f[DIR_M0M ])[kbw ]; - real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - real f27_BN = (D27.f[DIR_0MP ])[kts ]; + real f27_W = (D27.f[DIR_P00])[ke ]; + real f27_E = (D27.f[DIR_M00])[kw ]; + real f27_S = (D27.f[DIR_0P0])[kn ]; + real f27_N = (D27.f[DIR_0M0])[ks ]; + real f27_B = (D27.f[DIR_00P])[kt ]; + real f27_T = (D27.f[DIR_00M])[kb ]; + real f27_SW = (D27.f[DIR_PP0])[kne ]; + real f27_NE = (D27.f[DIR_MM0])[ksw ]; + real f27_NW = (D27.f[DIR_PM0])[kse ]; + real f27_SE = (D27.f[DIR_MP0])[knw ]; + real f27_BW = (D27.f[DIR_P0P])[kte ]; + real f27_TE = (D27.f[DIR_M0M])[kbw ]; + real f27_TW = (D27.f[DIR_P0M])[kbe ]; + real f27_BE = (D27.f[DIR_M0P])[ktw ]; + real f27_BS = (D27.f[DIR_0PP])[ktn ]; + real f27_TN = (D27.f[DIR_0MM])[kbs ]; + real f27_TS = (D27.f[DIR_0PM])[kbn ]; + real f27_BN = (D27.f[DIR_0MP])[kts ]; //real f27_ZERO = (D27.f[DIR_000])[kzero]; - real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -3746,86 +3746,86 @@ __global__ void QADBB27( real* DD, 
////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = 
&DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = 
&DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test //(D.f[DIR_000])[k]=0.1f; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]=f27_E ; - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]=f27_W ; - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 
])[ks ]=f27_N ; - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]=f27_S ; - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]=f27_T ; - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]=f27_B ; - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=f27_NE ; - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=f27_SW ; - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=f27_SE ; - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=f27_NW ; - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=f27_TE ; - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=f27_BW ; - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=f27_BE ; - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=f27_TW ; - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=f27_TN ; - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=f27_BS ; - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=f27_BN ; - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=f27_TS ; + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]=f27_E ; + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]=f27_W ; + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]=f27_N ; + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]=f27_S ; + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]=f27_T ; + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]=f27_B ; + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=f27_NE ; + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=f27_SW ; + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=f27_SE ; + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=f27_NW ; + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=f27_TE ; + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=f27_BW 
; + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=f27_BE ; + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=f27_TW ; + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=f27_TN ; + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=f27_BS ; + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=f27_BN ; + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=f27_TS ; q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=f27_TNE; q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=f27_BSW; q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=f27_BNE; @@ -3905,91 +3905,91 @@ __global__ void QNoSlipADincomp7( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //Distributions27 D; //if (isEvenTimestep==true) //{ - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = 
&DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; //} //else //{ - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = 
&DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; //} Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - 
D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -4009,12 +4009,12 @@ __global__ void QNoSlipADincomp7( ////////////////////////////////////////////////////////////////////////////////// real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; ////////////////////////////////////////////////////////////////////////////////// //index unsigned int KQK = k_Q[k]; @@ -4046,32 +4046,32 @@ __global__ void QNoSlipADincomp7( //unsigned int ktne = KQK; //unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - //real f_W = (D.f[DIR_P00 ])[ke ]; - //real f_E = (D.f[DIR_M00 ])[kw ]; - //real f_S = (D.f[DIR_0P0 ])[kn ]; - //real f_N = 
(D.f[DIR_0M0 ])[ks ]; - //real f_B = (D.f[DIR_00P ])[kt ]; - //real f_T = (D.f[DIR_00M ])[kb ]; - //real f_SW = (D.f[DIR_PP0 ])[kne ]; - //real f_NE = (D.f[DIR_MM0 ])[ksw ]; - //real f_NW = (D.f[DIR_PM0 ])[kse ]; - //real f_SE = (D.f[DIR_MP0 ])[knw ]; - //real f_BW = (D.f[DIR_P0P ])[kte ]; - //real f_TE = (D.f[DIR_M0M ])[kbw ]; - //real f_TW = (D.f[DIR_P0M ])[kbe ]; - //real f_BE = (D.f[DIR_M0P ])[ktw ]; - //real f_BS = (D.f[DIR_0PP ])[ktn ]; - //real f_TN = (D.f[DIR_0MM ])[kbs ]; - //real f_TS = (D.f[DIR_0PM ])[kbn ]; - //real f_BN = (D.f[DIR_0MP ])[kts ]; - //real f_BSW = (D.f[DIR_PPP ])[ktne ]; - //real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //real f_BNW = (D.f[DIR_PMP ])[ktse ]; - //real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //real f_TSW = (D.f[DIR_PPM ])[kbne ]; - //real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //real f_TNW = (D.f[DIR_PMM ])[kbse ]; - //real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + //real f_W = (D.f[DIR_P00])[ke ]; + //real f_E = (D.f[DIR_M00])[kw ]; + //real f_S = (D.f[DIR_0P0])[kn ]; + //real f_N = (D.f[DIR_0M0])[ks ]; + //real f_B = (D.f[DIR_00P])[kt ]; + //real f_T = (D.f[DIR_00M])[kb ]; + //real f_SW = (D.f[DIR_PP0])[kne ]; + //real f_NE = (D.f[DIR_MM0])[ksw ]; + //real f_NW = (D.f[DIR_PM0])[kse ]; + //real f_SE = (D.f[DIR_MP0])[knw ]; + //real f_BW = (D.f[DIR_P0P])[kte ]; + //real f_TE = (D.f[DIR_M0M])[kbw ]; + //real f_TW = (D.f[DIR_P0M])[kbe ]; + //real f_BE = (D.f[DIR_M0P])[ktw ]; + //real f_BS = (D.f[DIR_0PP])[ktn ]; + //real f_TN = (D.f[DIR_0MM])[kbs ]; + //real f_TS = (D.f[DIR_0PM])[kbn ]; + //real f_BN = (D.f[DIR_0MP])[kts ]; + //real f_BSW = (D.f[DIR_PPP])[ktne ]; + //real f_BNE = (D.f[DIR_MMP])[ktsw ]; + //real f_BNW = (D.f[DIR_PMP])[ktse ]; + //real f_BSE = (D.f[DIR_MPP])[ktnw ]; + //real f_TSW = (D.f[DIR_PPM])[kbne ]; + //real f_TNE = (D.f[DIR_MMM])[kbsw ]; + //real f_TNW = (D.f[DIR_PMM])[kbse ]; + //real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //real vx1 = 
((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W)); //real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); @@ -4131,23 +4131,23 @@ __global__ void QNoSlipADincomp7( //pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////// @@ -4329,131 +4329,131 @@ __global__ void QNoSlipADincomp27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] 
= &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * 
numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + 
D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] 
= &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP 
*size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4474,24 +4474,24 @@ __global__ void QNoSlipADincomp27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; 
q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -4531,65 +4531,65 @@ __global__ void QNoSlipADincomp27( unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; 
+ real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W)); real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - real f27_W = (D27.f[DIR_P00 ])[ke ]; - real f27_E = (D27.f[DIR_M00 ])[kw ]; - real f27_S = (D27.f[DIR_0P0 ])[kn ]; - real f27_N = (D27.f[DIR_0M0 ])[ks ]; - real f27_B = (D27.f[DIR_00P ])[kt ]; - real f27_T = (D27.f[DIR_00M ])[kb ]; - real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - real f27_BW = (D27.f[DIR_P0P ])[kte ]; - real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - real f27_BN = (D27.f[DIR_0MP ])[kts ]; + real f27_W = (D27.f[DIR_P00])[ke ]; + real f27_E = (D27.f[DIR_M00])[kw ]; + real f27_S = (D27.f[DIR_0P0])[kn ]; + real f27_N = (D27.f[DIR_0M0])[ks ]; + real f27_B = (D27.f[DIR_00P])[kt ]; + real f27_T = (D27.f[DIR_00M])[kb ]; + real f27_SW = (D27.f[DIR_PP0])[kne ]; + real f27_NE = (D27.f[DIR_MM0])[ksw ]; + real f27_NW = (D27.f[DIR_PM0])[kse ]; + real f27_SE = (D27.f[DIR_MP0])[knw ]; + real f27_BW = (D27.f[DIR_P0P])[kte ]; + real f27_TE = (D27.f[DIR_M0M])[kbw ]; + real f27_TW = (D27.f[DIR_P0M])[kbe ]; + real f27_BE = 
(D27.f[DIR_M0P])[ktw ]; + real f27_BS = (D27.f[DIR_0PP])[ktn ]; + real f27_TN = (D27.f[DIR_0MM])[kbs ]; + real f27_TS = (D27.f[DIR_0PM])[kbn ]; + real f27_BN = (D27.f[DIR_0MP])[kts ]; real f27_ZERO = (D27.f[DIR_000])[kzero]; - real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -4665,63 +4665,63 @@ __global__ void QNoSlipADincomp27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM 
*size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 
*size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * 
numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -4729,24 +4729,24 @@ __global__ void QNoSlipADincomp27( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real q; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]=(c2o1*feqW27_N -(f27_S *(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B 
*q)/(q+c1o1); - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN 
*(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]=(c2o1*feqW27_W -(f27_E *(q*omegaD-c1o1)-omegaD*feq27_E *(q-c1o1))/(omegaD-c1o1)+f27_W *q)/(q+c1o1); + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]=(c2o1*feqW27_E -(f27_W *(q*omegaD-c1o1)-omegaD*feq27_W *(q-c1o1))/(omegaD-c1o1)+f27_E *q)/(q+c1o1); + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]=(c2o1*feqW27_S -(f27_N *(q*omegaD-c1o1)-omegaD*feq27_N *(q-c1o1))/(omegaD-c1o1)+f27_S *q)/(q+c1o1); + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]=(c2o1*feqW27_N -(f27_S *(q*omegaD-c1o1)-omegaD*feq27_S *(q-c1o1))/(omegaD-c1o1)+f27_N *q)/(q+c1o1); + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]=(c2o1*feqW27_B -(f27_T *(q*omegaD-c1o1)-omegaD*feq27_T *(q-c1o1))/(omegaD-c1o1)+f27_B *q)/(q+c1o1); + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]=(c2o1*feqW27_T -(f27_B *(q*omegaD-c1o1)-omegaD*feq27_B *(q-c1o1))/(omegaD-c1o1)+f27_T *q)/(q+c1o1); + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1); + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1); + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1); + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1); + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE 
*(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1); + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1); + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1); + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1); + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1); + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1); + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1); + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1); q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1); q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1); q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1); @@ -4811,91 +4811,91 @@ __global__ void QADVeloIncomp7( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { 
//Distributions27 D; //if (isEvenTimestep==true) //{ - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // 
D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; //} //else //{ - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // 
D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; //} Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -4915,12 +4915,12 @@ __global__ void QADVeloIncomp7( 
////////////////////////////////////////////////////////////////////////////////// real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; ////////////////////////////////////////////////////////////////////////////////// //index unsigned int KQK = k_Q[k]; @@ -4952,32 +4952,32 @@ __global__ void QADVeloIncomp7( //unsigned int ktne = KQK; //unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - //real f_W = (D.f[DIR_P00 ])[ke ]; - //real f_E = (D.f[DIR_M00 ])[kw ]; - //real f_S = (D.f[DIR_0P0 ])[kn ]; - //real f_N = (D.f[DIR_0M0 ])[ks ]; - //real f_B = (D.f[DIR_00P ])[kt ]; - //real f_T = (D.f[DIR_00M ])[kb ]; - //real f_SW = (D.f[DIR_PP0 ])[kne ]; - //real f_NE = (D.f[DIR_MM0 ])[ksw ]; - //real f_NW = (D.f[DIR_PM0 ])[kse ]; - //real f_SE = (D.f[DIR_MP0 ])[knw ]; - //real f_BW = (D.f[DIR_P0P ])[kte ]; - //real f_TE = (D.f[DIR_M0M ])[kbw ]; - //real f_TW = (D.f[DIR_P0M ])[kbe ]; - //real f_BE = (D.f[DIR_M0P ])[ktw ]; - //real f_BS = (D.f[DIR_0PP ])[ktn ]; - //real f_TN = (D.f[DIR_0MM ])[kbs ]; - //real f_TS = (D.f[DIR_0PM ])[kbn ]; - //real f_BN = (D.f[DIR_0MP ])[kts ]; - //real f_BSW = (D.f[DIR_PPP ])[ktne ]; - //real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //real f_BNW = (D.f[DIR_PMP ])[ktse ]; - //real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //real f_TSW = (D.f[DIR_PPM ])[kbne ]; - //real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //real f_TNW = (D.f[DIR_PMM ])[kbse ]; - //real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + 
//real f_W = (D.f[DIR_P00])[ke ]; + //real f_E = (D.f[DIR_M00])[kw ]; + //real f_S = (D.f[DIR_0P0])[kn ]; + //real f_N = (D.f[DIR_0M0])[ks ]; + //real f_B = (D.f[DIR_00P])[kt ]; + //real f_T = (D.f[DIR_00M])[kb ]; + //real f_SW = (D.f[DIR_PP0])[kne ]; + //real f_NE = (D.f[DIR_MM0])[ksw ]; + //real f_NW = (D.f[DIR_PM0])[kse ]; + //real f_SE = (D.f[DIR_MP0])[knw ]; + //real f_BW = (D.f[DIR_P0P])[kte ]; + //real f_TE = (D.f[DIR_M0M])[kbw ]; + //real f_TW = (D.f[DIR_P0M])[kbe ]; + //real f_BE = (D.f[DIR_M0P])[ktw ]; + //real f_BS = (D.f[DIR_0PP])[ktn ]; + //real f_TN = (D.f[DIR_0MM])[kbs ]; + //real f_TS = (D.f[DIR_0PM])[kbn ]; + //real f_BN = (D.f[DIR_0MP])[kts ]; + //real f_BSW = (D.f[DIR_PPP])[ktne ]; + //real f_BNE = (D.f[DIR_MMP])[ktsw ]; + //real f_BNW = (D.f[DIR_PMP])[ktse ]; + //real f_BSE = (D.f[DIR_MPP])[ktnw ]; + //real f_TSW = (D.f[DIR_PPM])[kbne ]; + //real f_TNE = (D.f[DIR_MMM])[kbsw ]; + //real f_TNW = (D.f[DIR_PMM])[kbse ]; + //real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //real vx1_Inflow = c0o1; //real vx2_Inflow = velo[k]; @@ -5091,23 +5091,23 @@ __global__ void QADVeloIncomp7( //pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] 
= &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////// @@ -5289,131 +5289,131 @@ __global__ void QADVeloIncomp27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + 
D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - 
D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P 
*size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * 
numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * 
numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -5434,24 +5434,24 @@ __global__ void QADVeloIncomp27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + 
q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -5491,65 +5491,65 @@ __global__ void QADVeloIncomp27( unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = 
(D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W)); real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - //real f27_W = (D27.f[DIR_P00 ])[ke ]; - //real f27_E = (D27.f[DIR_M00 ])[kw ]; - //real f27_S = (D27.f[DIR_0P0 ])[kn ]; - //real f27_N = (D27.f[DIR_0M0 ])[ks ]; - //real f27_B = (D27.f[DIR_00P ])[kt ]; - //real f27_T = (D27.f[DIR_00M ])[kb ]; - //real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - //real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - //real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - 
//real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - //real f27_BW = (D27.f[DIR_P0P ])[kte ]; - //real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - //real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - //real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - //real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - //real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - //real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - //real f27_BN = (D27.f[DIR_0MP ])[kts ]; + //real f27_W = (D27.f[DIR_P00])[ke ]; + //real f27_E = (D27.f[DIR_M00])[kw ]; + //real f27_S = (D27.f[DIR_0P0])[kn ]; + //real f27_N = (D27.f[DIR_0M0])[ks ]; + //real f27_B = (D27.f[DIR_00P])[kt ]; + //real f27_T = (D27.f[DIR_00M])[kb ]; + //real f27_SW = (D27.f[DIR_PP0])[kne ]; + //real f27_NE = (D27.f[DIR_MM0])[ksw ]; + //real f27_NW = (D27.f[DIR_PM0])[kse ]; + //real f27_SE = (D27.f[DIR_MP0])[knw ]; + //real f27_BW = (D27.f[DIR_P0P])[kte ]; + //real f27_TE = (D27.f[DIR_M0M])[kbw ]; + //real f27_TW = (D27.f[DIR_P0M])[kbe ]; + //real f27_BE = (D27.f[DIR_M0P])[ktw ]; + //real f27_BS = (D27.f[DIR_0PP])[ktn ]; + //real f27_TN = (D27.f[DIR_0MM])[kbs ]; + //real f27_TS = (D27.f[DIR_0PM])[kbn ]; + //real f27_BN = (D27.f[DIR_0MP])[kts ]; //real f27_ZERO = (D27.f[DIR_000])[kzero]; - //real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - //real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - //real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - //real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - //real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - //real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - //real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - //real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + //real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + //real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + //real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + //real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + //real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + //real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + //real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + //real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real 
cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -5630,63 +5630,63 @@ __global__ void QADVeloIncomp27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + 
D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = 
&DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -5694,24 +5694,24 @@ __global__ void QADVeloIncomp27( 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real q; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54 * TempD; - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54 * TempD; - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= 
-feqW27_TS + c2o1 * c1o54 * TempD; - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54 * TempD; + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54 * TempD; + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54 * TempD; + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) 
(D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD; q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD; q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD; @@ -5720,24 +5720,24 @@ __global__ void QADVeloIncomp27( q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD; q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD; q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD; - //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00 ])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); - //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00 ])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); - //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0 ])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); - //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0 ])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); - //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M ])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); - //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P ])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); - //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); - //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); - //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW 
-(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); - //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); - //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); - //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); - //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); - //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); - //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); - //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); - //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); - //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); + //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); + //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); + //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks ]=(two*feqW27_S -(f27_N 
*(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); + //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); + //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); + //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); + //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); + //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); + //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); + //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); + //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); + //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); + //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); + //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); + //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN 
*(q-one))/(omegaD-one)+f27_BS *q)/(q+one); + //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); + //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); + //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one); //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one); //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one); @@ -5801,91 +5801,91 @@ __global__ void QADPressIncomp7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { /* Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = 
&DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 
*size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; }*/ Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; 
- D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } @@ -5905,12 +5905,12 @@ __global__ void QADPressIncomp7( real* DD, ////////////////////////////////////////////////////////////////////////////////// real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; ////////////////////////////////////////////////////////////////////////////////// //index unsigned int KQK = k_Q[k]; @@ -5945,32 +5945,32 @@ __global__ void QADPressIncomp7( real* DD, /* real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - 
f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ];*/ + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ];*/ //////////////////////////////////////////////////////////////////////////////// //real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W)); //real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); @@ -6035,23 +6035,23 @@ __global__ void QADPressIncomp7( real* DD, 
//pointertausch if (isEvenTimestep==false) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////// @@ -6240,131 +6240,131 @@ __global__ void QADPressIncomp27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM 
*size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = 
&DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = 
&DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + 
D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = 
&DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -6385,24 +6385,24 @@ __global__ void QADPressIncomp27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, 
*q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -6442,65 +6442,65 @@ __global__ void QADPressIncomp27( unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 
])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1 = 
((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W)); real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S)); real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B)); //////////////////////////////////////////////////////////////////////////////// - //real f27_W = (D27.f[DIR_P00 ])[ke ]; - //real f27_E = (D27.f[DIR_M00 ])[kw ]; - //real f27_S = (D27.f[DIR_0P0 ])[kn ]; - //real f27_N = (D27.f[DIR_0M0 ])[ks ]; - //real f27_B = (D27.f[DIR_00P ])[kt ]; - //real f27_T = (D27.f[DIR_00M ])[kb ]; - //real f27_SW = (D27.f[DIR_PP0 ])[kne ]; - //real f27_NE = (D27.f[DIR_MM0 ])[ksw ]; - //real f27_NW = (D27.f[DIR_PM0 ])[kse ]; - //real f27_SE = (D27.f[DIR_MP0 ])[knw ]; - //real f27_BW = (D27.f[DIR_P0P ])[kte ]; - //real f27_TE = (D27.f[DIR_M0M ])[kbw ]; - //real f27_TW = (D27.f[DIR_P0M ])[kbe ]; - //real f27_BE = (D27.f[DIR_M0P ])[ktw ]; - //real f27_BS = (D27.f[DIR_0PP ])[ktn ]; - //real f27_TN = (D27.f[DIR_0MM ])[kbs ]; - //real f27_TS = (D27.f[DIR_0PM ])[kbn ]; - //real f27_BN = (D27.f[DIR_0MP ])[kts ]; + //real f27_W = (D27.f[DIR_P00])[ke ]; + //real f27_E = (D27.f[DIR_M00])[kw ]; + //real f27_S = (D27.f[DIR_0P0])[kn ]; + //real f27_N = (D27.f[DIR_0M0])[ks ]; + //real f27_B = (D27.f[DIR_00P])[kt ]; + //real f27_T = (D27.f[DIR_00M])[kb ]; + //real f27_SW = (D27.f[DIR_PP0])[kne ]; + //real f27_NE = (D27.f[DIR_MM0])[ksw ]; + //real f27_NW = (D27.f[DIR_PM0])[kse ]; + //real f27_SE = (D27.f[DIR_MP0])[knw ]; + //real f27_BW = (D27.f[DIR_P0P])[kte ]; + //real f27_TE = (D27.f[DIR_M0M])[kbw ]; + //real f27_TW = (D27.f[DIR_P0M])[kbe ]; + //real f27_BE = (D27.f[DIR_M0P])[ktw ]; + //real f27_BS = (D27.f[DIR_0PP])[ktn ]; + //real f27_TN = (D27.f[DIR_0MM])[kbs ]; + //real f27_TS = (D27.f[DIR_0PM])[kbn ]; + //real f27_BN = (D27.f[DIR_0MP])[kts ]; //real f27_ZERO = 
(D27.f[DIR_000])[kzero]; - //real f27_BSW = (D27.f[DIR_PPP ])[ktne ]; - //real f27_BNE = (D27.f[DIR_MMP ])[ktsw ]; - //real f27_BNW = (D27.f[DIR_PMP ])[ktse ]; - //real f27_BSE = (D27.f[DIR_MPP ])[ktnw ]; - //real f27_TSW = (D27.f[DIR_PPM ])[kbne ]; - //real f27_TNE = (D27.f[DIR_MMM ])[kbsw ]; - //real f27_TNW = (D27.f[DIR_PMM ])[kbse ]; - //real f27_TSE = (D27.f[DIR_MPM ])[kbnw ]; + //real f27_BSW = (D27.f[DIR_PPP])[ktne ]; + //real f27_BNE = (D27.f[DIR_MMP])[ktsw ]; + //real f27_BNW = (D27.f[DIR_PMP])[ktse ]; + //real f27_BSE = (D27.f[DIR_MPP])[ktnw ]; + //real f27_TSW = (D27.f[DIR_PPM])[kbne ]; + //real f27_TNE = (D27.f[DIR_MMM])[kbsw ]; + //real f27_TNW = (D27.f[DIR_PMM])[kbse ]; + //real f27_TSE = (D27.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); //////////////////////////////////////////////////////////////////////////////// @@ -6581,63 +6581,63 @@ __global__ void QADPressIncomp27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = 
&DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = 
&DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + 
D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -6645,24 +6645,24 @@ __global__ void QADPressIncomp27( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real q; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00 ])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00 ])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0 ])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0 ])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M ])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P ])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * 
c1o54 * TempD; - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54 * TempD; - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54 * TempD; - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw ]= -feqW27_W + c2o1 * c2o27 * TempD; + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke ]= -feqW27_E + c2o1 * c2o27 * TempD; + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks ]= -feqW27_S + c2o1 * c2o27 * TempD; + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn ]= -feqW27_N + c2o1 * c2o27 * TempD; + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb ]= -feqW27_B + c2o1 * c2o27 * TempD; + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt ]= -feqW27_T + c2o1 * c2o27 * TempD; + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54 * TempD; + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54 * TempD; + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54 * TempD; + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54 * TempD; + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54 * TempD; + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= 
-feqW27_TE + c2o1 * c1o54 * TempD; + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54 * TempD; + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54 * TempD; + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54 * TempD; + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54 * TempD; + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54 * TempD; + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54 * TempD; q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD; q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD; q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD; @@ -6671,24 +6671,24 @@ __global__ void QADPressIncomp27( q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD; q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD; q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD; - //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00 ])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); - //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00 ])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); - //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0 ])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); - //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0 ])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); - //q = q_dirT[k]; if (q>=zero && q<=one) 
(D27.f[DIR_00M ])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); - //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P ])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); - //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); - //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); - //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); - //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); - //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); - //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); - //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); - //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); - //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); - //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); - //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP 
])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); - //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); + //q = q_dirE[k]; if (q>=zero && q<=one) (D27.f[DIR_M00])[kw ]=(two*feqW27_W -(f27_E *(q*omegaD-one)-omegaD*feq27_E *(q-one))/(omegaD-one)+f27_W *q)/(q+one); + //q = q_dirW[k]; if (q>=zero && q<=one) (D27.f[DIR_P00])[ke ]=(two*feqW27_E -(f27_W *(q*omegaD-one)-omegaD*feq27_W *(q-one))/(omegaD-one)+f27_E *q)/(q+one); + //q = q_dirN[k]; if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks ]=(two*feqW27_S -(f27_N *(q*omegaD-one)-omegaD*feq27_N *(q-one))/(omegaD-one)+f27_S *q)/(q+one); + //q = q_dirS[k]; if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn ]=(two*feqW27_N -(f27_S *(q*omegaD-one)-omegaD*feq27_S *(q-one))/(omegaD-one)+f27_N *q)/(q+one); + //q = q_dirT[k]; if (q>=zero && q<=one) (D27.f[DIR_00M])[kb ]=(two*feqW27_B -(f27_T *(q*omegaD-one)-omegaD*feq27_T *(q-one))/(omegaD-one)+f27_B *q)/(q+one); + //q = q_dirB[k]; if (q>=zero && q<=one) (D27.f[DIR_00P])[kt ]=(two*feqW27_T -(f27_B *(q*omegaD-one)-omegaD*feq27_B *(q-one))/(omegaD-one)+f27_T *q)/(q+one); + //q = q_dirNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one); + //q = q_dirSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one); + //q = q_dirSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one); + //q = q_dirNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one); + //q = q_dirTE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE 
*(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one); + //q = q_dirBW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one); + //q = q_dirBE[k]; if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one); + //q = q_dirTW[k]; if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one); + //q = q_dirTN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one); + //q = q_dirBS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one); + //q = q_dirBN[k]; if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one); + //q = q_dirTS[k]; if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one); //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one); //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one); //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one); @@ -6739,131 +6739,131 @@ __global__ void AD_SlipVelDeviceComp( uint* neighborX, uint* neighborY, uint* neighborZ, - uint size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep) { - D.f[DIR_P00 ] = &distributions[DIR_P00 * 
size_Mat]; - D.f[DIR_M00 ] = &distributions[DIR_M00 * size_Mat]; - D.f[DIR_0P0 ] = &distributions[DIR_0P0 * size_Mat]; - D.f[DIR_0M0 ] = &distributions[DIR_0M0 * size_Mat]; - D.f[DIR_00P ] = &distributions[DIR_00P * size_Mat]; - D.f[DIR_00M ] = &distributions[DIR_00M * size_Mat]; - D.f[DIR_PP0 ] = &distributions[DIR_PP0 * size_Mat]; - D.f[DIR_MM0 ] = &distributions[DIR_MM0 * size_Mat]; - D.f[DIR_PM0 ] = &distributions[DIR_PM0 * size_Mat]; - D.f[DIR_MP0 ] = &distributions[DIR_MP0 * size_Mat]; - D.f[DIR_P0P ] = &distributions[DIR_P0P * size_Mat]; - D.f[DIR_M0M ] = &distributions[DIR_M0M * size_Mat]; - D.f[DIR_P0M ] = &distributions[DIR_P0M * size_Mat]; - D.f[DIR_M0P ] = &distributions[DIR_M0P * size_Mat]; - D.f[DIR_0PP ] = &distributions[DIR_0PP * size_Mat]; - D.f[DIR_0MM ] = &distributions[DIR_0MM * size_Mat]; - D.f[DIR_0PM ] = &distributions[DIR_0PM * size_Mat]; - D.f[DIR_0MP ] = &distributions[DIR_0MP * size_Mat]; - D.f[DIR_000] = &distributions[DIR_000 * size_Mat]; - D.f[DIR_PPP ] = &distributions[DIR_PPP * size_Mat]; - D.f[DIR_MMP ] = &distributions[DIR_MMP * size_Mat]; - D.f[DIR_PMP ] = &distributions[DIR_PMP * size_Mat]; - D.f[DIR_MPP ] = &distributions[DIR_MPP * size_Mat]; - D.f[DIR_PPM ] = &distributions[DIR_PPM * size_Mat]; - D.f[DIR_MMM ] = &distributions[DIR_MMM * size_Mat]; - D.f[DIR_PMM ] = &distributions[DIR_PMM * size_Mat]; - D.f[DIR_MPM ] = &distributions[DIR_MPM * size_Mat]; + D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_MP0 * 
numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &distributions[DIR_P00 * size_Mat]; - D.f[DIR_P00 ] = &distributions[DIR_M00 * size_Mat]; - D.f[DIR_0M0 ] = &distributions[DIR_0P0 * size_Mat]; - D.f[DIR_0P0 ] = &distributions[DIR_0M0 * size_Mat]; - D.f[DIR_00M ] = &distributions[DIR_00P * size_Mat]; - D.f[DIR_00P ] = &distributions[DIR_00M * size_Mat]; - D.f[DIR_MM0 ] = &distributions[DIR_PP0 * size_Mat]; - D.f[DIR_PP0 ] = &distributions[DIR_MM0 * size_Mat]; - D.f[DIR_MP0 ] = &distributions[DIR_PM0 * size_Mat]; - D.f[DIR_PM0 ] = &distributions[DIR_MP0 * size_Mat]; - D.f[DIR_M0M ] = &distributions[DIR_P0P * size_Mat]; - D.f[DIR_P0P ] = &distributions[DIR_M0M * size_Mat]; - D.f[DIR_M0P ] = &distributions[DIR_P0M * size_Mat]; - D.f[DIR_P0M ] = &distributions[DIR_M0P * size_Mat]; - D.f[DIR_0MM ] = &distributions[DIR_0PP * size_Mat]; - D.f[DIR_0PP ] = &distributions[DIR_0MM * size_Mat]; - D.f[DIR_0MP ] = &distributions[DIR_0PM * size_Mat]; - D.f[DIR_0PM ] = 
&distributions[DIR_0MP * size_Mat]; - D.f[DIR_000] = &distributions[DIR_000 * size_Mat]; - D.f[DIR_PPP ] = &distributions[DIR_MMM * size_Mat]; - D.f[DIR_MMP ] = &distributions[DIR_PPM * size_Mat]; - D.f[DIR_PMP ] = &distributions[DIR_MPM * size_Mat]; - D.f[DIR_MPP ] = &distributions[DIR_PMM * size_Mat]; - D.f[DIR_PPM ] = &distributions[DIR_MMP * size_Mat]; - D.f[DIR_MMM ] = &distributions[DIR_PPP * size_Mat]; - D.f[DIR_PMM ] = &distributions[DIR_MPP * size_Mat]; - D.f[DIR_MPM ] = &distributions[DIR_PMP * size_Mat]; + D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = 
&distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// Distributions27 DAD; if (isEvenTimestep) { - DAD.f[DIR_P00 ] = &distributionsAD[DIR_P00 * size_Mat]; - DAD.f[DIR_M00 ] = &distributionsAD[DIR_M00 * size_Mat]; - DAD.f[DIR_0P0 ] = &distributionsAD[DIR_0P0 * size_Mat]; - DAD.f[DIR_0M0 ] = &distributionsAD[DIR_0M0 * size_Mat]; - DAD.f[DIR_00P ] = &distributionsAD[DIR_00P * size_Mat]; - DAD.f[DIR_00M ] = &distributionsAD[DIR_00M * size_Mat]; - DAD.f[DIR_PP0 ] = &distributionsAD[DIR_PP0 * size_Mat]; - DAD.f[DIR_MM0 ] = &distributionsAD[DIR_MM0 * size_Mat]; - DAD.f[DIR_PM0 ] = &distributionsAD[DIR_PM0 * size_Mat]; - DAD.f[DIR_MP0 ] = &distributionsAD[DIR_MP0 * size_Mat]; - DAD.f[DIR_P0P ] = &distributionsAD[DIR_P0P * size_Mat]; - DAD.f[DIR_M0M ] = &distributionsAD[DIR_M0M * size_Mat]; - DAD.f[DIR_P0M ] = &distributionsAD[DIR_P0M * size_Mat]; - DAD.f[DIR_M0P ] = &distributionsAD[DIR_M0P * size_Mat]; - DAD.f[DIR_0PP ] = &distributionsAD[DIR_0PP * size_Mat]; - DAD.f[DIR_0MM ] = &distributionsAD[DIR_0MM * size_Mat]; - DAD.f[DIR_0PM ] = &distributionsAD[DIR_0PM * size_Mat]; - DAD.f[DIR_0MP ] = &distributionsAD[DIR_0MP * size_Mat]; - DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat]; - DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP * size_Mat]; - DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP * size_Mat]; - DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP * size_Mat]; - DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP * size_Mat]; - DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM * size_Mat]; - DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM * size_Mat]; - DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM * size_Mat]; - DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM * size_Mat]; + DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + DAD.f[DIR_0P0] 
= &distributionsAD[DIR_0P0 * numberOfLBnodes]; + DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes]; + DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes]; + DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + DAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } else { - DAD.f[DIR_M00 ] = &distributionsAD[DIR_P00 * size_Mat]; - DAD.f[DIR_P00 ] = &distributionsAD[DIR_M00 * size_Mat]; - DAD.f[DIR_0M0 ] = &distributionsAD[DIR_0P0 * size_Mat]; - DAD.f[DIR_0P0 ] = &distributionsAD[DIR_0M0 * size_Mat]; - DAD.f[DIR_00M ] = &distributionsAD[DIR_00P * size_Mat]; - DAD.f[DIR_00P ] = &distributionsAD[DIR_00M * size_Mat]; - DAD.f[DIR_MM0 ] = &distributionsAD[DIR_PP0 * size_Mat]; - 
DAD.f[DIR_PP0 ] = &distributionsAD[DIR_MM0 * size_Mat]; - DAD.f[DIR_MP0 ] = &distributionsAD[DIR_PM0 * size_Mat]; - DAD.f[DIR_PM0 ] = &distributionsAD[DIR_MP0 * size_Mat]; - DAD.f[DIR_M0M ] = &distributionsAD[DIR_P0P * size_Mat]; - DAD.f[DIR_P0P ] = &distributionsAD[DIR_M0M * size_Mat]; - DAD.f[DIR_M0P ] = &distributionsAD[DIR_P0M * size_Mat]; - DAD.f[DIR_P0M ] = &distributionsAD[DIR_M0P * size_Mat]; - DAD.f[DIR_0MM ] = &distributionsAD[DIR_0PP * size_Mat]; - DAD.f[DIR_0PP ] = &distributionsAD[DIR_0MM * size_Mat]; - DAD.f[DIR_0MP ] = &distributionsAD[DIR_0PM * size_Mat]; - DAD.f[DIR_0PM ] = &distributionsAD[DIR_0MP * size_Mat]; - DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat]; - DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM * size_Mat]; - DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM * size_Mat]; - DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM * size_Mat]; - DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM * size_Mat]; - DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP * size_Mat]; - DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP * size_Mat]; - DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP * size_Mat]; - DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP * size_Mat]; + DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes]; + DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes]; + DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + 
DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes]; + DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -6888,24 +6888,24 @@ __global__ void AD_SlipVelDeviceComp( * q_dirBE, * q_dirTW, * q_dirTN, * q_dirBS, * q_dirBN, * q_dirTS, * q_dirTNE, * q_dirTSW, * q_dirTSE, * q_dirTNW, * q_dirBNE, * q_dirBSW, * q_dirBSE, * q_dirBNW; - q_dirE = &Qarrays[DIR_P00 * numberOfBCnodes]; - q_dirW = &Qarrays[DIR_M00 * numberOfBCnodes]; - q_dirN = &Qarrays[DIR_0P0 * numberOfBCnodes]; - q_dirS = &Qarrays[DIR_0M0 * numberOfBCnodes]; - q_dirT = &Qarrays[DIR_00P * numberOfBCnodes]; - q_dirB = &Qarrays[DIR_00M * numberOfBCnodes]; - q_dirNE = &Qarrays[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &Qarrays[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &Qarrays[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &Qarrays[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &Qarrays[DIR_P0P * numberOfBCnodes]; - q_dirBW = &Qarrays[DIR_M0M * numberOfBCnodes]; - q_dirBE = &Qarrays[DIR_P0M * numberOfBCnodes]; - q_dirTW = &Qarrays[DIR_M0P * numberOfBCnodes]; - q_dirTN = &Qarrays[DIR_0PP * numberOfBCnodes]; - q_dirBS = 
&Qarrays[DIR_0MM * numberOfBCnodes]; - q_dirBN = &Qarrays[DIR_0PM * numberOfBCnodes]; - q_dirTS = &Qarrays[DIR_0MP * numberOfBCnodes]; + q_dirE = &Qarrays[DIR_P00 * numberOfBCnodes]; + q_dirW = &Qarrays[DIR_M00 * numberOfBCnodes]; + q_dirN = &Qarrays[DIR_0P0 * numberOfBCnodes]; + q_dirS = &Qarrays[DIR_0M0 * numberOfBCnodes]; + q_dirT = &Qarrays[DIR_00P * numberOfBCnodes]; + q_dirB = &Qarrays[DIR_00M * numberOfBCnodes]; + q_dirNE = &Qarrays[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &Qarrays[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &Qarrays[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &Qarrays[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &Qarrays[DIR_P0P * numberOfBCnodes]; + q_dirBW = &Qarrays[DIR_M0M * numberOfBCnodes]; + q_dirBE = &Qarrays[DIR_P0M * numberOfBCnodes]; + q_dirTW = &Qarrays[DIR_M0P * numberOfBCnodes]; + q_dirTN = &Qarrays[DIR_0PP * numberOfBCnodes]; + q_dirBS = &Qarrays[DIR_0MM * numberOfBCnodes]; + q_dirBN = &Qarrays[DIR_0PM * numberOfBCnodes]; + q_dirTS = &Qarrays[DIR_0MP * numberOfBCnodes]; q_dirTNE = &Qarrays[DIR_PPP * numberOfBCnodes]; q_dirTSW = &Qarrays[DIR_MMP * numberOfBCnodes]; q_dirTSE = &Qarrays[DIR_PMP * numberOfBCnodes]; @@ -7025,63 +7025,63 @@ __global__ void AD_SlipVelDeviceComp( ////////////////////////////////////////////////////////////////////////// if (!isEvenTimestep) { - DAD.f[DIR_P00 ] = &distributionsAD[DIR_P00 * size_Mat]; - DAD.f[DIR_M00 ] = &distributionsAD[DIR_M00 * size_Mat]; - DAD.f[DIR_0P0 ] = &distributionsAD[DIR_0P0 * size_Mat]; - DAD.f[DIR_0M0 ] = &distributionsAD[DIR_0M0 * size_Mat]; - DAD.f[DIR_00P ] = &distributionsAD[DIR_00P * size_Mat]; - DAD.f[DIR_00M ] = &distributionsAD[DIR_00M * size_Mat]; - DAD.f[DIR_PP0 ] = &distributionsAD[DIR_PP0 * size_Mat]; - DAD.f[DIR_MM0 ] = &distributionsAD[DIR_MM0 * size_Mat]; - DAD.f[DIR_PM0 ] = &distributionsAD[DIR_PM0 * size_Mat]; - DAD.f[DIR_MP0 ] = &distributionsAD[DIR_MP0 * size_Mat]; - DAD.f[DIR_P0P ] = &distributionsAD[DIR_P0P * size_Mat]; - DAD.f[DIR_M0M ] = &distributionsAD[DIR_M0M * 
size_Mat]; - DAD.f[DIR_P0M ] = &distributionsAD[DIR_P0M * size_Mat]; - DAD.f[DIR_M0P ] = &distributionsAD[DIR_M0P * size_Mat]; - DAD.f[DIR_0PP ] = &distributionsAD[DIR_0PP * size_Mat]; - DAD.f[DIR_0MM ] = &distributionsAD[DIR_0MM * size_Mat]; - DAD.f[DIR_0PM ] = &distributionsAD[DIR_0PM * size_Mat]; - DAD.f[DIR_0MP ] = &distributionsAD[DIR_0MP * size_Mat]; - DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat]; - DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP * size_Mat]; - DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP * size_Mat]; - DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP * size_Mat]; - DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP * size_Mat]; - DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM * size_Mat]; - DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM * size_Mat]; - DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM * size_Mat]; - DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM * size_Mat]; + DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + DAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes]; + DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes]; + DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + DAD.f[DIR_0MP] = 
&distributionsAD[DIR_0MP * numberOfLBnodes]; + DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } else { - DAD.f[DIR_M00 ] = &distributionsAD[DIR_P00 * size_Mat]; - DAD.f[DIR_P00 ] = &distributionsAD[DIR_M00 * size_Mat]; - DAD.f[DIR_0M0 ] = &distributionsAD[DIR_0P0 * size_Mat]; - DAD.f[DIR_0P0 ] = &distributionsAD[DIR_0M0 * size_Mat]; - DAD.f[DIR_00M ] = &distributionsAD[DIR_00P * size_Mat]; - DAD.f[DIR_00P ] = &distributionsAD[DIR_00M * size_Mat]; - DAD.f[DIR_MM0 ] = &distributionsAD[DIR_PP0 * size_Mat]; - DAD.f[DIR_PP0 ] = &distributionsAD[DIR_MM0 * size_Mat]; - DAD.f[DIR_MP0 ] = &distributionsAD[DIR_PM0 * size_Mat]; - DAD.f[DIR_PM0 ] = &distributionsAD[DIR_MP0 * size_Mat]; - DAD.f[DIR_M0M ] = &distributionsAD[DIR_P0P * size_Mat]; - DAD.f[DIR_P0P ] = &distributionsAD[DIR_M0M * size_Mat]; - DAD.f[DIR_M0P ] = &distributionsAD[DIR_P0M * size_Mat]; - DAD.f[DIR_P0M ] = &distributionsAD[DIR_M0P * size_Mat]; - DAD.f[DIR_0MM ] = &distributionsAD[DIR_0PP * size_Mat]; - DAD.f[DIR_0PP ] = &distributionsAD[DIR_0MM * size_Mat]; - DAD.f[DIR_0MP ] = &distributionsAD[DIR_0PM * size_Mat]; - DAD.f[DIR_0PM ] = &distributionsAD[DIR_0MP * size_Mat]; - DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat]; - DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM * size_Mat]; - DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM * size_Mat]; - DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM * size_Mat]; - DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM * size_Mat]; - DAD.f[DIR_PPM ] = 
&distributionsAD[DIR_MMP * size_Mat]; - DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP * size_Mat]; - DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP * size_Mat]; - DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP * size_Mat]; + DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes]; + DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes]; + DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes]; + DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real concentration = @@ -7115,24 +7115,24 @@ __global__ void AD_SlipVelDeviceComp( real jTan3 = jx3 - NormJ * NormZ; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - q = q_dirE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00 ])[kw ] = calcDistributionBC_AD(q, c2o27, vx1, cu_sq, f_E, f_W, omegaDiffusivity, jTan1, concentration); } - q = q_dirW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00 ])[ke ] = calcDistributionBC_AD(q, c2o27, -vx1, cu_sq, f_W, f_E, omegaDiffusivity, -jTan1, concentration); } - q = q_dirN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0 ])[ks ] = calcDistributionBC_AD(q, c2o27, vx2, cu_sq, f_N, f_S, omegaDiffusivity, jTan2, concentration); } - q = q_dirS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0 ])[kn ] = calcDistributionBC_AD(q, c2o27, -vx2, cu_sq, f_S, f_N, omegaDiffusivity, -jTan2, concentration); } - q = q_dirT[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M ])[kb ] = calcDistributionBC_AD(q, c2o27, vx3, cu_sq, f_T, f_B, omegaDiffusivity, jTan3, concentration); } - q = q_dirB[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P ])[kt ] = calcDistributionBC_AD(q, c2o27, -vx3, cu_sq, f_B, f_T, omegaDiffusivity, -jTan3, concentration); } - q = q_dirNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0 ])[ksw ] = calcDistributionBC_AD(q, c1o54, vx1+vx2, cu_sq, f_NE, f_SW, omegaDiffusivity, jTan1+jTan2, concentration); } - q = q_dirSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0 ])[kne ] = calcDistributionBC_AD(q, c1o54, -vx1-vx2, cu_sq, f_SW, f_NE, omegaDiffusivity, -jTan1-jTan2, concentration); } - q = q_dirSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0 ])[knw ] = calcDistributionBC_AD(q, c1o54, vx1-vx2, cu_sq, f_SE, f_NW, omegaDiffusivity, jTan1-jTan2, concentration); } - q = 
q_dirNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0 ])[kse ] = calcDistributionBC_AD(q, c1o54, -vx1+vx2, cu_sq, f_NW, f_SE, omegaDiffusivity, -jTan1+jTan2, concentration); } - q = q_dirTE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M ])[kbw ] = calcDistributionBC_AD(q, c1o54, vx1 +vx3, cu_sq, f_TE, f_BW, omegaDiffusivity, jTan1 +jTan3, concentration); } - q = q_dirBW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P ])[kte ] = calcDistributionBC_AD(q, c1o54, -vx1 -vx3, cu_sq, f_BW, f_TE, omegaDiffusivity, -jTan1 -jTan3, concentration); } - q = q_dirBE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P ])[ktw ] = calcDistributionBC_AD(q, c1o54, vx1 -vx3, cu_sq, f_BE, f_TW, omegaDiffusivity, jTan1 -jTan3, concentration); } - q = q_dirTW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M ])[kbe ] = calcDistributionBC_AD(q, c1o54, -vx1 +vx3, cu_sq, f_TW, f_BE, omegaDiffusivity, -jTan1 +jTan3, concentration); } - q = q_dirTN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM ])[kbs ] = calcDistributionBC_AD(q, c1o54, vx2+vx3, cu_sq, f_TN, f_BS, omegaDiffusivity, jTan2+jTan3, concentration); } - q = q_dirBS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP ])[ktn ] = calcDistributionBC_AD(q, c1o54, -vx2-vx3, cu_sq, f_BS, f_TN, omegaDiffusivity, -jTan2-jTan3, concentration); } - q = q_dirBN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP ])[kts ] = calcDistributionBC_AD(q, c1o54, vx2-vx3, cu_sq, f_BN, f_TS, omegaDiffusivity, jTan2-jTan3, concentration); } - q = q_dirTS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM ])[kbn ] = calcDistributionBC_AD(q, c1o54, -vx2+vx3, cu_sq, f_TS, f_BN, omegaDiffusivity, -jTan2+jTan3, concentration); } + q = q_dirE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00])[kw ] = calcDistributionBC_AD(q, c2o27, vx1, cu_sq, f_E, f_W, omegaDiffusivity, jTan1, concentration); } + q = q_dirW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00])[ke ] = calcDistributionBC_AD(q, c2o27, -vx1, cu_sq, f_W, f_E, omegaDiffusivity, -jTan1, concentration); } + 
q = q_dirN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0])[ks ] = calcDistributionBC_AD(q, c2o27, vx2, cu_sq, f_N, f_S, omegaDiffusivity, jTan2, concentration); } + q = q_dirS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0])[kn ] = calcDistributionBC_AD(q, c2o27, -vx2, cu_sq, f_S, f_N, omegaDiffusivity, -jTan2, concentration); } + q = q_dirT[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M])[kb ] = calcDistributionBC_AD(q, c2o27, vx3, cu_sq, f_T, f_B, omegaDiffusivity, jTan3, concentration); } + q = q_dirB[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P])[kt ] = calcDistributionBC_AD(q, c2o27, -vx3, cu_sq, f_B, f_T, omegaDiffusivity, -jTan3, concentration); } + q = q_dirNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0])[ksw ] = calcDistributionBC_AD(q, c1o54, vx1+vx2, cu_sq, f_NE, f_SW, omegaDiffusivity, jTan1+jTan2, concentration); } + q = q_dirSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0])[kne ] = calcDistributionBC_AD(q, c1o54, -vx1-vx2, cu_sq, f_SW, f_NE, omegaDiffusivity, -jTan1-jTan2, concentration); } + q = q_dirSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0])[knw ] = calcDistributionBC_AD(q, c1o54, vx1-vx2, cu_sq, f_SE, f_NW, omegaDiffusivity, jTan1-jTan2, concentration); } + q = q_dirNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0])[kse ] = calcDistributionBC_AD(q, c1o54, -vx1+vx2, cu_sq, f_NW, f_SE, omegaDiffusivity, -jTan1+jTan2, concentration); } + q = q_dirTE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M])[kbw ] = calcDistributionBC_AD(q, c1o54, vx1 +vx3, cu_sq, f_TE, f_BW, omegaDiffusivity, jTan1 +jTan3, concentration); } + q = q_dirBW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P])[kte ] = calcDistributionBC_AD(q, c1o54, -vx1 -vx3, cu_sq, f_BW, f_TE, omegaDiffusivity, -jTan1 -jTan3, concentration); } + q = q_dirBE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P])[ktw ] = calcDistributionBC_AD(q, c1o54, vx1 -vx3, cu_sq, f_BE, f_TW, omegaDiffusivity, jTan1 -jTan3, concentration); } + q = q_dirTW[k]; if (q >= c0o1 && q <= 
c1o1) { (DAD.f[DIR_P0M])[kbe ] = calcDistributionBC_AD(q, c1o54, -vx1 +vx3, cu_sq, f_TW, f_BE, omegaDiffusivity, -jTan1 +jTan3, concentration); } + q = q_dirTN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM])[kbs ] = calcDistributionBC_AD(q, c1o54, vx2+vx3, cu_sq, f_TN, f_BS, omegaDiffusivity, jTan2+jTan3, concentration); } + q = q_dirBS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP])[ktn ] = calcDistributionBC_AD(q, c1o54, -vx2-vx3, cu_sq, f_BS, f_TN, omegaDiffusivity, -jTan2-jTan3, concentration); } + q = q_dirBN[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP])[kts ] = calcDistributionBC_AD(q, c1o54, vx2-vx3, cu_sq, f_BN, f_TS, omegaDiffusivity, jTan2-jTan3, concentration); } + q = q_dirTS[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM])[kbn ] = calcDistributionBC_AD(q, c1o54, -vx2+vx3, cu_sq, f_TS, f_BN, omegaDiffusivity, -jTan2+jTan3, concentration); } q = q_dirTNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMM])[kbsw] = calcDistributionBC_AD(q, c1o216, vx1+vx2+vx3, cu_sq, f_TNE, f_BSW, omegaDiffusivity, jTan1+jTan2+jTan3, concentration); } q = q_dirBSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PPP])[ktne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2-vx3, cu_sq, f_BSW, f_TNE, omegaDiffusivity, -jTan1-jTan2-jTan3, concentration); } q = q_dirBNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMP])[ktsw] = calcDistributionBC_AD(q, c1o216, vx1+vx2-vx3, cu_sq, f_BNE, f_TSW, omegaDiffusivity, jTan1+jTan2-jTan3, concentration); } diff --git a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu index 1ef111330c0d4293c14d66893847689ad8fac77f..8d02f4e1c110fc82b65adda4db67976f29796d07 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu @@ -14,69 +14,69 @@ __global__ void CalcCP27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { 
- D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * 
numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + 
D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -124,20 +124,20 @@ __global__ void CalcCP27(real* DD, //////////////////////////////////////////////////////////////////////////////// double PressCP; - PressCP = (D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ + PressCP = (D.f[DIR_P00])[ke ]+ (D.f[DIR_M00])[kw ]+ + (D.f[DIR_0P0])[kn ]+ (D.f[DIR_0M0])[ks ]+ + (D.f[DIR_00P])[kt ]+ (D.f[DIR_00M])[kb ]+ + (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+ + (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+ + (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+ + (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+ + 
(D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+ + (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+ (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]; + (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ + (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ + (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ + (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw]; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cpPress[k] = PressCP; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu index ce8fe68cd6a2e8f09f150cb0ccdec502a6278b50..c41751dc1b5cea53983d94d9cc7c3c75c8a84101 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu @@ -16,70 +16,70 @@ __global__ void LBCalc2ndMomentsIncompSP27( real* kxyFromfcNEQ, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - 
D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; 
- D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = 
&DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -92,7 +92,7 @@ __global__ void LBCalc2ndMomentsIncompSP27( real* kxyFromfcNEQ, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k < size_Mat) + if(k < numberOfLBnodes) { ////////////////////////////////////////////////////////////////////////// //index @@ -125,33 +125,33 @@ __global__ void LBCalc2ndMomentsIncompSP27( real* kxyFromfcNEQ, unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////// real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,/*f_ZERO,*/f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = 
(D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; //f_ZERO = (D.f[DIR_000])[kzero]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3; kxyFromfcNEQ[k] = c0o1; @@ -215,70 +215,70 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM 
*size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = 
&DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = 
&DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -291,7 +291,7 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k < size_Mat) + if(k < numberOfLBnodes) { ////////////////////////////////////////////////////////////////////////// //index @@ -325,33 +325,33 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ, ////////////////////////////////////////////////////////////////////////// real f_ZERO; real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = 
(D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; f_ZERO = (D.f[DIR_000])[kzero]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// real drho; real vx1, vx2, vx3, rho; @@ -423,7 +423,7 @@ __global__ void LBCalc3rdMomentsIncompSP27( real* CUMbbb, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -437,7 +437,7 @@ __global__ void LBCalc3rdMomentsIncompSP27( real* CUMbbb, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -448,63 +448,63 @@ __global__ void LBCalc3rdMomentsIncompSP27( real* CUMbbb, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = 
&DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * 
numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + 
D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -517,33 +517,33 @@ __global__ void LBCalc3rdMomentsIncompSP27( real* CUMbbb, unsigned int kbs = neighborZ[ks]; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca 
= (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real vvx =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) + @@ -857,7 +857,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -871,7 +871,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb, const unsigned k = nx*(ny*z + y) + x; 
////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -882,63 +882,63 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = 
&DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP 
*size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -951,33 +951,33 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb, unsigned int kbs = neighborZ[ks]; 
unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real 
mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + @@ -1298,7 +1298,7 @@ __global__ void LBCalcHigherMomentsIncompSP27( real* CUMcbb, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -1312,7 +1312,7 @@ __global__ void LBCalcHigherMomentsIncompSP27( real* CUMcbb, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -1323,63 +1323,63 @@ __global__ void LBCalcHigherMomentsIncompSP27( real* CUMcbb, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] 
= &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = 
&DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM 
* numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1392,33 +1392,33 @@ __global__ void LBCalcHigherMomentsIncompSP27( real* CUMcbb, unsigned int kbs = neighborZ[ks]; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = 
(D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real vvx =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) + @@ -1752,7 +1752,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -1766,7 +1766,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -1777,63 +1777,63 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = 
&DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * 
numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + 
D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1846,33 +1846,33 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb, unsigned int kbs = neighborZ[ks]; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; 
- real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + diff --git 
a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu index d246f39a030b6df0b249aee17f37b7d5258ff00d..ad5a05b12a1b3ae2541e36ccffae4635fccfe62a 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu @@ -45,7 +45,7 @@ __global__ void CalcConc27( uint* neighborX, uint* neighborY, uint* neighborZ, - uint size_Mat, + unsigned long long numberOfLBnodes, real* distributionsAD, bool isEvenTimestep) { @@ -67,7 +67,7 @@ __global__ void CalcConc27( ////////////////////////////////////////////////////////////////////////// // run for all indices in size_Mat and fluid nodes - if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) + if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID)) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -76,63 +76,63 @@ __global__ void CalcConc27( Distributions27 distAD; if (isEvenTimestep) { - distAD.f[DIR_P00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_M00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0PP ] = 
&distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0MP *size_Mat]; - distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat]; + distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = 
&distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } else { - distAD.f[DIR_M00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_P00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0PP ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0MP *size_Mat]; - distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_PPM ] = 
&distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat]; + distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes]; + distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_MPM] = 
&distributionsAD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) @@ -166,33 +166,33 @@ __global__ void CalcConc27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real mfcbb = (distAD.f[DIR_P00 ])[ke ]; - real mfabb = (distAD.f[DIR_M00 ])[kw ]; - real mfbcb = (distAD.f[DIR_0P0 ])[kn ]; - real mfbab = (distAD.f[DIR_0M0 ])[ks ]; - real mfbbc = (distAD.f[DIR_00P ])[kt ]; - real mfbba = (distAD.f[DIR_00M ])[kb ]; - real mfccb = (distAD.f[DIR_PP0 ])[kne ]; - real mfaab = (distAD.f[DIR_MM0 ])[ksw ]; - real mfcab = (distAD.f[DIR_PM0 ])[kse ]; - real mfacb = (distAD.f[DIR_MP0 ])[knw ]; - real mfcbc = (distAD.f[DIR_P0P ])[kte ]; - real mfaba = (distAD.f[DIR_M0M ])[kbw ]; - real mfcba = (distAD.f[DIR_P0M ])[kbe ]; - real mfabc = (distAD.f[DIR_M0P ])[ktw ]; - real mfbcc = (distAD.f[DIR_0PP ])[ktn ]; - real mfbaa = (distAD.f[DIR_0MM ])[kbs ]; - real mfbca = (distAD.f[DIR_0PM ])[kbn ]; - real mfbac = (distAD.f[DIR_0MP ])[kts ]; + real mfcbb = (distAD.f[DIR_P00])[ke ]; + real mfabb = (distAD.f[DIR_M00])[kw ]; + real mfbcb = (distAD.f[DIR_0P0])[kn ]; + real mfbab = (distAD.f[DIR_0M0])[ks ]; + real mfbbc = (distAD.f[DIR_00P])[kt ]; + real mfbba = (distAD.f[DIR_00M])[kb ]; + real mfccb = (distAD.f[DIR_PP0])[kne ]; + real mfaab = (distAD.f[DIR_MM0])[ksw ]; + real mfcab = (distAD.f[DIR_PM0])[kse ]; + real mfacb = (distAD.f[DIR_MP0])[knw ]; + real mfcbc = (distAD.f[DIR_P0P])[kte ]; + real mfaba = (distAD.f[DIR_M0M])[kbw ]; + real mfcba = (distAD.f[DIR_P0M])[kbe ]; + real mfabc = (distAD.f[DIR_M0P])[ktw ]; + real mfbcc = (distAD.f[DIR_0PP])[ktn ]; + real mfbaa = (distAD.f[DIR_0MM])[kbs ]; + real mfbca = (distAD.f[DIR_0PM])[kbn ]; + real mfbac = (distAD.f[DIR_0MP])[kts ]; real mfbbb = (distAD.f[DIR_000])[k ]; - real mfccc = (distAD.f[DIR_PPP ])[ktne]; - real mfaac = (distAD.f[DIR_MMP ])[ktsw]; - 
real mfcac = (distAD.f[DIR_PMP ])[ktse]; - real mfacc = (distAD.f[DIR_MPP ])[ktnw]; - real mfcca = (distAD.f[DIR_PPM ])[kbne]; - real mfaaa = (distAD.f[DIR_MMM ])[kbsw]; - real mfcaa = (distAD.f[DIR_PMM ])[kbse]; - real mfaca = (distAD.f[DIR_MPM ])[kbnw]; + real mfccc = (distAD.f[DIR_PPP])[ktne]; + real mfaac = (distAD.f[DIR_MMP])[ktsw]; + real mfcac = (distAD.f[DIR_PMP])[ktse]; + real mfacc = (distAD.f[DIR_MPP])[ktnw]; + real mfcca = (distAD.f[DIR_PPM])[kbne]; + real mfaaa = (distAD.f[DIR_MMM])[kbsw]; + real mfcaa = (distAD.f[DIR_PMM])[kbse]; + real mfaca = (distAD.f[DIR_MPM])[kbnw]; ////////////////////////////////////////////////////////////////////////// //! - Calculate concentration using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> @@ -229,30 +229,30 @@ __global__ void CalcConc7( real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool isEvenTimestep) { Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = 
&DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -265,7 +265,7 @@ __global__ void CalcConc7( real* Conc, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { ////////////////////////////////////////////////////////////////////////// //index @@ -327,63 +327,63 @@ __global__ void CalcConc7( real* Conc, // Distributions27 D27; // if (isEvenTimestep==true) // { -// D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; -// D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; -// D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; -// D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; -// D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; -// D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; -// D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; -// D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; -// D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; -// D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; -// D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; -// D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; -// D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; -// D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; -// D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; -// D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; -// D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; -// D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; -// D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; -// D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; -// D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; -// D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; -// D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; -// D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; -// D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; -// D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; -// D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; +// 
D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat]; +// D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat]; +// D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat]; +// D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat]; +// D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat]; +// D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat]; +// D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat]; +// D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat]; +// D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat]; +// D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat]; +// D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat]; +// D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat]; +// D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat]; +// D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat]; +// D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat]; +// D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat]; +// D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat]; +// D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat]; +// D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; +// D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat]; +// D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat]; +// D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat]; +// D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat]; +// D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat]; +// D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat]; +// D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat]; +// D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat]; // } // else // { -// D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; -// D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; -// D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; -// D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; -// D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; -// D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; -// D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; -// D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; -// D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; -// D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; -// D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; -// D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; -// D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; -// D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; -// D27.f[DIR_0MM 
] = &DD27[DIR_0PP *size_Mat]; -// D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; -// D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; -// D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; -// D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; -// D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; -// D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; -// D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; -// D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; -// D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; -// D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; -// D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; -// D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; +// D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat]; +// D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat]; +// D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat]; +// D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat]; +// D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat]; +// D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat]; +// D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat]; +// D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat]; +// D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat]; +// D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat]; +// D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat]; +// D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat]; +// D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat]; +// D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat]; +// D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat]; +// D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat]; +// D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat]; +// D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat]; +// D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; +// D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat]; +// D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat]; +// D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat]; +// D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat]; +// D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat]; +// D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat]; +// D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat]; +// D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat]; // } // //////////////////////////////////////////////////////////////////////////////// // const 
unsigned x = threadIdx.x; // Globaler x-Index @@ -432,20 +432,20 @@ __global__ void CalcConc7( real* Conc, // if(geoD[k] == GEO_FLUID) // { -// Conc[k] = (D27.f[DIR_P00 ])[ke ]+ (D27.f[DIR_M00 ])[kw ]+ -// (D27.f[DIR_0P0 ])[kn ]+ (D27.f[DIR_0M0 ])[ks ]+ -// (D27.f[DIR_00P ])[kt ]+ (D27.f[DIR_00M ])[kb ]+ -// (D27.f[DIR_PP0 ])[kne ]+ (D27.f[DIR_MM0 ])[ksw ]+ -// (D27.f[DIR_PM0 ])[kse ]+ (D27.f[DIR_MP0 ])[knw ]+ -// (D27.f[DIR_P0P ])[kte ]+ (D27.f[DIR_M0M ])[kbw ]+ -// (D27.f[DIR_P0M ])[kbe ]+ (D27.f[DIR_M0P ])[ktw ]+ -// (D27.f[DIR_0PP ])[ktn ]+ (D27.f[DIR_0MM ])[kbs ]+ -// (D27.f[DIR_0PM ])[kbn ]+ (D27.f[DIR_0MP ])[kts ]+ +// Conc[k] = (D27.f[DIR_P00])[ke ]+ (D27.f[DIR_M00])[kw ]+ +// (D27.f[DIR_0P0])[kn ]+ (D27.f[DIR_0M0])[ks ]+ +// (D27.f[DIR_00P])[kt ]+ (D27.f[DIR_00M])[kb ]+ +// (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+ +// (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+ +// (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+ +// (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+ +// (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+ +// (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+ // (D27.f[DIR_000])[kzero]+ -// (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+ -// (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+ -// (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+ -// (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw]; +// (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+ +// (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+ +// (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+ +// (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw]; // } // } // } @@ -476,30 +476,30 @@ __global__ void GetPlaneConc7(real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool isEvenTimestep) { Distributions7 D7; if (isEvenTimestep==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = 
&DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -581,70 +581,70 @@ __global__ void GetPlaneConc27(real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD27, bool isEvenTimestep) { Distributions27 D27; if (isEvenTimestep==true) { - D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM ] = 
&DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00 ] = 
&DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = 
&DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -693,20 +693,20 @@ __global__ void GetPlaneConc27(real* Conc, if(geoD[k] == GEO_FLUID) { - Conc[k] = (D27.f[DIR_P00 ])[ke ]+ (D27.f[DIR_M00 ])[kw ]+ - (D27.f[DIR_0P0 ])[kn ]+ (D27.f[DIR_0M0 ])[ks ]+ - (D27.f[DIR_00P ])[kt ]+ (D27.f[DIR_00M ])[kb ]+ - (D27.f[DIR_PP0 ])[kne ]+ (D27.f[DIR_MM0 ])[ksw ]+ - (D27.f[DIR_PM0 ])[kse ]+ (D27.f[DIR_MP0 ])[knw ]+ - (D27.f[DIR_P0P ])[kte ]+ (D27.f[DIR_M0M ])[kbw ]+ - (D27.f[DIR_P0M ])[kbe ]+ (D27.f[DIR_M0P ])[ktw ]+ - (D27.f[DIR_0PP ])[ktn ]+ (D27.f[DIR_0MM ])[kbs ]+ - (D27.f[DIR_0PM ])[kbn ]+ (D27.f[DIR_0MP ])[kts ]+ + Conc[k] = (D27.f[DIR_P00])[ke ]+ (D27.f[DIR_M00])[kw ]+ + (D27.f[DIR_0P0])[kn ]+ (D27.f[DIR_0M0])[ks ]+ + (D27.f[DIR_00P])[kt ]+ (D27.f[DIR_00M])[kb ]+ + (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+ + (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+ + (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+ + (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+ + (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+ + (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+ (D27.f[DIR_000])[kzero]+ - (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+ - (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+ - (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+ - (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM 
])[kbnw]; + (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+ + (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+ + (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+ + (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw]; } } } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu index 4792b8846b2612383c07a97419e0473b21ebd187..f7bb09f816f45973fd4e2319a1bfa35cf9172caa 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu @@ -1,306 +1,310 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ \__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// -/* Device code */ +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CalcMac27.cu +//! \ingroup GPU +//! \author Martin Schoenherr, Soeren Peters +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" +#include "lbm/MacroscopicQuantities.h" + +#include "Kernel/Utilities/DistributionHelper.cuh" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; + +//////////////////////////////////////////////////////////////////////////////// +__global__ void LBCalcMac27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) +{ + const unsigned int tx = threadIdx.x; // Thread index = lokaler i index + const unsigned int by = blockIdx.x; // Block index x + const unsigned int bz = blockIdx.y; // Block index y + const unsigned int x = tx + STARTOFFX; // Globaler x-Index + const unsigned int y = by + STARTOFFY; // Globaler y-Index + const unsigned int z = bz + STARTOFFZ; // Globaler z-Index + + const unsigned nx = blockDim.x + 2 * STARTOFFX; + const unsigned ny = gridDim.x + 2 * STARTOFFY; + + const unsigned int k = nx*(ny*z + 
y) + x; // Zugriff auf arrays im device + + + if(k >= numberOfLBnodes) + return; + + if(!isValidFluidNode(geoD[k])) + return; + + rhoD[k] = c0o1; + vxD[k] = c0o1; + vyD[k] = c0o1; + vzD[k] = c0o1; + + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, neighborZ); + const auto& distribution = distr_wrapper.distribution; + + rhoD[k] = vf::lbm::getDensity(distribution.f); + vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); + vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); + vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); +} + -#include "lbm/MacroscopicQuantities.h" -#include "../Kernel/Utilities/DistributionHelper.cuh" //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMac27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* distributions, - bool isEvenTimestep) +__global__ void LBCalcMacSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - const unsigned int tx = threadIdx.x; // Thread index = lokaler i index - const unsigned int by = blockIdx.x; // Block index x - const unsigned int bz = blockIdx.y; // Block index y - const unsigned int x = tx + STARTOFFX; // Globaler x-Index - const unsigned int y = by + STARTOFFY; // Globaler y-Index - const unsigned int z = bz + STARTOFFZ; // Globaler z-Index + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(nodeIndex<numberOfLBnodes) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= nodeIndex; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = + (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ 
+ (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vxD[nodeIndex] = + (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + + vyD[nodeIndex] = + (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vzD[nodeIndex] = + (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- 
(dist.f[DIR_MPM])[kbnw]; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + 2.f*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + 3.f*( + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+c0o1*rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]; // times zero for incompressible case + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } +} +//////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + - const unsigned nx = blockDim.x + 2 * STARTOFFX; - const unsigned ny = gridDim.x + 2 * STARTOFFY; - const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device - if(k >= size_Mat) - return; - if(!vf::gpu::isValidFluidNode(geoD[k])) - return; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ); - const auto& distribution = distr_wrapper.distribution; - rhoD[k] = vf::lbm::getDensity(distribution.f); - vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f); - vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f); - vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f); -} -//////////////////////////////////////////////////////////////////////////////// -__global__ void 
LBCalcMacSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) -{ - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - 
D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = 
c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = (D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]; - - vxD[k] = (D.f[DIR_P00 ])[ke ]- (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]- (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]- (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]; - - vyD[k] = (D.f[DIR_0P0 ])[kn ]- (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]- - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]- (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]; - - vzD[k] = (D.f[DIR_00P ])[kt ]- (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]- - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]- - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM 
])[kbsw]- - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]; - - pressD[k] = ((D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - 2.f*( - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ])+ - 3.f*( - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+c0o1*rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - - } - } -} //////////////////////////////////////////////////////////////////////////////// __global__ void LBCalcMacCompSP27( - real *vxD, - real *vyD, - real *vzD, - real *rhoD, - real *pressD, - unsigned int *geoD, - unsigned int *neighborX, - unsigned int *neighborY, - unsigned int *neighborZ, - unsigned int size_Mat, - real *distributions, - bool isEvenTimestep) + real *vxD, + real *vyD, + real *vzD, + real *rhoD, + real *pressD, + unsigned int *geoD, + unsigned int *neighborX, + unsigned int *neighborY, + unsigned int *neighborZ, + unsigned long long numberOfLBnodes, + real *distributions, + bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - if(k >= size_Mat) + if(nodeIndex >= numberOfLBnodes) return; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; - if (!vf::gpu::isValidFluidNode(geoD[k])) + if (!isValidFluidNode(geoD[nodeIndex])) return; - vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, - neighborZ); + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ); const auto &distribution = distr_wrapper.distribution; - rhoD[k] = vf::lbm::getDensity(distribution.f); - vxD[k] = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]); - vyD[k] = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]); - vzD[k] = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]); - pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); + rhoD[nodeIndex] = vf::lbm::getDensity(distribution.f); + vxD[nodeIndex] = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[nodeIndex]); + vyD[nodeIndex] = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[nodeIndex]); + vzD[nodeIndex] = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[nodeIndex]); + pressD[nodeIndex] = vf::lbm::getPressure(distribution.f, rhoD[nodeIndex], vxD[nodeIndex], vyD[nodeIndex], vzD[nodeIndex]); } @@ -339,206 +343,155 @@ __global__ void LBCalcMacCompSP27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMedSP27( + real* vxD, + real* vyD, + real* vzD, 
+ real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P 
*size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - 
real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = (D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+ - RHO; - - vxD[k] = (D.f[DIR_P00 ])[ke ]- (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]- (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]- (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+ - VX; - - vyD[k] = (D.f[DIR_0P0 ])[kn ]- (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]- - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]- (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+ - VY; - - vzD[k] = (D.f[DIR_00P ])[kt ]- (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]- - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]- - 
(D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+ - VZ; - - pressD[k] = ((D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - c2o1*( - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ])+ - c3o1*( - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+ - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! 
+ Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= nodeIndex; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = + (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + 
(dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+ + RHO; + + vxD[nodeIndex] = + (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+ + VX; + + vyD[nodeIndex] = + (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+ + VY; + + vzD[nodeIndex] = + (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+ + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + c2o1*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ 
(dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + c3o1*( + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+ + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -563,259 +516,152 @@ __global__ void LBCalcMedSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMedCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMedCompSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P 
*size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = 
blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - //index - //unsigned int kzero= k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - real mfcbb = (D.f[DIR_P00])[k];//[ke ]; - real mfabb = (D.f[DIR_M00])[kw];//[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ]; - real mfbab = (D.f[DIR_0M0])[ks];//[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ]; - real mfbba = (D.f[DIR_00M])[kb];//[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ]; - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ]; - real mfacb = (D.f[DIR_MP0])[kw];//[knw ]; - real mfcbc = (D.f[DIR_P0P])[k];//[kte ]; - real 
mfaba = (D.f[DIR_M0M])[kbw];//[kbw ]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ]; - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ]; - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ]; - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ]; - real mfbac = (D.f[DIR_0MP])[ks];//[kts ]; - real mfbbb = (D.f[DIR_000])[k];//[kzero]; - real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ]; - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ]; - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ]; - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; - //////////////////////////////////////////////////////////////////////////////////// - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - - real rho = c1o1 + drho; - - rhoD[k] = drho + RHO; - - vxD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho) + VX; - vyD[k] = - (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) / rho) + VY; - vzD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho) + VZ; - - //rhoD[k] = - // (D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] + - // (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] + - // (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] + - // (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] + - // (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] 
+ - // (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] + - // (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] + - // (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] + - // (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts] + - // (D.f[DIR_000])[kzero] + - // (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] + - // (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] + - // (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] + - // (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw];// +RHO; - - // vxD[k] = - //((D.f[DIR_P00 ])[ke ]- (D.f[DIR_M00 ])[kw ]+ - // (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]+ - // (D.f[DIR_PM0 ])[kse ]- (D.f[DIR_MP0 ])[knw ]+ - // (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]+ - // (D.f[DIR_P0M ])[kbe ]- (D.f[DIR_M0P ])[ktw ]+ - // (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ - // (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ - // (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ - // (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+ - // VX; - - // vyD[k] = - //((D.f[DIR_0P0 ])[kn ]- (D.f[DIR_0M0 ])[ks ]+ - // (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]- - // (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - // (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]+ - // (D.f[DIR_0PM ])[kbn ]- (D.f[DIR_0MP ])[kts ]+ - // (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- - // (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - // (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - // (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+ - // VY; - - // vzD[k] = - //((D.f[DIR_00P ])[kt ]- (D.f[DIR_00M ])[kb ]+ - // (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]- - // (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - // (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]- - // (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - // (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - // (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- - // (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - // (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+ - // VZ; - - pressD[k] = ((D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw 
]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - c2o1*( - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ])+ - c3o1*( - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])- - rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+ - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! 
+ Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + //unsigned int kzero= k; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke ]; + real mfabb = (dist.f[DIR_M00])[kw];//[kw ]; + real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn ]; + real mfbab = (dist.f[DIR_0M0])[ks];//[ks ]; + real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt ]; + real mfbba = (dist.f[DIR_00M])[kb];//[kb ]; + real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne ]; + real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw ]; + real mfcab = (dist.f[DIR_PM0])[ks];//[kse ]; + real mfacb = (dist.f[DIR_MP0])[kw];//[knw ]; + real 
mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte ]; + real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw ]; + real mfcba = (dist.f[DIR_P0M])[kb];//[kbe ]; + real mfabc = (dist.f[DIR_M0P])[kw];//[ktw ]; + real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn ]; + real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs ]; + real mfbca = (dist.f[DIR_0PM])[kb];//[kbn ]; + real mfbac = (dist.f[DIR_0MP])[ks];//[kts ]; + real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero]; + real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; + real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; + real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ]; + real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ]; + real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ]; + real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ]; + real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; + real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; + //////////////////////////////////////////////////////////////////////////////////// + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + + real rho = c1o1 + drho; + + rhoD[nodeIndex] = drho + RHO; + + vxD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) / rho) + VX; + vyD[nodeIndex] = + (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) / rho) + VY; + vzD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) / rho) + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + 
(dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + c2o1*( + (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+ + c3o1*( + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])- + rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+ + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -841,309 +687,191 @@ __global__ void LBCalcMedCompSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// __global__ void LBCalcMedCompAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - real* DD_AD, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + real* distributionsAD, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep == true) - { - D.f[DIR_P00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M] = 
&DD[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_PMP 
*size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - Distributions27 Dad; - if (isEvenTimestep == true) - { - Dad.f[DIR_P00] = &DD_AD[DIR_P00 *size_Mat]; - Dad.f[DIR_M00] = &DD_AD[DIR_M00 *size_Mat]; - Dad.f[DIR_0P0] = &DD_AD[DIR_0P0 *size_Mat]; - Dad.f[DIR_0M0] = &DD_AD[DIR_0M0 *size_Mat]; - Dad.f[DIR_00P] = &DD_AD[DIR_00P *size_Mat]; - Dad.f[DIR_00M] = &DD_AD[DIR_00M *size_Mat]; - Dad.f[DIR_PP0] = &DD_AD[DIR_PP0 *size_Mat]; - Dad.f[DIR_MM0] = &DD_AD[DIR_MM0 *size_Mat]; - Dad.f[DIR_PM0] = &DD_AD[DIR_PM0 *size_Mat]; - Dad.f[DIR_MP0] = &DD_AD[DIR_MP0 *size_Mat]; - Dad.f[DIR_P0P] = &DD_AD[DIR_P0P *size_Mat]; - Dad.f[DIR_M0M] = &DD_AD[DIR_M0M *size_Mat]; - Dad.f[DIR_P0M] = &DD_AD[DIR_P0M *size_Mat]; - Dad.f[DIR_M0P] = &DD_AD[DIR_M0P *size_Mat]; - Dad.f[DIR_0PP] = &DD_AD[DIR_0PP *size_Mat]; - Dad.f[DIR_0MM] = &DD_AD[DIR_0MM *size_Mat]; - Dad.f[DIR_0PM] = &DD_AD[DIR_0PM *size_Mat]; - Dad.f[DIR_0MP] = &DD_AD[DIR_0MP *size_Mat]; - Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat]; - Dad.f[DIR_PPP] = &DD_AD[DIR_PPP *size_Mat]; - Dad.f[DIR_MMP] = &DD_AD[DIR_MMP *size_Mat]; - Dad.f[DIR_PMP] = &DD_AD[DIR_PMP *size_Mat]; - Dad.f[DIR_MPP] = &DD_AD[DIR_MPP *size_Mat]; - Dad.f[DIR_PPM] = &DD_AD[DIR_PPM *size_Mat]; - Dad.f[DIR_MMM] = &DD_AD[DIR_MMM *size_Mat]; - Dad.f[DIR_PMM] = &DD_AD[DIR_PMM *size_Mat]; - Dad.f[DIR_MPM] = &DD_AD[DIR_MPM *size_Mat]; - } - else - { - Dad.f[DIR_M00] = &DD_AD[DIR_P00 *size_Mat]; - Dad.f[DIR_P00] = &DD_AD[DIR_M00 *size_Mat]; - Dad.f[DIR_0M0] = &DD_AD[DIR_0P0 *size_Mat]; - Dad.f[DIR_0P0] = &DD_AD[DIR_0M0 *size_Mat]; - Dad.f[DIR_00M] = &DD_AD[DIR_00P *size_Mat]; - Dad.f[DIR_00P] = &DD_AD[DIR_00M *size_Mat]; - Dad.f[DIR_MM0] = &DD_AD[DIR_PP0 *size_Mat]; - Dad.f[DIR_PP0] = &DD_AD[DIR_MM0 *size_Mat]; - Dad.f[DIR_MP0] = &DD_AD[DIR_PM0 *size_Mat]; - Dad.f[DIR_PM0] = &DD_AD[DIR_MP0 *size_Mat]; - Dad.f[DIR_M0M] = &DD_AD[DIR_P0P *size_Mat]; - Dad.f[DIR_P0P] = &DD_AD[DIR_M0M *size_Mat]; - Dad.f[DIR_M0P] = &DD_AD[DIR_P0M 
*size_Mat]; - Dad.f[DIR_P0M] = &DD_AD[DIR_M0P *size_Mat]; - Dad.f[DIR_0MM] = &DD_AD[DIR_0PP *size_Mat]; - Dad.f[DIR_0PP] = &DD_AD[DIR_0MM *size_Mat]; - Dad.f[DIR_0MP] = &DD_AD[DIR_0PM *size_Mat]; - Dad.f[DIR_0PM] = &DD_AD[DIR_0MP *size_Mat]; - Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat]; - Dad.f[DIR_PPP] = &DD_AD[DIR_MMM *size_Mat]; - Dad.f[DIR_MMP] = &DD_AD[DIR_PPM *size_Mat]; - Dad.f[DIR_PMP] = &DD_AD[DIR_MPM *size_Mat]; - Dad.f[DIR_MPP] = &DD_AD[DIR_PMM *size_Mat]; - Dad.f[DIR_PPM] = &DD_AD[DIR_MMP *size_Mat]; - Dad.f[DIR_MMM] = &DD_AD[DIR_PPP *size_Mat]; - Dad.f[DIR_PMM] = &DD_AD[DIR_MPP *size_Mat]; - Dad.f[DIR_MPM] = &DD_AD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - //index - //unsigned int kzero = k; - unsigned int ke = k; - unsigned int kw = neighborX[k]; - unsigned int kn = k; - unsigned int ks = neighborY[k]; - unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = k; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = k; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = k; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - 
////////////////////////////////////////////////////////////////////////// - real CONC = concD[k]; - real PRESS = pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - concD[k] = c0o1; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if (geoD[k] == GEO_FLUID) - { - real mfcbb = (D.f[DIR_P00])[k];//[ke ]; - real mfabb = (D.f[DIR_M00])[kw];//[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ]; - real mfbab = (D.f[DIR_0M0])[ks];//[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ]; - real mfbba = (D.f[DIR_00M])[kb];//[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ]; - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ]; - real mfacb = (D.f[DIR_MP0])[kw];//[knw ]; - real mfcbc = (D.f[DIR_P0P])[k];//[kte ]; - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ]; - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ]; - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ]; - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ]; - real mfbac = (D.f[DIR_0MP])[ks];//[kts ]; - real mfbbb = (D.f[DIR_000])[k];//[kzero]; - real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ]; - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ]; - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ]; - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; - //////////////////////////////////////////////////////////////////////////////////// - real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - real rho = 
c1o1 + drho; - //////////////////////////////////////////////////////////////////////////////////// - - rhoD[k] = drho + RHO; - - vxD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho) + VX; - - vyD[k] = - (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) / rho) + VY; - - vzD[k] = - (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho) + VZ; - - pressD[k] = - ((D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] + - (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] + - (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] + - c2o1*( - (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] + - (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] + - (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] + - (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] + - (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] + - (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts]) + - c3o1*( - (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] + - (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] + - (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] + - (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw]) - - rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * c1o2 + rhoD[k] + - PRESS; - //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 - ////////////////////////////////////////////////////////////////////////// - mfcbb = (Dad.f[DIR_P00 ])[k ]; - mfabb = (Dad.f[DIR_M00 ])[kw ]; - mfbcb = (Dad.f[DIR_0P0 ])[k ]; - mfbab = (Dad.f[DIR_0M0 ])[ks ]; - mfbbc = (Dad.f[DIR_00P ])[k ]; - mfbba = (Dad.f[DIR_00M ])[kb ]; - mfccb = (Dad.f[DIR_PP0 ])[k ]; - mfaab = (Dad.f[DIR_MM0 ])[ksw ]; - mfcab = (Dad.f[DIR_PM0 ])[ks ]; - mfacb = (Dad.f[DIR_MP0 ])[kw ]; - mfcbc = 
(Dad.f[DIR_P0P ])[k ]; - mfaba = (Dad.f[DIR_M0M ])[kbw ]; - mfcba = (Dad.f[DIR_P0M ])[kb ]; - mfabc = (Dad.f[DIR_M0P ])[kw ]; - mfbcc = (Dad.f[DIR_0PP ])[k ]; - mfbaa = (Dad.f[DIR_0MM ])[kbs ]; - mfbca = (Dad.f[DIR_0PM ])[kb ]; - mfbac = (Dad.f[DIR_0MP ])[ks ]; - mfbbb = (Dad.f[DIR_000])[k ]; - mfccc = (Dad.f[DIR_PPP ])[k ]; - mfaac = (Dad.f[DIR_MMP ])[ksw ]; - mfcac = (Dad.f[DIR_PMP ])[ks ]; - mfacc = (Dad.f[DIR_MPP ])[kw ]; - mfcca = (Dad.f[DIR_PPM ])[kb ]; - mfaaa = (Dad.f[DIR_MMM ])[kbsw]; - mfcaa = (Dad.f[DIR_PMM ])[kbs ]; - mfaca = (Dad.f[DIR_MPM ])[kbw ]; - ////////////////////////////////////////////////////////////////////////// - concD[k] = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb + CONC; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if ( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! 
+ Distributions27 dist, distAD; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + getPointersToDistributions(distAD, distributionsAD, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + //unsigned int kzero = k; + unsigned int ke = nodeIndex; + unsigned int kw = neighborX[nodeIndex]; + unsigned int kn = nodeIndex; + unsigned int ks = neighborY[nodeIndex]; + unsigned int kt = nodeIndex; + unsigned int kb = neighborZ[nodeIndex]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = nodeIndex; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = nodeIndex; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = nodeIndex; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = nodeIndex; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + real CONC = concD[nodeIndex]; + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + concD[nodeIndex] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if (geoD[nodeIndex] == GEO_FLUID) + { + real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke ]; + real mfabb = (dist.f[DIR_M00])[kw];//[kw ]; + real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn ]; + real mfbab = (dist.f[DIR_0M0])[ks];//[ks ]; + real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt ]; + real mfbba = (dist.f[DIR_00M])[kb];//[kb ]; + real mfccb = 
(dist.f[DIR_PP0])[nodeIndex];//[kne ]; + real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw ]; + real mfcab = (dist.f[DIR_PM0])[ks];//[kse ]; + real mfacb = (dist.f[DIR_MP0])[kw];//[knw ]; + real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte ]; + real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw ]; + real mfcba = (dist.f[DIR_P0M])[kb];//[kbe ]; + real mfabc = (dist.f[DIR_M0P])[kw];//[ktw ]; + real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn ]; + real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs ]; + real mfbca = (dist.f[DIR_0PM])[kb];//[kbn ]; + real mfbac = (dist.f[DIR_0MP])[ks];//[kts ]; + real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero]; + real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; + real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; + real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ]; + real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ]; + real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ]; + real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ]; + real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; + real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; + //////////////////////////////////////////////////////////////////////////////////// + real drho = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + real rho = c1o1 + drho; + //////////////////////////////////////////////////////////////////////////////////// + + rhoD[nodeIndex] = drho + RHO; + + vxD[nodeIndex] = + (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) / rho) + VX; + + vyD[nodeIndex] = + (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) / rho) + VY; + + vzD[nodeIndex] = + 
(((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) / rho) + VZ; + + pressD[nodeIndex] = + ((dist.f[DIR_P00])[ke] + (dist.f[DIR_M00])[kw] + + (dist.f[DIR_0P0])[kn] + (dist.f[DIR_0M0])[ks] + + (dist.f[DIR_00P])[kt] + (dist.f[DIR_00M])[kb] + + c2o1*( + (dist.f[DIR_PP0])[kne] + (dist.f[DIR_MM0])[ksw] + + (dist.f[DIR_PM0])[kse] + (dist.f[DIR_MP0])[knw] + + (dist.f[DIR_P0P])[kte] + (dist.f[DIR_M0M])[kbw] + + (dist.f[DIR_P0M])[kbe] + (dist.f[DIR_M0P])[ktw] + + (dist.f[DIR_0PP])[ktn] + (dist.f[DIR_0MM])[kbs] + + (dist.f[DIR_0PM])[kbn] + (dist.f[DIR_0MP])[kts]) + + c3o1*( + (dist.f[DIR_PPP])[ktne] + (dist.f[DIR_MMP])[ktsw] + + (dist.f[DIR_PMP])[ktse] + (dist.f[DIR_MPP])[ktnw] + + (dist.f[DIR_PPM])[kbne] + (dist.f[DIR_MMM])[kbsw] + + (dist.f[DIR_PMM])[kbse] + (dist.f[DIR_MPM])[kbnw]) - + rhoD[nodeIndex] - (vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1 + rhoD[nodeIndex])) * c1o2 + rhoD[nodeIndex] + + PRESS; + //achtung op hart gesetzt Annahme op = 1 ; ^^^^(1.0/op-0.5)=0.5 + ////////////////////////////////////////////////////////////////////////// + mfcbb = (distAD.f[DIR_P00])[nodeIndex ]; + mfabb = (distAD.f[DIR_M00])[kw ]; + mfbcb = (distAD.f[DIR_0P0])[nodeIndex ]; + mfbab = (distAD.f[DIR_0M0])[ks ]; + mfbbc = (distAD.f[DIR_00P])[nodeIndex ]; + mfbba = (distAD.f[DIR_00M])[kb ]; + mfccb = (distAD.f[DIR_PP0])[nodeIndex ]; + mfaab = (distAD.f[DIR_MM0])[ksw ]; + mfcab = (distAD.f[DIR_PM0])[ks ]; + mfacb = (distAD.f[DIR_MP0])[kw ]; + mfcbc = (distAD.f[DIR_P0P])[nodeIndex ]; + mfaba = (distAD.f[DIR_M0M])[kbw ]; + mfcba = (distAD.f[DIR_P0M])[kb ]; + mfabc = (distAD.f[DIR_M0P])[kw ]; + mfbcc = (distAD.f[DIR_0PP])[nodeIndex ]; + mfbaa = (distAD.f[DIR_0MM])[kbs ]; + mfbca = (distAD.f[DIR_0PM])[kb ]; + mfbac = (distAD.f[DIR_0MP])[ks ]; + mfbbb = (distAD.f[DIR_000])[nodeIndex ]; + mfccc = 
(distAD.f[DIR_PPP])[nodeIndex ]; + mfaac = (distAD.f[DIR_MMP])[ksw ]; + mfcac = (distAD.f[DIR_PMP])[ks ]; + mfacc = (distAD.f[DIR_MPP])[kw ]; + mfcca = (distAD.f[DIR_PPM])[kb ]; + mfaaa = (distAD.f[DIR_MMM])[kbsw]; + mfcaa = (distAD.f[DIR_PMM])[kbs ]; + mfaca = (distAD.f[DIR_MPM])[kbw ]; + ////////////////////////////////////////////////////////////////////////// + concD[nodeIndex] = + ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb + CONC; + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -1168,54 +896,50 @@ __global__ void LBCalcMedCompAD27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMacMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int tdiff, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void LBCalcMacMedSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int tdiff, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - real PRESS 
= pressD[k]; - real RHO = rhoD[k]; - real VX = vxD[k]; - real VY = vyD[k]; - real VZ = vzD[k]; - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - - if(geoD[k] == GEO_FLUID) - { - rhoD[k] = RHO / tdiff; - vxD[k] = VX / tdiff; - vyD[k] = VY / tdiff; - vzD[k] = VZ / tdiff; - pressD[k] = PRESS / tdiff; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if(nodeIndex<numberOfLBnodes) + { + ////////////////////////////////////////////////////////////////////////// + real PRESS = pressD[nodeIndex]; + real RHO = rhoD[nodeIndex]; + real VX = vxD[nodeIndex]; + real VY = vyD[nodeIndex]; + real VZ = vzD[nodeIndex]; + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + + if(geoD[nodeIndex] == GEO_FLUID) + { + rhoD[nodeIndex] = RHO / tdiff; + vxD[nodeIndex] = VX / tdiff; + vyD[nodeIndex] = VY / tdiff; + vzD[nodeIndex] = VZ / tdiff; + pressD[nodeIndex] = PRESS / tdiff; + } + } } //////////////////////////////////////////////////////////////////////////////// @@ -1241,34 +965,29 @@ __global__ void LBCalcMacMedSP27( real* vxD, //////////////////////////////////////////////////////////////////////////////// __global__ void LBResetMedianValuesSP27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int size_Mat, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - 
//////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k<size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if ( nodeIndex < numberOfLBnodes ) + { + ////////////////////////////////////////////////////////////////////////// + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + } } //////////////////////////////////////////////////////////////////////////////// @@ -1294,36 +1013,30 @@ __global__ void LBResetMedianValuesSP27( //////////////////////////////////////////////////////////////////////////////// __global__ void LBResetMedianValuesAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int size_Mat, - bool isEvenTimestep) + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - 
const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k < size_Mat) - { - ////////////////////////////////////////////////////////////////////////// - concD[k] = c0o1; - pressD[k] = c0o1; - rhoD[k] = c0o1; - vxD[k] = c0o1; - vyD[k] = c0o1; - vzD[k] = c0o1; - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if (nodeIndex < numberOfLBnodes) + { + concD[nodeIndex] = c0o1; + pressD[nodeIndex] = c0o1; + rhoD[nodeIndex] = c0o1; + vxD[nodeIndex] = c0o1; + vyD[nodeIndex] = c0o1; + vzD[nodeIndex] = c0o1; + } } //////////////////////////////////////////////////////////////////////////////// @@ -1348,177 +1061,121 @@ __global__ void LBResetMedianValuesAD27( //////////////////////////////////////////////////////////////////////////////// -__global__ void LBCalcMeasurePoints( real* vxMP, - real* vyMP, - real* vzMP, - real* rhoMP, - unsigned int* kMP, - unsigned int numberOfPointskMP, - unsigned int MPClockCycle, - unsigned int t, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) +__global__ void LBCalcMeasurePoints( + real* vxMP, + real* vyMP, + real* vzMP, + real* rhoMP, + unsigned int* kMP, + unsigned int numberOfPointskMP, + unsigned int MPClockCycle, + unsigned int t, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* distributions, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = 
&DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM 
*size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfPointskMP) - { - ////////////////////////////////////////////////////////////////////////// - //index - unsigned int kzero= kMP[k];//k; - unsigned int ke = kzero; - unsigned int kw = neighborX[kzero]; - unsigned int kn = kzero; - unsigned int ks = neighborY[kzero]; - unsigned int kt = kzero; - unsigned int kb = neighborZ[kzero]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = kzero; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = kzero; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = kzero; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = kzero; - unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////// - unsigned int kMac = k*MPClockCycle + t; - ////////////////////////////////////////////////////////////////////////// - - if(geoD[kzero] == GEO_FLUID) - { - rhoMP[kMac]= (D.f[DIR_P00 ])[ke ]+ (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_0P0 ])[kn ]+ (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_00P ])[kt ]+ (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_PP0 ])[kne ]+ (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]+ 
(D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]+ (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]+ (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_000])[kzero]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]; - - vxMP[kMac] = (D.f[DIR_P00 ])[ke ]- (D.f[DIR_M00 ])[kw ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]+ - (D.f[DIR_PM0 ])[kse ]- (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]+ - (D.f[DIR_P0M ])[kbe ]- (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]; - - vyMP[kMac] = (D.f[DIR_0P0 ])[kn ]- (D.f[DIR_0M0 ])[ks ]+ - (D.f[DIR_PP0 ])[kne ]- (D.f[DIR_MM0 ])[ksw ]- - (D.f[DIR_PM0 ])[kse ]+ (D.f[DIR_MP0 ])[knw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]+ - (D.f[DIR_0PM ])[kbn ]- (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]; - - vzMP[kMac] = (D.f[DIR_00P ])[kt ]- (D.f[DIR_00M ])[kb ]+ - (D.f[DIR_P0P ])[kte ]- (D.f[DIR_M0M ])[kbw ]- - (D.f[DIR_P0M ])[kbe ]+ (D.f[DIR_M0P ])[ktw ]+ - (D.f[DIR_0PP ])[ktn ]- (D.f[DIR_0MM ])[kbs ]- - (D.f[DIR_0PM ])[kbn ]+ (D.f[DIR_0MP ])[kts ]+ - (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ - (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- - (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- - (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]; - } - } + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); + + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex < numberOfPointskMP ) + { + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on + //! timestep is based on the esoteric twist algorithm \ref <a + //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), + //! DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + ////////////////////////////////////////////////////////////////////////// + //index + unsigned int kzero= kMP[nodeIndex];//k; + unsigned int ke = kzero; + unsigned int kw = neighborX[kzero]; + unsigned int kn = kzero; + unsigned int ks = neighborY[kzero]; + unsigned int kt = kzero; + unsigned int kb = neighborZ[kzero]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = kzero; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = kzero; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = kzero; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = kzero; + unsigned int kbsw = neighborZ[ksw]; + ////////////////////////////////////////////////////////////////////////// + unsigned int kMac = nodeIndex*MPClockCycle + t; + ////////////////////////////////////////////////////////////////////////// + + if(geoD[kzero] == GEO_FLUID) + { + rhoMP[kMac]= (dist.f[DIR_P00])[ke ]+ (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_0P0])[kn ]+ (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_00P])[kt ]+ (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_PP0])[kne ]+ 
(dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_000])[kzero]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vxMP[kMac] = (dist.f[DIR_P00])[ke ]- (dist.f[DIR_M00])[kw ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+ + (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+ + (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + + vyMP[kMac] = (dist.f[DIR_0P0])[kn ]- (dist.f[DIR_0M0])[ks ]+ + (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]- + (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+ + (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]; + + vzMP[kMac] = (dist.f[DIR_00P])[kt ]- (dist.f[DIR_00M])[kb ]+ + (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]- + (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+ + (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]- + (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+ + (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ + (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- + (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- + (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]; + } + } } 
//////////////////////////////////////////////////////////////////////////////// @@ -1559,40 +1216,36 @@ __global__ void LBCalcMeasurePoints( real* vxMP, //////////////////////////////////////////////////////////////////////////////// -__global__ void LBSetOutputWallVelocitySP27( real* vxD, - real* vyD, - real* vzD, - real* vxWall, - real* vyWall, - real* vzWall, - int numberOfWallNodes, - int* kWallNodes, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) +__global__ void LBSetOutputWallVelocitySP27( + real* vxD, + real* vyD, + real* vzD, + real* vxWall, + real* vyWall, + real* vzWall, + int numberOfWallNodes, + int* kWallNodes, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - - if(k<numberOfWallNodes) + if(nodeIndex<numberOfWallNodes) { ////////////////////////////////////////////////////////////////////////// //index - unsigned int KWN = kWallNodes[k]; + unsigned int KWN = kWallNodes[nodeIndex]; ////////////////////////////////////////////////////////////////////////// vxD[KWN] = 0.0;//vxWall[k]; vyD[KWN] = 0.0;//vyWall[k]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu index a79588421a624cae62ec32127739efb47bb7b2ef..457623d4ee62b624248306b6b900fcff3f026286 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu @@ -15,7 +15,7 @@ __global__ void LB_Kernel_Cascade_SP_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -29,7 +29,7 @@ __global__ void LB_Kernel_Cascade_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cascade_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - 
D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = 
&DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * 
numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cascade_SP_27( real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ];// 
+ c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ];// 
+ c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k ];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw 
//////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + @@ -842,7 +842,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -856,7 +856,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -867,63 +867,63 @@ __global__ void LB_Kernel_Casc_Comp_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP 
*size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - 
D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * 
numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -956,33 +956,33 @@ __global__ void LB_Kernel_Casc_Comp_SP_27( real omega, unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f_E = (D.f[DIR_P00 ])[ke ];// + c2over27 ; - real f_W = (D.f[DIR_M00 ])[kw ];// + c2over27 ; - real f_N = (D.f[DIR_0P0 ])[kn ];// + c2over27 ; - real f_S = (D.f[DIR_0M0 ])[ks ];// + c2over27 ; - real f_F = (D.f[DIR_00P ])[kt ];// + c2over27 ; - real f_B = (D.f[DIR_00M ])[kb ];// + c2over27 ; - real f_NE = (D.f[DIR_PP0 ])[kne ];// + c1over54 ; - real f_SW = (D.f[DIR_MM0 ])[ksw ];// + c1over54 ; - real f_SE = (D.f[DIR_PM0 ])[kse ];// + c1over54 ; - real f_NW = (D.f[DIR_MP0 ])[knw ];// + c1over54 ; - real f_Ef = (D.f[DIR_P0P ])[kte ];// + c1over54 ; - real f_Wb = (D.f[DIR_M0M ])[kbw ];// + c1over54 ; - real f_Eb = (D.f[DIR_P0M ])[kbe ];// + c1over54 ; - real f_Wf = (D.f[DIR_M0P ])[ktw ];// + c1over54 ; - real f_Nf = (D.f[DIR_0PP ])[ktn ];// + c1over54 ; - real f_Sb = (D.f[DIR_0MM ])[kbs ];// + c1over54 ; - real f_Nb = (D.f[DIR_0PM ])[kbn ];// + c1over54 ; - real f_Sf = (D.f[DIR_0MP ])[kts ];// + c1over54 ; + real f_E = (D.f[DIR_P00])[ke ];// + c2over27 ; + real f_W = (D.f[DIR_M00])[kw ];// + c2over27 ; + real f_N = (D.f[DIR_0P0])[kn ];// + c2over27 ; + real f_S = (D.f[DIR_0M0])[ks ];// + c2over27 ; + real f_F = (D.f[DIR_00P])[kt ];// + c2over27 ; + real f_B = (D.f[DIR_00M])[kb ];// + c2over27 ; + real f_NE = 
(D.f[DIR_PP0])[kne ];// + c1over54 ; + real f_SW = (D.f[DIR_MM0])[ksw ];// + c1over54 ; + real f_SE = (D.f[DIR_PM0])[kse ];// + c1over54 ; + real f_NW = (D.f[DIR_MP0])[knw ];// + c1over54 ; + real f_Ef = (D.f[DIR_P0P])[kte ];// + c1over54 ; + real f_Wb = (D.f[DIR_M0M])[kbw ];// + c1over54 ; + real f_Eb = (D.f[DIR_P0M])[kbe ];// + c1over54 ; + real f_Wf = (D.f[DIR_M0P])[ktw ];// + c1over54 ; + real f_Nf = (D.f[DIR_0PP])[ktn ];// + c1over54 ; + real f_Sb = (D.f[DIR_0MM])[kbs ];// + c1over54 ; + real f_Nb = (D.f[DIR_0PM])[kbn ];// + c1over54 ; + real f_Sf = (D.f[DIR_0MP])[kts ];// + c1over54 ; real f_R = (D.f[DIR_000])[kzero];// + c8over27 ; - real f_Nef = (D.f[DIR_PPP ])[ktne ];// + c1over216; - real f_Swf = (D.f[DIR_MMP ])[ktsw ];// + c1over216; - real f_Sef = (D.f[DIR_PMP ])[ktse ];// + c1over216; - real f_Nwf = (D.f[DIR_MPP ])[ktnw ];// + c1over216; - real f_Neb = (D.f[DIR_PPM ])[kbne ];// + c1over216; - real f_Swb = (D.f[DIR_MMM ])[kbsw ];// + c1over216; - real f_Seb = (D.f[DIR_PMM ])[kbse ];// + c1over216; - real f_Nwb = (D.f[DIR_MPM ])[kbnw ];// + c1over216; + real f_Nef = (D.f[DIR_PPP])[ktne ];// + c1over216; + real f_Swf = (D.f[DIR_MMP])[ktsw ];// + c1over216; + real f_Sef = (D.f[DIR_PMP])[ktse ];// + c1over216; + real f_Nwf = (D.f[DIR_MPP])[ktnw ];// + c1over216; + real f_Neb = (D.f[DIR_PPM])[kbne ];// + c1over216; + real f_Swb = (D.f[DIR_MMM])[kbsw ];// + c1over216; + real f_Seb = (D.f[DIR_PMM])[kbse ];// + c1over216; + real f_Nwb = (D.f[DIR_MPM])[kbnw ];// + c1over216; //////////////////////////////////////////////////////////////////////////////////// real rho=f_NW+f_W+f_SW+f_S+f_SE+f_E+f_NE+f_N+f_R+f_Nf+f_Nb+f_Sf+f_Sb+f_Ef+f_Eb+f_Wf+f_Wb+f_Nwf+f_Nwb+f_Nef+f_Neb+f_Swf+f_Swb+f_Sef+f_Seb+f_F+f_B+c1o1;// ACHTUNG ne EINS !!!!!!!! 
real pix=(f_NE+f_E+f_SE+f_Ef+f_Eb-f_NW-f_W-f_SW-f_Wf-f_Wb+f_Nef+f_Neb+f_Sef+f_Seb-f_Nwf-f_Nwb-f_Swf-f_Swb); @@ -1689,7 +1689,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -1703,7 +1703,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -1714,63 +1714,63 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - 
D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = 
&DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * 
numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1803,33 +1803,33 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real fE = (D.f[DIR_P00 ])[k ];//ke - real fW = (D.f[DIR_M00 ])[kw ]; - real fN = (D.f[DIR_0P0 ])[k ];//kn - real fS = (D.f[DIR_0M0 ])[ks ]; - real fT = (D.f[DIR_00P ])[k ];//kt - real fB = (D.f[DIR_00M ])[kb ]; - real fNE = (D.f[DIR_PP0 ])[k ];//kne - real fSW = (D.f[DIR_MM0 ])[ksw]; - real fSE = (D.f[DIR_PM0 ])[ks ];//kse - real fNW = (D.f[DIR_MP0 ])[kw ];//knw - real fTE = (D.f[DIR_P0P ])[k ];//kte - real fBW = (D.f[DIR_M0M ])[kbw]; - real fBE = (D.f[DIR_P0M ])[kb ];//kbe - real fTW = (D.f[DIR_M0P ])[kw ];//ktw - real fTN = (D.f[DIR_0PP ])[k ];//ktn - real fBS = (D.f[DIR_0MM ])[kbs]; - real fBN = (D.f[DIR_0PM ])[kb ];//kbn - real fTS = (D.f[DIR_0MP ])[ks ];//kts + real fE = (D.f[DIR_P00])[k ];//ke + real fW = (D.f[DIR_M00])[kw ]; + real fN = (D.f[DIR_0P0])[k ];//kn + real fS = (D.f[DIR_0M0])[ks ]; + real fT = (D.f[DIR_00P])[k ];//kt + real fB = (D.f[DIR_00M])[kb ]; + real fNE = (D.f[DIR_PP0])[k ];//kne + real fSW = (D.f[DIR_MM0])[ksw]; + real fSE = (D.f[DIR_PM0])[ks ];//kse + real fNW = (D.f[DIR_MP0])[kw ];//knw + real fTE = (D.f[DIR_P0P])[k ];//kte + real fBW = (D.f[DIR_M0M])[kbw]; + real fBE = (D.f[DIR_P0M])[kb ];//kbe + real fTW = (D.f[DIR_M0P])[kw ];//ktw + real fTN = (D.f[DIR_0PP])[k ];//ktn + real fBS = (D.f[DIR_0MM])[kbs]; + real fBN = (D.f[DIR_0PM])[kb 
];//kbn + real fTS = (D.f[DIR_0MP])[ks ];//kts real fZERO = (D.f[DIR_000])[k ];//kzero - real fTNE = (D.f[DIR_PPP ])[k ];//ktne - real fTSW = (D.f[DIR_MMP ])[ksw];//ktsw - real fTSE = (D.f[DIR_PMP ])[ks ];//ktse - real fTNW = (D.f[DIR_MPP ])[kw ];//ktnw - real fBNE = (D.f[DIR_PPM ])[kb ];//kbne - real fBSW = (D.f[DIR_MMM ])[kbsw]; - real fBSE = (D.f[DIR_PMM ])[kbs];//kbse - real fBNW = (D.f[DIR_MPM ])[kbw];//kbnw + real fTNE = (D.f[DIR_PPP])[k ];//ktne + real fTSW = (D.f[DIR_MMP])[ksw];//ktsw + real fTSE = (D.f[DIR_PMP])[ks ];//ktse + real fTNW = (D.f[DIR_MPP])[kw ];//ktnw + real fBNE = (D.f[DIR_PPM])[kb ];//kbne + real fBSW = (D.f[DIR_MMM])[kbsw]; + real fBSE = (D.f[DIR_PMM])[kbs];//kbse + real fBNW = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// real rho0 = (fTNE+fBSW)+(fTSW+fBNE)+(fTSE+fBNW)+(fTNW+fBSE)+(fNE+fSW)+(fNW+fSE)+(fTE+fBW)+(fBE+fTW)+(fTN+fBS)+(fBN+fTS)+(fE+fW)+(fN+fS)+(fT+fB)+fZERO; real rho = rho0 + c1o1; @@ -2321,7 +2321,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -2335,7 +2335,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -2346,63 +2346,63 @@ __global__ void LB_Kernel_Casc_SP_MS_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = 
&DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = 
&DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -2435,33 +2435,33 @@ __global__ void LB_Kernel_Casc_SP_MS_27( real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real fE = (D.f[DIR_P00 ])[k ];//ke - real fW = (D.f[DIR_M00 ])[kw ]; - real fN = (D.f[DIR_0P0 ])[k ];//kn - real fS = (D.f[DIR_0M0 ])[ks ]; - real fT = (D.f[DIR_00P ])[k ];//kt - real fB = (D.f[DIR_00M ])[kb ]; - real fNE = (D.f[DIR_PP0 ])[k ];//kne - real fSW = (D.f[DIR_MM0 ])[ksw]; - real fSE = (D.f[DIR_PM0 ])[ks ];//kse - real fNW = (D.f[DIR_MP0 ])[kw ];//knw - real fTE = (D.f[DIR_P0P ])[k ];//kte - 
real fBW = (D.f[DIR_M0M ])[kbw]; - real fBE = (D.f[DIR_P0M ])[kb ];//kbe - real fTW = (D.f[DIR_M0P ])[kw ];//ktw - real fTN = (D.f[DIR_0PP ])[k ];//ktn - real fBS = (D.f[DIR_0MM ])[kbs]; - real fBN = (D.f[DIR_0PM ])[kb ];//kbn - real fTS = (D.f[DIR_0MP ])[ks ];//kts + real fE = (D.f[DIR_P00])[k ];//ke + real fW = (D.f[DIR_M00])[kw ]; + real fN = (D.f[DIR_0P0])[k ];//kn + real fS = (D.f[DIR_0M0])[ks ]; + real fT = (D.f[DIR_00P])[k ];//kt + real fB = (D.f[DIR_00M])[kb ]; + real fNE = (D.f[DIR_PP0])[k ];//kne + real fSW = (D.f[DIR_MM0])[ksw]; + real fSE = (D.f[DIR_PM0])[ks ];//kse + real fNW = (D.f[DIR_MP0])[kw ];//knw + real fTE = (D.f[DIR_P0P])[k ];//kte + real fBW = (D.f[DIR_M0M])[kbw]; + real fBE = (D.f[DIR_P0M])[kb ];//kbe + real fTW = (D.f[DIR_M0P])[kw ];//ktw + real fTN = (D.f[DIR_0PP])[k ];//ktn + real fBS = (D.f[DIR_0MM])[kbs]; + real fBN = (D.f[DIR_0PM])[kb ];//kbn + real fTS = (D.f[DIR_0MP])[ks ];//kts real fZERO = (D.f[DIR_000])[k ];//kzero - real fTNE = (D.f[DIR_PPP ])[k ];//ktne - real fTSW = (D.f[DIR_MMP ])[ksw];//ktsw - real fTSE = (D.f[DIR_PMP ])[ks ];//ktse - real fTNW = (D.f[DIR_MPP ])[kw ];//ktnw - real fBNE = (D.f[DIR_PPM ])[kb ];//kbne - real fBSW = (D.f[DIR_MMM ])[kbsw]; - real fBSE = (D.f[DIR_PMM ])[kbs];//kbse - real fBNW = (D.f[DIR_MPM ])[kbw];//kbnw + real fTNE = (D.f[DIR_PPP])[k ];//ktne + real fTSW = (D.f[DIR_MMP])[ksw];//ktsw + real fTSE = (D.f[DIR_PMP])[ks ];//ktse + real fTNW = (D.f[DIR_MPP])[kw ];//ktnw + real fBNE = (D.f[DIR_PPM])[kb ];//kbne + real fBSW = (D.f[DIR_MMM])[kbsw]; + real fBSE = (D.f[DIR_PMM])[kbs];//kbse + real fBNW = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// real rho0 = fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW; real rho = rho0 + c1o1; @@ -2846,7 +2846,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int 
size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -2860,7 +2860,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -2871,63 +2871,63 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = 
&DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; 
- D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * 
numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -2960,33 +2960,33 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real fE = (D.f[DIR_P00 ])[k ];//ke - real fW = (D.f[DIR_M00 ])[kw ]; - real fN = (D.f[DIR_0P0 ])[k ];//kn - real fS = (D.f[DIR_0M0 ])[ks ]; - real fT = (D.f[DIR_00P ])[k ];//kt - real fB = (D.f[DIR_00M ])[kb ]; - real fNE = (D.f[DIR_PP0 ])[k ];//kne - real fSW = (D.f[DIR_MM0 ])[ksw]; - real fSE = (D.f[DIR_PM0 ])[ks ];//kse - real fNW = (D.f[DIR_MP0 ])[kw ];//knw - real fTE = (D.f[DIR_P0P ])[k ];//kte - real fBW = (D.f[DIR_M0M ])[kbw]; - real fBE = (D.f[DIR_P0M ])[kb ];//kbe - real fTW = (D.f[DIR_M0P ])[kw ];//ktw - real fTN = (D.f[DIR_0PP ])[k ];//ktn - real fBS = (D.f[DIR_0MM ])[kbs]; - real fBN = (D.f[DIR_0PM ])[kb ];//kbn - real fTS = (D.f[DIR_0MP ])[ks ];//kts + real fE = (D.f[DIR_P00])[k ];//ke + real fW = (D.f[DIR_M00])[kw ]; + real fN = (D.f[DIR_0P0])[k ];//kn + real fS = (D.f[DIR_0M0])[ks ]; + real fT = (D.f[DIR_00P])[k ];//kt + real fB = (D.f[DIR_00M])[kb ]; + real fNE = (D.f[DIR_PP0])[k ];//kne + real fSW = (D.f[DIR_MM0])[ksw]; + real fSE = (D.f[DIR_PM0])[ks ];//kse + real fNW = (D.f[DIR_MP0])[kw ];//knw + real fTE = (D.f[DIR_P0P])[k ];//kte + real fBW = (D.f[DIR_M0M])[kbw]; + real fBE = (D.f[DIR_P0M])[kb ];//kbe + real fTW = (D.f[DIR_M0P])[kw ];//ktw + real fTN = (D.f[DIR_0PP])[k ];//ktn + real fBS = (D.f[DIR_0MM])[kbs]; + real fBN = (D.f[DIR_0PM])[kb ];//kbn + real fTS = (D.f[DIR_0MP])[ks ];//kts real fZERO = (D.f[DIR_000])[k ];//kzero - real fTNE = (D.f[DIR_PPP ])[k ];//ktne - real fTSW = (D.f[DIR_MMP ])[ksw];//ktsw - real fTSE = (D.f[DIR_PMP ])[ks ];//ktse - real fTNW = (D.f[DIR_MPP ])[kw ];//ktnw - real fBNE = 
(D.f[DIR_PPM ])[kb ];//kbne - real fBSW = (D.f[DIR_MMM ])[kbsw]; - real fBSE = (D.f[DIR_PMM ])[kbs];//kbse - real fBNW = (D.f[DIR_MPM ])[kbw];//kbnw + real fTNE = (D.f[DIR_PPP])[k ];//ktne + real fTSW = (D.f[DIR_MMP])[ksw];//ktsw + real fTSE = (D.f[DIR_PMP])[ks ];//ktse + real fTNW = (D.f[DIR_MPP])[kw ];//ktnw + real fBNE = (D.f[DIR_PPM])[kb ];//kbne + real fBSW = (D.f[DIR_MMM])[kbsw]; + real fBSE = (D.f[DIR_PMM])[kbs];//kbse + real fBNW = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// real rho0 = fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW; real rho = rho0 + c1o1; @@ -3368,7 +3368,7 @@ __global__ void LB_Kernel_Casc_SP_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -3382,7 +3382,7 @@ __global__ void LB_Kernel_Casc_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -3393,63 +3393,63 @@ __global__ void LB_Kernel_Casc_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - 
D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + 
D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = 
&DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -3512,33 +3512,33 @@ __global__ void LB_Kernel_Casc_SP_27( real omega, ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - f_E = (D.f[DIR_P00 ])[ke]+c2o27; - f_W = (D.f[DIR_M00 ])[kw]+c2o27; - f_N = (D.f[DIR_0P0 ])[kn]+c2o27; - f_S = (D.f[DIR_0M0 ])[ks]+c2o27; - f_T = (D.f[DIR_00P ])[kt]+c2o27; - f_B = (D.f[DIR_00M ])[kb]+c2o27; - f_NE = (D.f[DIR_PP0 ])[kne]+c1o54; - f_SW = (D.f[DIR_MM0 ])[ksw]+c1o54; - f_SE = (D.f[DIR_PM0 ])[kse]+c1o54; - f_NW = (D.f[DIR_MP0 ])[knw]+c1o54; - f_TE = (D.f[DIR_P0P ])[kte]+c1o54; - f_BW = (D.f[DIR_M0M ])[kbw]+c1o54; - f_BE = (D.f[DIR_P0M ])[kbe]+c1o54; - f_TW = (D.f[DIR_M0P ])[ktw]+c1o54; - 
f_TN = (D.f[DIR_0PP ])[ktn]+c1o54; - f_BS = (D.f[DIR_0MM ])[kbs]+c1o54; - f_BN = (D.f[DIR_0PM ])[kbn]+c1o54; - f_TS = (D.f[DIR_0MP ])[kts]+c1o54; + f_E = (D.f[DIR_P00])[ke]+c2o27; + f_W = (D.f[DIR_M00])[kw]+c2o27; + f_N = (D.f[DIR_0P0])[kn]+c2o27; + f_S = (D.f[DIR_0M0])[ks]+c2o27; + f_T = (D.f[DIR_00P])[kt]+c2o27; + f_B = (D.f[DIR_00M])[kb]+c2o27; + f_NE = (D.f[DIR_PP0])[kne]+c1o54; + f_SW = (D.f[DIR_MM0])[ksw]+c1o54; + f_SE = (D.f[DIR_PM0])[kse]+c1o54; + f_NW = (D.f[DIR_MP0])[knw]+c1o54; + f_TE = (D.f[DIR_P0P])[kte]+c1o54; + f_BW = (D.f[DIR_M0M])[kbw]+c1o54; + f_BE = (D.f[DIR_P0M])[kbe]+c1o54; + f_TW = (D.f[DIR_M0P])[ktw]+c1o54; + f_TN = (D.f[DIR_0PP])[ktn]+c1o54; + f_BS = (D.f[DIR_0MM])[kbs]+c1o54; + f_BN = (D.f[DIR_0PM])[kbn]+c1o54; + f_TS = (D.f[DIR_0MP])[kts]+c1o54; f_ZERO = (D.f[DIR_000])[kzero]+c8o27; - f_TNE = (D.f[DIR_PPP ])[ktne]+c1o216; - f_TSW = (D.f[DIR_MMP ])[ktsw]+c1o216; - f_TSE = (D.f[DIR_PMP ])[ktse]+c1o216; - f_TNW = (D.f[DIR_MPP ])[ktnw]+c1o216; - f_BNE = (D.f[DIR_PPM ])[kbne]+c1o216; - f_BSW = (D.f[DIR_MMM ])[kbsw]+c1o216; - f_BSE = (D.f[DIR_PMM ])[kbse]+c1o216; - f_BNW = (D.f[DIR_MPM ])[kbnw]+c1o216; + f_TNE = (D.f[DIR_PPP])[ktne]+c1o216; + f_TSW = (D.f[DIR_MMP])[ktsw]+c1o216; + f_TSE = (D.f[DIR_PMP])[ktse]+c1o216; + f_TNW = (D.f[DIR_MPP])[ktnw]+c1o216; + f_BNE = (D.f[DIR_PPM])[kbne]+c1o216; + f_BSW = (D.f[DIR_MMM])[kbsw]+c1o216; + f_BSE = (D.f[DIR_PMM])[kbse]+c1o216; + f_BNW = (D.f[DIR_MPM])[kbnw]+c1o216; //////////////////////////////////////////////////////////////////////////////// if( BC == GEO_FLUID || BC == GEO_VELO) @@ -4060,7 +4060,7 @@ __global__ void LB_Kernel_Casc27(real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -4089,63 +4089,63 @@ __global__ void LB_Kernel_Casc27(real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = 
&DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * 
numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + 
D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -4208,33 +4208,33 @@ __global__ void LB_Kernel_Casc27(real omega, ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW; 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - f_E = (D.f[DIR_P00 ])[ke]+c2o27; - f_W = (D.f[DIR_M00 ])[kw]+c2o27; - f_N = (D.f[DIR_0P0 ])[kn]+c2o27; - f_S = (D.f[DIR_0M0 ])[ks]+c2o27; - f_T = (D.f[DIR_00P ])[kt]+c2o27; - f_B = (D.f[DIR_00M ])[kb]+c2o27; - f_NE = (D.f[DIR_PP0 ])[kne]+c1o54; - f_SW = (D.f[DIR_MM0 ])[ksw]+c1o54; - f_SE = (D.f[DIR_PM0 ])[kse]+c1o54; - f_NW = (D.f[DIR_MP0 ])[knw]+c1o54; - f_TE = (D.f[DIR_P0P ])[kte]+c1o54; - f_BW = (D.f[DIR_M0M ])[kbw]+c1o54; - f_BE = (D.f[DIR_P0M ])[kbe]+c1o54; - f_TW = (D.f[DIR_M0P ])[ktw]+c1o54; - f_TN = (D.f[DIR_0PP ])[ktn]+c1o54; - f_BS = (D.f[DIR_0MM ])[kbs]+c1o54; - f_BN = (D.f[DIR_0PM ])[kbn]+c1o54; - f_TS = (D.f[DIR_0MP ])[kts]+c1o54; + f_E = (D.f[DIR_P00])[ke]+c2o27; + f_W = (D.f[DIR_M00])[kw]+c2o27; + f_N = (D.f[DIR_0P0])[kn]+c2o27; + f_S = (D.f[DIR_0M0])[ks]+c2o27; + f_T = (D.f[DIR_00P])[kt]+c2o27; + f_B = (D.f[DIR_00M])[kb]+c2o27; + f_NE = (D.f[DIR_PP0])[kne]+c1o54; + f_SW = (D.f[DIR_MM0])[ksw]+c1o54; + f_SE = (D.f[DIR_PM0])[kse]+c1o54; + f_NW = (D.f[DIR_MP0])[knw]+c1o54; + f_TE = (D.f[DIR_P0P])[kte]+c1o54; + f_BW = (D.f[DIR_M0M])[kbw]+c1o54; + f_BE = (D.f[DIR_P0M])[kbe]+c1o54; + f_TW = (D.f[DIR_M0P])[ktw]+c1o54; + f_TN = (D.f[DIR_0PP])[ktn]+c1o54; + f_BS = (D.f[DIR_0MM])[kbs]+c1o54; + f_BN = (D.f[DIR_0PM])[kbn]+c1o54; + f_TS = (D.f[DIR_0MP])[kts]+c1o54; f_ZERO = (D.f[DIR_000])[kzero]+c8o27; - f_TNE = (D.f[DIR_PPP ])[ktne]+c1o216; - f_TSW = (D.f[DIR_MMP ])[ktsw]+c1o216; - f_TSE = (D.f[DIR_PMP ])[ktse]+c1o216; - f_TNW = (D.f[DIR_MPP ])[ktnw]+c1o216; - f_BNE = (D.f[DIR_PPM ])[kbne]+c1o216; - f_BSW = (D.f[DIR_MMM ])[kbsw]+c1o216; - f_BSE = (D.f[DIR_PMM ])[kbse]+c1o216; - f_BNW = (D.f[DIR_MPM ])[kbnw]+c1o216; + f_TNE = (D.f[DIR_PPP])[ktne]+c1o216; + f_TSW = (D.f[DIR_MMP])[ktsw]+c1o216; + f_TSE = (D.f[DIR_PMP])[ktse]+c1o216; + f_TNW = (D.f[DIR_MPP])[ktnw]+c1o216; + f_BNE = (D.f[DIR_PPM])[kbne]+c1o216; + f_BSW = 
(D.f[DIR_MMM])[kbsw]+c1o216; + f_BSE = (D.f[DIR_PMM])[kbse]+c1o216; + f_BNW = (D.f[DIR_MPM])[kbnw]+c1o216; //////////////////////////////////////////////////////////////////////////////// if( BC == GEO_FLUID || BC == GEO_VELO) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 22192216927f91c33fafc23c54c3fae334abdd34..9fd2a6b2f5c5c10a36856852db47f989ace714ce 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -5,72 +5,65 @@ #include <math.h> #include <Parameter/Parameter.h> + #include "Parameter/CudaStreamManager.h" -#include "PreCollisionInteractor/ActuatorLine.h" +#include "PreCollisionInteractor/ActuatorFarm.h" #include "PreCollisionInteractor/Probes/Probe.h" +#include <PreCollisionInteractor/PrecursorWriter.h> #include "Calculation/PorousMedia.h" #include "lbm/constants/NumericConstants.h" -void CudaMemoryManager::cudaAllocFull(int lev) -{ - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo ), parameter->getParH(lev)->mem_size_int )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->k ), parameter->getParH(lev)->mem_size_int )); -} -void CudaMemoryManager::cudaFreeFull(int lev) -{ - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->geo )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->k )); -} + void CudaMemoryManager::cudaCopyPrint(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX , parameter->getParD(lev)->velocityX , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY , parameter->getParD(lev)->velocityY , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ , parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - 
checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho , parameter->getParD(lev)->rho , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX , parameter->getParD(lev)->velocityX , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY , parameter->getParD(lev)->velocityY , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ , parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho , parameter->getParD(lev)->rho , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure , parameter->getParD(lev)->pressure , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); if(parameter->getIsBodyForce()) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP , parameter->getParD(lev)->forceX_SP , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP , parameter->getParD(lev)->forceY_SP , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP , parameter->getParD(lev)->forceZ_SP , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP , parameter->getParD(lev)->forceX_SP , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP , 
parameter->getParD(lev)->forceY_SP , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP , parameter->getParD(lev)->forceZ_SP , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } if(parameter->getUseTurbulentViscosity()) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity , parameter->getParD(lev)->turbViscosity , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity , parameter->getParD(lev)->turbViscosity , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } } void CudaMemoryManager::cudaCopyMedianPrint(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med , parameter->getParD(lev)->vx_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med , parameter->getParD(lev)->vy_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med , parameter->getParD(lev)->vz_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med , parameter->getParD(lev)->rho_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med , parameter->getParD(lev)->vx_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med , parameter->getParD(lev)->vy_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + 
checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med , parameter->getParD(lev)->vz_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med , parameter->getParD(lev)->rho_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } void CudaMemoryManager::cudaAllocCoord(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ ), parameter->getParH(lev)->mem_size_real_SP )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ ), parameter->getParH(lev)->memSizeRealLBnodes )); //Device (spinning ship + uppsala) - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateY ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ ), parameter->getParH(lev)->mem_size_real_SP )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) 
&(parameter->getParD(lev)->coordinateY ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ ), parameter->getParH(lev)->memSizeRealLBnodes )); ////////////////////////////////////////////////////////////////////////// - double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyCoord(int lev) { //copy host to device - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX, parameter->getParH(lev)->coordinateX, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY, parameter->getParH(lev)->coordinateY, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ, parameter->getParH(lev)->coordinateZ, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX, parameter->getParH(lev)->coordinateX, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY, parameter->getParH(lev)->coordinateY, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ, parameter->getParH(lev)->coordinateZ, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeCoord(int lev) { @@ -81,24 +74,24 @@ void CudaMemoryManager::cudaFreeCoord(int lev) void CudaMemoryManager::cudaAllocBodyForce(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP ), 
parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP ), parameter->getParH(lev)->mem_size_real_SP )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); //Device - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP ), parameter->getParH(lev)->mem_size_real_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP ), parameter->getParH(lev)->mem_size_real_SP )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP ), parameter->getParH(lev)->memSizeRealLBnodes )); ////////////////////////////////////////////////////////////////////////// - double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = 3. 
* (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyBodyForce(int lev) { //copy host to device - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP, parameter->getParH(lev)->forceX_SP, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP, parameter->getParH(lev)->forceY_SP, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP, parameter->getParH(lev)->forceZ_SP, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP, parameter->getParH(lev)->forceX_SP, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP, parameter->getParH(lev)->forceY_SP, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP, parameter->getParH(lev)->forceZ_SP, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeBodyForce(int lev) @@ -111,71 +104,71 @@ void CudaMemoryManager::cudaFreeBodyForce(int lev) //print void CudaMemoryManager::cudaCopyDataToHost(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX , parameter->getParD(lev)->velocityX , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY , parameter->getParD(lev)->velocityY , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ , parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho , parameter->getParD(lev)->rho , 
parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX , parameter->getParD(lev)->velocityX , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY , parameter->getParD(lev)->velocityY , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ , parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho , parameter->getParD(lev)->rho , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure , parameter->getParD(lev)->pressure , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } //sparse void CudaMemoryManager::cudaAllocSP(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode ), parameter->getParH(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX ), parameter->getParH(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY ), parameter->getParH(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ ), parameter->getParH(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) 
&(parameter->getParH(lev)->velocityY ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure ), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode), parameter->getParH(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX ), parameter->getParH(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY ), parameter->getParH(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ ), parameter->getParH(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityY ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ ), parameter->getParH(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure ), parameter->getParH(lev)->memSizeRealLBnodes )); //Device - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode ), parameter->getParD(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX ), parameter->getParD(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY ), parameter->getParD(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ ), 
parameter->getParD(lev)->mem_size_int_SP )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0] ), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode ), parameter->getParD(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX ), parameter->getParD(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY ), parameter->getParD(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ ), parameter->getParD(lev)->memSizeLonglongLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho ), parameter->getParD(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX ), parameter->getParD(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY ), parameter->getParD(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ ), parameter->getParD(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure ), 
parameter->getParD(lev)->memSizeRealLBnodes )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0]), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes)); ////////////////////////////////////////////////////////////////////////// - double tmp = 4. * (double)parameter->getParH(lev)->mem_size_int_SP + 5. * (double)parameter->getParH(lev)->mem_size_real_SP + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = 4. * (double)parameter->getParH(lev)->memSizeLonglongLBnodes + 5. * (double)parameter->getParH(lev)->memSizeRealLBnodes + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopySP(int lev) { //copy host to device - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode , parameter->getParH(lev)->typeOfGridNode , parameter->getParH(lev)->mem_size_int_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX, parameter->getParH(lev)->neighborX, parameter->getParH(lev)->mem_size_int_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY, parameter->getParH(lev)->neighborY, parameter->getParH(lev)->mem_size_int_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ, parameter->getParH(lev)->neighborZ, parameter->getParH(lev)->mem_size_int_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho , parameter->getParH(lev)->rho , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX , parameter->getParH(lev)->velocityX , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY , parameter->getParH(lev)->velocityY , 
parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->velocityZ , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure , parameter->getParH(lev)->pressure , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode, parameter->getParH(lev)->typeOfGridNode, parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX , parameter->getParH(lev)->neighborX , parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY , parameter->getParH(lev)->neighborY , parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ , parameter->getParH(lev)->neighborZ , parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho , parameter->getParH(lev)->rho , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX , parameter->getParH(lev)->velocityX , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY , parameter->getParH(lev)->velocityY , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ , parameter->getParH(lev)->velocityZ , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure , parameter->getParH(lev)->pressure , parameter->getParH(lev)->memSizeRealLBnodes , 
cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeSP(int lev) { - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure )); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX)); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY)); - checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ)); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY )); + checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ )); } void CudaMemoryManager::cudaAllocF3SP(int lev) { //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes)); ////////////////////////////////////////////////////////////////////////// - double tmp = (double)6 * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = (double)6 * (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } @@ 
-207,20 +200,20 @@ void CudaMemoryManager::cudaAllocVeloBC(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.q27[0]), parameter->getD3Qxx()*mem_size_inflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k), mem_size_inflow_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k), mem_size_inflow_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC), mem_size_inflow_Q_q )); //Device checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.q27[0]), parameter->getD3Qxx()*mem_size_inflow_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k), mem_size_inflow_Q_k )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy), mem_size_inflow_Q_q )); 
- checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz), mem_size_inflow_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k), mem_size_inflow_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz), mem_size_inflow_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz), mem_size_inflow_Q_q )); ////////////////////////////////////////////////////////////////////////// double tmp = (double)mem_size_inflow_Q_k + 4. * (double)mem_size_inflow_Q_q + (double)parameter->getD3Qxx() * (double)mem_size_inflow_Q_q; @@ -232,13 +225,14 @@ void CudaMemoryManager::cudaCopyVeloBC(int lev) unsigned int mem_size_inflow_Q_q = sizeof(real)*parameter->getParH(lev)->velocityBC.numberOfBCnodes; checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.q27[0], parameter->getParH(lev)->velocityBC.q27[0], parameter->getD3Qxx()* mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k, parameter->getParH(lev)->velocityBC.k, mem_size_inflow_Q_k, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx, parameter->getParH(lev)->velocityBC.Vx, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy, parameter->getParH(lev)->velocityBC.Vy, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz, parameter->getParH(lev)->velocityBC.Vz, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( 
cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k, parameter->getParH(lev)->velocityBC.k, mem_size_inflow_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx, parameter->getParH(lev)->velocityBC.Vx, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy, parameter->getParH(lev)->velocityBC.Vy, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz, parameter->getParH(lev)->velocityBC.Vz, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz, mem_size_inflow_Q_q, cudaMemcpyHostToDevice)); } + void CudaMemoryManager::cudaFreeVeloBC(int lev) { checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityBC.q27[0] )); @@ -256,15 +250,15 @@ void CudaMemoryManager::cudaAllocOutflowBC(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.q27[0]), parameter->getD3Qxx()*mem_size_outflow_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k), mem_size_outflow_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN), mem_size_outflow_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC), mem_size_outflow_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k), mem_size_outflow_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN), mem_size_outflow_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC), mem_size_outflow_Q_q )); //Device checkCudaErrors( cudaMalloc((void**) 
&(parameter->getParD(lev)->outflowBC.q27[0]), parameter->getD3Qxx()* mem_size_outflow_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k), mem_size_outflow_Q_k )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN), mem_size_outflow_Q_k )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC), mem_size_outflow_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k), mem_size_outflow_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN), mem_size_outflow_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC), mem_size_outflow_Q_q )); ////////////////////////////////////////////////////////////////////////// double tmp = (double)mem_size_outflow_Q_q + 2. * (double)mem_size_outflow_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_outflow_Q_q; @@ -276,9 +270,9 @@ void CudaMemoryManager::cudaCopyOutflowBC(int lev) unsigned int mem_size_outflow_Q_q = sizeof(real)*parameter->getParH(lev)->outflowBC.numberOfBCnodes; checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.q27[0], parameter->getParH(lev)->outflowBC.q27[0], parameter->getD3Qxx()* mem_size_outflow_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k, parameter->getParH(lev)->outflowBC.k, mem_size_outflow_Q_k, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.kN, parameter->getParH(lev)->outflowBC.kN, mem_size_outflow_Q_k, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC, parameter->getParH(lev)->outflowBC.RhoBC, mem_size_outflow_Q_q, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k, parameter->getParH(lev)->outflowBC.k, mem_size_outflow_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( 
cudaMemcpy(parameter->getParD(lev)->outflowBC.kN, parameter->getParH(lev)->outflowBC.kN, mem_size_outflow_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC, parameter->getParH(lev)->outflowBC.RhoBC, mem_size_outflow_Q_q, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeOutflowBC(int lev) { @@ -297,13 +291,13 @@ void CudaMemoryManager::cudaAllocNoSlipBC(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k), mem_size_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread), mem_size_Q_q_read ));//Geller - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ), mem_size_Q_value ));//Geller + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread), mem_size_Q_q_read ));//Geller + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ), mem_size_Q_value ));//Geller //Device checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.q27[0]), parameter->getD3Qxx()* mem_size_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k), mem_size_Q_k )); ////////////////////////////////////////////////////////////////////////// double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q; @@ -315,7 +309,7 @@ void CudaMemoryManager::cudaCopyNoSlipBC(int lev) unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes; checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.q27[0], parameter->getParH(lev)->noSlipBC.q27[0], 
parameter->getD3Qxx()* mem_size_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k, parameter->getParH(lev)->noSlipBC.k, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k, parameter->getParH(lev)->noSlipBC.k, mem_size_Q_k, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeNoSlipBC(int lev) { @@ -332,11 +326,11 @@ void CudaMemoryManager::cudaAllocGeomBC(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k), mem_size_Q_k )); //Device checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.q27[0]), parameter->getD3Qxx()* mem_size_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k), mem_size_Q_k )); ////////////////////////////////////////////////////////////////////////// double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q; @@ -348,7 +342,7 @@ void CudaMemoryManager::cudaCopyGeomBC(int lev) unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->geometryBC.numberOfBCnodes; checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.q27[0], parameter->getParH(lev)->geometryBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q, cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k, parameter->getParH(lev)->geometryBC.k, mem_size_Q_k, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k, parameter->getParH(lev)->geometryBC.k, mem_size_Q_k, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeGeomBC(int lev) { @@ 
-363,15 +357,15 @@ void CudaMemoryManager::cudaAllocPress(int lev) //Host checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k), mem_size_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN), mem_size_Q_k )); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC), mem_size_Q_q )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN), mem_size_Q_k )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC), mem_size_Q_q )); //Device checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.q27[0]), parameter->getD3Qxx()* mem_size_Q_q )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k), mem_size_Q_k )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN), mem_size_Q_k )); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC), mem_size_Q_q )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN), mem_size_Q_k )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC), mem_size_Q_q )); ////////////////////////////////////////////////////////////////////////// double tmp = 2. 
* (double)mem_size_Q_k + (double)mem_size_Q_q + (double)parameter->getD3Qxx()*(double)mem_size_Q_q; @@ -524,24 +518,24 @@ void CudaMemoryManager::cudaCopyProcessNeighborXIndex(int lev, unsigned int proc cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, - const unsigned int &memsizeFsRecv, int streamIndex) + const unsigned int &memsizeFsRecv) { - if (streamIndex == -1) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice)); else checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0], - parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], - parameter->getD3Qxx() * memsizeFsRecv, - cudaMemcpyHostToDevice, - parameter->getStreamManager()->getStream(streamIndex))); + parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], + parameter->getD3Qxx() * memsizeFsRecv, + cudaMemcpyHostToDevice, + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, - const unsigned int &memsizeFsSend, int streamIndex) -{ - if (streamIndex == -1) + const unsigned int &memsizeFsSend) +{ + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, @@ -551,7 +545,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * 
memsizeFsSend, cudaMemcpyDeviceToHost, - parameter->getStreamManager()->getStream(streamIndex))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor) { @@ -594,35 +588,33 @@ void CudaMemoryManager::cudaCopyProcessNeighborYIndex(int lev, unsigned int proc parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].memsizeIndex, cudaMemcpyHostToDevice)); } -void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv, - int streamIndex) +void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv) { - if (streamIndex == -1) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice)); else - checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], - parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * memsizeFsRecv, - cudaMemcpyHostToDevice, - parameter->getStreamManager()->getStream(streamIndex))); + checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], + parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], + parameter->getD3Qxx() * memsizeFsRecv, + cudaMemcpyHostToDevice, + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } -void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, - int streamIndex) +void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int 
&memsizeFsSend) { - if (streamIndex == -1) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost)); else - checkCudaErrors( - cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * memsizeFsSend, - cudaMemcpyDeviceToHost, parameter->getStreamManager()->getStream(streamIndex))); + checkCudaErrors( cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getD3Qxx() * memsizeFsSend, + cudaMemcpyDeviceToHost, + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) { @@ -666,9 +658,9 @@ void CudaMemoryManager::cudaCopyProcessNeighborZIndex(int lev, unsigned int proc cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, - const unsigned int &memsizeFsRecv, int streamIndex) + const unsigned int &memsizeFsRecv) { - if (streamIndex == -1) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, @@ -678,12 +670,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsRecv, cudaMemcpyHostToDevice, - 
parameter->getStreamManager()->getStream(streamIndex))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, - const unsigned int &memsizeFsSend, int streamIndex) + const unsigned int &memsizeFsSend) { - if (streamIndex == -1) + if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder)) checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, @@ -693,7 +685,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * memsizeFsSend, cudaMemcpyDeviceToHost, - parameter->getStreamManager()->getStream(streamIndex))); + parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder))); } void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor) { @@ -887,17 +879,17 @@ void CudaMemoryManager::cudaFreeProcessNeighborF3Z(int lev, unsigned int process void CudaMemoryManager::cudaAllocNeighborWSB(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse ), parameter->getParH(lev)->mem_size_int_SP )); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse ), parameter->getParH(lev)->memSizeLonglongLBnodes )); //Device - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse ), parameter->getParD(lev)->mem_size_int_SP )); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse ), parameter->getParD(lev)->memSizeLonglongLBnodes )); ////////////////////////////////////////////////////////////////////////// - double tmp = (double)parameter->getParH(lev)->mem_size_int_SP; + double tmp = 
(double)parameter->getParH(lev)->memSizeLonglongLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyNeighborWSB(int lev) { //copy host to device - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse, parameter->getParH(lev)->neighborInverse, parameter->getParH(lev)->mem_size_int_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse, parameter->getParH(lev)->neighborInverse, parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeNeighborWSB(int lev) { @@ -907,7 +899,7 @@ void CudaMemoryManager::cudaFreeNeighborWSB(int lev) void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev) { //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->memSizeRealLBnodes)); //Debug // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSij ), parameter->getParH(lev)->mem_size_real_SP)); // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSDij), parameter->getParH(lev)->mem_size_real_SP)); @@ -922,7 +914,7 @@ void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev) // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gDzvz), parameter->getParH(lev)->mem_size_real_SP)); //Device - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->memSizeRealLBnodes)); //Debug // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSij ), parameter->getParD(lev)->mem_size_real_SP)); // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSDij), parameter->getParD(lev)->mem_size_real_SP)); @@ -937,13 +929,13 @@ void 
CudaMemoryManager::cudaAllocTurbulentViscosity(int lev) // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gDzvz), parameter->getParD(lev)->mem_size_real_SP)); // ////////////////////////////////////////////////////////////////////////// // double tmp = (double)parameter->getParH(lev)->mem_size_real_SP * 12.0; - double tmp = (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev) { //copy host to device - checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); //Debug // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSij , parameter->getParH(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSDij, parameter->getParH(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); @@ -960,7 +952,7 @@ void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev) void CudaMemoryManager::cudaCopyTurbulentViscosityDH(int lev) { //copy device to host - checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost)); //Debug // checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSij , parameter->getParD(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); // 
checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSDij, parameter->getParD(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); @@ -1062,29 +1054,29 @@ void CudaMemoryManager::cudaFreeTurbulenceIntensity(int lev) void CudaMemoryManager::cudaAllocMedianSP(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med ), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med ), parameter->getParH(lev)->memSizeRealLBnodes)); //Device - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med ), parameter->getParD(lev)->mem_size_real_SP)); - 
checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med ), parameter->getParD(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med ), parameter->getParD(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med ), parameter->getParD(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med ), parameter->getParD(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med ), parameter->getParD(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med ), parameter->getParD(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med ), parameter->getParD(lev)->memSizeRealLBnodes)); ////////////////////////////////////////////////////////////////////////// - double tmp = 5. * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = 5. 
* (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyMedianSP(int lev) { //copy host to device - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med , parameter->getParH(lev)->rho_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med , parameter->getParH(lev)->vx_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med , parameter->getParH(lev)->vy_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med , parameter->getParH(lev)->vz_SP_Med , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->press_SP_Med, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med , parameter->getParH(lev)->rho_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med , parameter->getParH(lev)->vx_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med , parameter->getParH(lev)->vy_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med , parameter->getParH(lev)->vz_SP_Med , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->press_SP_Med, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeMedianSP(int lev) { @@ -1097,11 +1089,11 @@ 
void CudaMemoryManager::cudaFreeMedianSP(int lev) void CudaMemoryManager::cudaAllocMedianOut(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out ), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out ), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out ), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out ), parameter->getParH(lev)->memSizeRealLBnodes)); } void CudaMemoryManager::cudaFreeMedianOut(int lev) { @@ -1655,6 +1647,133 @@ void CudaMemoryManager::cudaFreeWallModel(int lev, bool hasWallModelMonitor) } } + +//Precursor BC +void CudaMemoryManager::cudaAllocPrecursorBC(int lev) +{ + uint memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int); + uint memSizeQUint = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint); + uint memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real); + + checkCudaErrors( cudaMallocHost((void**) 
&parameter->getParH(lev)->precursorBC.k, memSizeQInt)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal)); + + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0PP, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0PM, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0MP, memSizeQUint)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.planeNeighbor0MM, memSizeQUint)); + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0PP, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0PM, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0MP, memSizeQReal)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.weights0MM, memSizeQReal)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.k, memSizeQInt)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.q27[0], parameter->getD3Qxx()*memSizeQReal)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0PP, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0PM, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0MP, memSizeQUint)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.planeNeighbor0MM, memSizeQUint)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0PP, memSizeQReal)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0PM, memSizeQReal)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0MP, memSizeQReal)); + 
checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.weights0MM, memSizeQReal)); + + real memSize = memSizeQInt+4*memSizeQUint+(4+parameter->getD3Qxx())*memSizeQReal; /* bytes of the device-side arrays allocated above: k, four planeNeighbor arrays, four weight arrays and q27 */ + setMemsizeGPU(memSize, false); + +} + + +void CudaMemoryManager::cudaAllocPrecursorData(int lev) +{ + size_t size = parameter->getParH(lev)->precursorBC.numberOfPrecursorNodes*sizeof(real)*parameter->getParH(lev)->precursorBC.numberOfQuantities; /* bytes per buffer; the same size is used for last, current and next */ + + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.last, size)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.current, size)); + checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.next, size)); + + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.last, size)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.current, size)); + checkCudaErrors( cudaMalloc((void**) &parameter->getParD(lev)->precursorBC.next, size)); + setMemsizeGPU(3*size, false); +} + + +void CudaMemoryManager::cudaCopyPrecursorBC(int lev) +{ + uint memSizeQInt = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(int); + uint memSizeQUint = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(uint); + uint memSizeQReal = parameter->getParH(lev)->precursorBC.numberOfBCnodes*sizeof(real); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.k, parameter->getParH(lev)->precursorBC.k, memSizeQInt, cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.q27[0], parameter->getParH(lev)->precursorBC.q27[0], memSizeQReal*parameter->getD3Qxx(), cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0PP, parameter->getParH(lev)->precursorBC.planeNeighbor0PP, memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0PM, parameter->getParH(lev)->precursorBC.planeNeighbor0PM, 
memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0MP, parameter->getParH(lev)->precursorBC.planeNeighbor0MP, memSizeQUint, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.planeNeighbor0MM, parameter->getParH(lev)->precursorBC.planeNeighbor0MM, memSizeQUint, cudaMemcpyHostToDevice)); + + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0PP, parameter->getParH(lev)->precursorBC.weights0PP, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0PM, parameter->getParH(lev)->precursorBC.weights0PM, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0MP, parameter->getParH(lev)->precursorBC.weights0MP, memSizeQReal, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->precursorBC.weights0MM, parameter->getParH(lev)->precursorBC.weights0MM, memSizeQReal, cudaMemcpyHostToDevice)); +} +void CudaMemoryManager::cudaCopyPrecursorData(int lev) +{ + auto prec = &parameter->getParH(lev)->precursorBC; + auto precStream = parameter->getStreamManager()->getStream(CudaStreamIndex::Precursor); + size_t memSize = prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities; + checkCudaErrors( cudaStreamSynchronize(precStream) ); /* wait for work already queued on the precursor stream before enqueuing the next upload */ + checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, precStream) ); +} + + +void CudaMemoryManager::cudaFreePrecursorBC(int lev) +{ + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.k)); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.q27[0])); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0PP)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0PM)); + checkCudaErrors( cudaFreeHost( 
parameter->getParH(lev)->precursorBC.planeNeighbor0MP)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.planeNeighbor0MM)); + + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0PP)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0PM)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0MP)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.weights0MM)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.k)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.q27[0])); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0PP)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0PM)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0MP)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.planeNeighbor0MM)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0PP)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0PM)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0MP)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.weights0MM)); +} + +void CudaMemoryManager::cudaFreePrecursorData(int lev) +{ + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.last)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.current)); + checkCudaErrors( cudaFreeHost( parameter->getParH(lev)->precursorBC.next)); + + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.last)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.current)); + checkCudaErrors( cudaFree( parameter->getParD(lev)->precursorBC.next)); +} //Test roundoff error void CudaMemoryManager::cudaAllocTestRE(int lev, unsigned int size) { @@ -1913,15 +2032,15 @@ void 
CudaMemoryManager::cudaFreeMeasurePointsIndex(int lev) } void CudaMemoryManager::cudaAllocFsForCheckPointAndRestart(int lev) { - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes)); } void CudaMemoryManager::cudaCopyFsForRestart(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0], parameter->getParH(lev)->distributions.f[0], (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0], parameter->getParH(lev)->distributions.f[0], (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaCopyFsForCheckPoint(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0], parameter->getParD(lev)->distributions.f[0], (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0], parameter->getParD(lev)->distributions.f[0], (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } void CudaMemoryManager::cudaFreeFsForCheckPointAndRestart(int lev) { @@ -2400,20 +2519,20 @@ void CudaMemoryManager::cudaFreePorousMedia(PorousMedia* pm, int lev) void CudaMemoryManager::cudaAllocConcentration(int lev) { //Host - checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->Conc), 
parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->Conc), parameter->getParH(lev)->memSizeRealLBnodes)); //Device - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->Conc), parameter->getParD(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->Conc), parameter->getParD(lev)->memSizeRealLBnodes)); ////////////////////////////////////////////////////////////////////////// - double tmp = (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); } void CudaMemoryManager::cudaCopyConcentrationDeviceToHost(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->Conc, parameter->getParD(lev)->Conc, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost)); + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->Conc, parameter->getParD(lev)->Conc, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); } void CudaMemoryManager::cudaCopyConcentrationHostToDevice(int lev) { - checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->Conc, parameter->getParH(lev)->Conc, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->Conc, parameter->getParH(lev)->Conc, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFreeConcentration(int lev) { @@ -2425,14 +2544,14 @@ void CudaMemoryManager::cudaAllocTempFs(int lev) //Device if (parameter->getDiffMod() == 7) { - checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes)); } else if (parameter->getDiffMod() == 27) { - 
checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD27.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD27.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes)); } ////////////////////////////////////////////////////////////////////////// - double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->mem_size_real_SP); + double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->memSizeRealLBnodes); setMemsizeGPU(tmp, false); } ////////////////////////////////////////////////////////////////////////// @@ -2627,12 +2746,12 @@ void CudaMemoryManager::cudaFreeConcFile(int lev) void CudaMemoryManager::cudaAllocMedianOutAD(int lev) { //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out), 
parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes)); } void CudaMemoryManager::cudaFreeMedianOutAD(int lev) { @@ -2886,31 +3005,31 @@ void CudaMemoryManager::cudaFreeProcessNeighborADZ(int lev, unsigned int process void CudaMemoryManager::cudaAlloc2ndOrderDerivitivesIsoTest(int lev) { //Host - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes)); //Device (spinning ship) - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP)); - checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP)); + checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), 
parameter->getParH(lev)->memSizeRealLBnodes)); + checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes)); ////////////////////////////////////////////////////////////////////////// - double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP; + double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes; setMemsizeGPU(tmp, false); //printf("Coord = %f MB",tmp/1000000.); } void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestDH(int lev) { //copy device to host - checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost)); } void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestHD(int lev) { //copy host to device - checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); - 
checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); } void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev) @@ -2921,231 +3040,297 @@ void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev) } -void CudaMemoryManager::cudaAllocFluidNodeIndices(int lev) { - uint mem_size_geo_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodes; +void CudaMemoryManager::cudaAllocTaggedFluidNodeIndices(CollisionTemplate tag, int lev) { + uint mem_size_tagged_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfTaggedFluidNodes[tag]; // Host - checkCudaErrors(cudaMallocHost((void **)&(parameter->getParH(lev)->fluidNodeIndices), mem_size_geo_fluid_nodes)); + checkCudaErrors(cudaMallocHost((void **)&(parameter->getParH(lev)->taggedFluidNodeIndices[tag]), mem_size_tagged_fluid_nodes)); // Device - checkCudaErrors(cudaMalloc((void **)&(parameter->getParD(lev)->fluidNodeIndices), mem_size_geo_fluid_nodes)); + checkCudaErrors(cudaMalloc((void **)&(parameter->getParD(lev)->taggedFluidNodeIndices[tag]), mem_size_tagged_fluid_nodes)); ////////////////////////////////////////////////////////////////////////// - setMemsizeGPU((double)mem_size_geo_fluid_nodes, false); + setMemsizeGPU((double)mem_size_tagged_fluid_nodes, false); } -void CudaMemoryManager::cudaCopyFluidNodeIndices(int lev) { - uint mem_size_geo_fluid_nodes = sizeof(uint) * 
parameter->getParH(lev)->numberOfFluidNodes; - checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fluidNodeIndices, - parameter->getParH(lev)->fluidNodeIndices, - mem_size_geo_fluid_nodes, cudaMemcpyHostToDevice)); +void CudaMemoryManager::cudaCopyTaggedFluidNodeIndices(CollisionTemplate tag, int lev) { + uint mem_size_tagged_fluid_nodes = sizeof(uint) * parameter->getParH(lev)->numberOfTaggedFluidNodes[tag]; + checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->taggedFluidNodeIndices[tag], + parameter->getParH(lev)->taggedFluidNodeIndices[tag], + mem_size_tagged_fluid_nodes, cudaMemcpyHostToDevice)); } -void CudaMemoryManager::cudaFreeFluidNodeIndices(int lev) { - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->fluidNodeIndices)); +void CudaMemoryManager::cudaFreeTaggedFluidNodeIndices(CollisionTemplate tag, int lev) { + checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->taggedFluidNodeIndices[tag])); } -void CudaMemoryManager::cudaAllocFluidNodeIndicesBorder(int lev) { - uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder; - // Host - checkCudaErrors( - cudaMallocHost((void **)&(parameter->getParH(lev)->fluidNodeIndicesBorder), mem_size_fluid_nodes_border)); - // Device - checkCudaErrors( - cudaMalloc((void **)&(parameter->getParD(lev)->fluidNodeIndicesBorder), mem_size_fluid_nodes_border)); - ////////////////////////////////////////////////////////////////////////// - setMemsizeGPU((double)mem_size_fluid_nodes_border, false); -} +//////////////////////////////////////////////////////////////////////////////////// +// ActuatorFarm +/////////////////////////////////////////////////////////////////////////////// +void CudaMemoryManager::cudaAllocBladeGeometries(ActuatorFarm* actuatorFarm) +{ + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeRadiiH, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade()) ); + 
checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->diametersH, sizeRealTurbine) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosXH, sizeRealTurbine) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosYH, sizeRealTurbine) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->turbinePosZH, sizeRealTurbine) ); + + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeRadiiD, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->diametersD, sizeRealTurbine) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosXD, sizeRealTurbine) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosYD, sizeRealTurbine) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->turbinePosZD, sizeRealTurbine) ); + setMemsizeGPU(sizeof(real)*(actuatorFarm->getNumberOfNodesPerBlade()+4)*actuatorFarm->getNumberOfTurbines(), false); -void CudaMemoryManager::cudaCopyFluidNodeIndicesBorder(int lev) { - uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder; - checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fluidNodeIndicesBorder, - parameter->getParH(lev)->fluidNodeIndicesBorder, - mem_size_fluid_nodes_border, cudaMemcpyHostToDevice)); } +void CudaMemoryManager::cudaCopyBladeGeometriesHtoD(ActuatorFarm* actuatorFarm) +{ + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeRadiiD, actuatorFarm->bladeRadiiH, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->diametersD, actuatorFarm->diametersH, sizeRealTurbine, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosXD, actuatorFarm->turbinePosXH, sizeRealTurbine, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosYD, actuatorFarm->turbinePosYH, 
sizeRealTurbine, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosZD, actuatorFarm->turbinePosZH, sizeRealTurbine, cudaMemcpyHostToDevice) ); -void CudaMemoryManager::cudaFreeFluidNodeIndicesBorder(int lev) { - checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->fluidNodeIndicesBorder)); } +void CudaMemoryManager::cudaCopyBladeGeometriesDtoH(ActuatorFarm* actuatorFarm) +{ + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeRadiiH, actuatorFarm->bladeRadiiD, sizeRealTurbine*actuatorFarm->getNumberOfNodesPerBlade(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->diametersH, actuatorFarm->diametersD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosXH, actuatorFarm->turbinePosXD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosYH, actuatorFarm->turbinePosYD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->turbinePosZH, actuatorFarm->turbinePosZD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); -//////////////////////////////////////////////////////////////////////////////////// -// ActuatorLine -/////////////////////////////////////////////////////////////////////////////// +} +void CudaMemoryManager::cudaFreeBladeGeometries(ActuatorFarm* actuatorFarm) +{ + checkCudaErrors( cudaFree(actuatorFarm->bladeRadiiD) ); + checkCudaErrors( cudaFree(actuatorFarm->diametersD) ); + checkCudaErrors( cudaFree(actuatorFarm->turbinePosXD) ); + checkCudaErrors( cudaFree(actuatorFarm->turbinePosYD) ); + checkCudaErrors( cudaFree(actuatorFarm->turbinePosZD) ); + + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeRadiiH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->diametersH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->turbinePosXH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->turbinePosYH) ); + checkCudaErrors( 
cudaFreeHost(actuatorFarm->turbinePosZH) ); +} -void CudaMemoryManager::cudaAllocBladeRadii(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocBladeOrientations(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes()) ); + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->omegasH, sizeRealTurbine) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->azimuthsH, sizeRealTurbine) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->yawsH, sizeRealTurbine) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->omegasD, sizeRealTurbine) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->azimuthsD, sizeRealTurbine) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->yawsD, sizeRealTurbine) ); - setMemsizeGPU(sizeof(real)*actuatorLine->getNBladeNodes(), false); -} + setMemsizeGPU(3*sizeRealTurbine, false); -void CudaMemoryManager::cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine) -{ - checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiD, actuatorLine->bladeRadiiH, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyHostToDevice) ); } +void CudaMemoryManager::cudaCopyBladeOrientationsHtoD(ActuatorFarm* actuatorFarm) +{ + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMemcpy(actuatorFarm->omegasD, actuatorFarm->omegasH, sizeRealTurbine, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->azimuthsD, actuatorFarm->azimuthsH, sizeRealTurbine, cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->yawsD, actuatorFarm->yawsH, sizeRealTurbine, cudaMemcpyHostToDevice) ); -void CudaMemoryManager::cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine) +} +void 
CudaMemoryManager::cudaCopyBladeOrientationsDtoH(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeRadiiH, actuatorLine->bladeRadiiD, sizeof(real)*actuatorLine->getNBladeNodes(), cudaMemcpyDeviceToHost) ); + uint sizeRealTurbine = sizeof(real)*actuatorFarm->getNumberOfTurbines(); + checkCudaErrors( cudaMemcpy(actuatorFarm->omegasH, actuatorFarm->omegasD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->azimuthsH, actuatorFarm->azimuthsD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->yawsH, actuatorFarm->yawsD, sizeRealTurbine, cudaMemcpyDeviceToHost) ); } - -void CudaMemoryManager::cudaFreeBladeRadii(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeBladeOrientations(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaFree(actuatorLine->bladeRadiiD) ); + checkCudaErrors( cudaFree(actuatorFarm->omegasD) ); /* cudaFree takes the device pointer itself, not the address of the pointer member */ + checkCudaErrors( cudaFree(actuatorFarm->azimuthsD) ); + checkCudaErrors( cudaFree(actuatorFarm->yawsD) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeRadiiH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->omegasH) ); /* likewise cudaFreeHost takes the host buffer pointer itself */ + checkCudaErrors( cudaFreeHost(actuatorFarm->azimuthsH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->yawsH) ); } -void CudaMemoryManager::cudaAllocBladeCoords(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocBladeCoords(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( 
cudaMallocHost((void**) &actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - setMemsizeGPU(3.f*actuatorLine->getNNodes(), false); + setMemsizeGPU(6.f*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); /* six device arrays of getNumberOfNodes() reals each, reported in bytes */ } -void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXD, actuatorLine->bladeCoordsXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYD, actuatorLine->bladeCoordsYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( 
cudaMemcpy(actuatorLine->bladeCoordsZD, actuatorLine->bladeCoordsZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXDCurrentTimestep, actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYDCurrentTimestep, actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZDCurrentTimestep, actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsXH, actuatorLine->bladeCoordsXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsYH, actuatorLine->bladeCoordsYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeCoordsZH, actuatorLine->bladeCoordsZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXH, actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYH, actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZH, actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); } -void CudaMemoryManager::cudaFreeBladeCoords(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeBladeCoords(ActuatorFarm* actuatorFarm) { - checkCudaErrors( 
cudaFree(actuatorLine->bladeCoordsXD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeCoordsYD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeCoordsZD) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsXDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsYDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsZDCurrentTimestep) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsXH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsYH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeCoordsZH) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsXDPreviousTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsYDPreviousTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeCoordsZDPreviousTimestep) ); + + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsXH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsYH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeCoordsZH) ); } -void CudaMemoryManager::cudaAllocBladeIndices(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocBladeIndices(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeIndicesD, sizeof(uint)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeIndicesD, sizeof(uint)*actuatorFarm->getNumberOfNodes()) ); - setMemsizeGPU(sizeof(uint)*actuatorLine->getNNodes(), false); + setMemsizeGPU(sizeof(uint)*actuatorFarm->getNumberOfNodes(), false); } -void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeIndicesD, 
actuatorLine->bladeIndicesH, sizeof(uint)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeIndicesD, actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaFreeBladeIndices(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeBladeIndices(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaFree(actuatorLine->bladeIndicesD) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeIndicesD) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeIndicesH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeIndicesH) ); } -void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - checkCudaErrors( 
cudaMalloc((void**) &actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNNodes(), false); + setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); /* six device arrays: X/Y/Z for the current and the previous timestep */ } -void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXD, actuatorLine->bladeVelocitiesXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYD, actuatorLine->bladeVelocitiesYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZD, actuatorLine->bladeVelocitiesZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXDCurrentTimestep, actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYDCurrentTimestep, actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZDCurrentTimestep, 
actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesXH, actuatorLine->bladeVelocitiesXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesYH, actuatorLine->bladeVelocitiesYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeVelocitiesZH, actuatorLine->bladeVelocitiesZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXH, actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYH, actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZH, actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); } -void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesXD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesYD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeVelocitiesZD) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesXDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesYDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesZDCurrentTimestep) ); + + checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesXDPreviousTimestep) ); + 
checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesYDPreviousTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeVelocitiesZDPreviousTimestep) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesXH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesYH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeVelocitiesZH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesXH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesYH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeVelocitiesZH) ); } -void CudaMemoryManager::cudaAllocBladeForces(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocBladeForces(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMallocHost((void**) &actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) 
&actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNNodes()) ); - checkCudaErrors( cudaMalloc((void**) &actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); + checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) ); - setMemsizeGPU(3.*sizeof(real)*actuatorLine->getNNodes(), false); + setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false); /* six device arrays: X/Y/Z for the current and the previous timestep */ } -void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXD, actuatorLine->bladeForcesXH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYD, actuatorLine->bladeForcesYH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZD, actuatorLine->bladeForcesZH, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXDCurrentTimestep, actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYDCurrentTimestep, actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZDCurrentTimestep, actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine) +void 
CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesXH, actuatorLine->bladeForcesXD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesYH, actuatorLine->bladeForcesYD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); - checkCudaErrors( cudaMemcpy(actuatorLine->bladeForcesZH, actuatorLine->bladeForcesZD, sizeof(real)*actuatorLine->getNNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXH, actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYH, actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZH, actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) ); } -void CudaMemoryManager::cudaFreeBladeForces(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeBladeForces(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaFree(actuatorLine->bladeForcesXD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeForcesYD) ); - checkCudaErrors( cudaFree(actuatorLine->bladeForcesZD) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeForcesXDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeForcesYDCurrentTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeForcesZDCurrentTimestep) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesXH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesYH) ); - checkCudaErrors( cudaFreeHost(actuatorLine->bladeForcesZH) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeForcesXDPreviousTimestep) ); + checkCudaErrors( cudaFree(actuatorFarm->bladeForcesYDPreviousTimestep) ); + checkCudaErrors( 
cudaFree(actuatorFarm->bladeForcesZDPreviousTimestep) ); + + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesXH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesYH) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->bladeForcesZH) ); } -void CudaMemoryManager::cudaAllocSphereIndices(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaAllocSphereIndices(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMallocHost((void**) &(actuatorLine->boundingSphereIndicesH), sizeof(int)*actuatorLine->getNIndices())); - checkCudaErrors( cudaMalloc((void**) &(actuatorLine->boundingSphereIndicesD), sizeof(int)*actuatorLine->getNIndices())); - setMemsizeGPU(sizeof(int)*actuatorLine->getNIndices(), false); + checkCudaErrors( cudaMallocHost((void**) &(actuatorFarm->boundingSphereIndicesH), sizeof(int)*actuatorFarm->getNumberOfIndices())); + checkCudaErrors( cudaMalloc((void**) &(actuatorFarm->boundingSphereIndicesD), sizeof(int)*actuatorFarm->getNumberOfIndices())); + setMemsizeGPU(sizeof(int)*actuatorFarm->getNumberOfIndices(), false); } -void CudaMemoryManager::cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaCopySphereIndicesHtoD(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaMemcpy(actuatorLine->boundingSphereIndicesD, actuatorLine->boundingSphereIndicesH, sizeof(int)*actuatorLine->getNIndices(), cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(actuatorFarm->boundingSphereIndicesD, actuatorFarm->boundingSphereIndicesH, sizeof(int)*actuatorFarm->getNumberOfIndices(), cudaMemcpyHostToDevice) ); } -void CudaMemoryManager::cudaFreeSphereIndices(ActuatorLine* actuatorLine) +void CudaMemoryManager::cudaFreeSphereIndices(ActuatorFarm* actuatorFarm) { - checkCudaErrors( cudaFreeHost(actuatorLine->boundingSphereIndicesH) ); - checkCudaErrors( cudaFree(actuatorLine->boundingSphereIndicesD) ); + checkCudaErrors( cudaFreeHost(actuatorFarm->boundingSphereIndicesH) ); + checkCudaErrors( 
cudaFree(actuatorFarm->boundingSphereIndicesD) ); } //////////////////////////////////////////////////////////////////////////////////// @@ -3231,8 +3416,11 @@ void CudaMemoryManager::cudaCopyProbeQuantityArrayHtoD(Probe* probe, int level) } void CudaMemoryManager::cudaCopyProbeQuantityArrayDtoH(Probe* probe, int level) { - checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesArrayH, probe->getProbeStruct(level)->quantitiesArrayD, probe->getProbeStruct(level)->nArrays*sizeof(real)*probe->getProbeStruct(level)->nPoints, cudaMemcpyDeviceToHost) ); + auto probeStruct = probe->getProbeStruct(level); + + checkCudaErrors( cudaMemcpy(probeStruct->quantitiesArrayH, probeStruct->quantitiesArrayD, probeStruct->nArrays*sizeof(real)*probeStruct->nPoints, cudaMemcpyDeviceToHost) ); } + void CudaMemoryManager::cudaFreeProbeQuantityArray(Probe* probe, int level) { checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesArrayH) ); @@ -3262,6 +3450,7 @@ void CudaMemoryManager::cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->quantitiesH, probe->getProbeStruct(level)->quantitiesD, int(Statistic::LAST)*sizeof(bool), cudaMemcpyDeviceToHost) ); checkCudaErrors( cudaMemcpy(probe->getProbeStruct(level)->arrayOffsetsH, probe->getProbeStruct(level)->arrayOffsetsD, int(Statistic::LAST)*sizeof(int), cudaMemcpyDeviceToHost) ); } + void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level) { checkCudaErrors( cudaFreeHost(probe->getProbeStruct(level)->quantitiesH) ); @@ -3270,23 +3459,51 @@ void CudaMemoryManager::cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int leve checkCudaErrors( cudaFree (probe->getProbeStruct(level)->arrayOffsetsD) ); } +void CudaMemoryManager::cudaAllocPrecursorWriter(PrecursorWriter* writer, int level) +{ + auto prec = writer->getPrecursorStruct(level); + size_t indSize = prec->numberOfPointsInBC*sizeof(uint); + checkCudaErrors( 
cudaStreamCreate(&prec->stream) ); + checkCudaErrors( cudaMallocHost((void**) &prec->indicesH, indSize)); + checkCudaErrors( cudaMalloc((void**) &prec->indicesD, indSize)); + size_t dataSize = prec->numberOfPointsInBC*sizeof(real)*prec->numberOfQuantities; + size_t dataSizeH = dataSize * prec->numberOfTimestepsPerFile; + + checkCudaErrors( cudaMallocHost((void**) &prec->dataH, dataSizeH)); + checkCudaErrors( cudaMallocHost((void**) &prec->bufferH, dataSizeH)); + checkCudaErrors( cudaMalloc((void**) &prec->dataD, dataSize)); + checkCudaErrors( cudaMalloc((void**) &prec->bufferD, dataSize)); + setMemsizeGPU(indSize+2*dataSize, false); +} +void CudaMemoryManager::cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level) +{ + checkCudaErrors( cudaMemcpy(writer->getPrecursorStruct(level)->indicesD, writer->getPrecursorStruct(level)->indicesH, writer->getPrecursorStruct(level)->numberOfPointsInBC*sizeof(uint), cudaMemcpyHostToDevice) ); +} +void CudaMemoryManager::cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level) +{ + auto prec = writer->getPrecursorStruct(level); + int sizeTimestep = prec->numberOfPointsInBC*prec->numberOfQuantities; + checkCudaErrors( cudaStreamSynchronize(prec->stream) ); + checkCudaErrors( cudaMemcpyAsync( &prec->bufferH[prec->numberOfTimestepsBuffered*sizeTimestep], prec->bufferD, sizeof(real)*sizeTimestep, cudaMemcpyDeviceToHost, prec->stream)); +} +void CudaMemoryManager::cudaFreePrecursorWriter(PrecursorWriter* writer, int level) +{ + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->indicesH)); + checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->indicesD)); - - - - - - - - + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->dataH)); + checkCudaErrors( cudaFreeHost(writer->getPrecursorStruct(level)->bufferH)); + checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->dataD)); + checkCudaErrors( cudaFree(writer->getPrecursorStruct(level)->bufferD)); +} 
CudaMemoryManager::CudaMemoryManager(std::shared_ptr<Parameter> parameter) : parameter(parameter) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index d410340d2de7797cf23a781a64d11f592d62a6fb..e2f2e8658b6ef7a9453546454dd8e1f643574e17 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -18,8 +18,10 @@ class Parameter; class PorousMedia; -class ActuatorLine; +class ActuatorFarm; class Probe; +class VelocitySetter; +class PrecursorWriter; class VIRTUALFLUIDS_GPU_EXPORT CudaMemoryManager { @@ -30,8 +32,8 @@ public: void setMemsizeGPU(double admem, bool reset); double getMemsizeGPU(); - void cudaAllocFull(int lev); - void cudaFreeFull(int lev); + //void cudaAllocFull(int lev); //DEPRECATED: related to full matrix + //void cudaFreeFull(int lev); //DEPRECATED: related to full matrix void cudaCopyPrint(int lev); void cudaCopyMedianPrint(int lev); @@ -92,26 +94,20 @@ public: ////////////////////////////////////////////////////////////////////////// //3D domain decomposition virtual void cudaAllocProcessNeighborX(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv, - int streamIndex); - void cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, - int streamIndex); + void cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv); + void cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend); virtual void cudaCopyProcessNeighborXIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor); // virtual void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const 
unsigned int &memsizeFsRecv, - int streamIndex); - void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, - int streamIndex); + void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv); + void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend); virtual void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor); // virtual void cudaAllocProcessNeighborZ(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv, - int streamIndex); - void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend, - int streamIndex); + void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv); + void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend); virtual void cudaCopyProcessNeighborZIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor); @@ -183,6 +179,13 @@ public: void cudaCopyStressBC(int lev); void cudaFreeStressBC(int lev); + void cudaAllocPrecursorBC(int lev); + void cudaAllocPrecursorData(int lev); + void cudaCopyPrecursorBC(int lev); + void cudaCopyPrecursorData(int lev); + void cudaFreePrecursorBC(int lev); + void cudaFreePrecursorData(int lev); + void cudaAllocWallModel(int lev, bool hasWallModelMonitor); void cudaCopyWallModel(int lev, bool hasWallModelMonitor); void cudaFreeWallModel(int lev, bool hasWallModelMonitor); @@ -346,42 +349,44 @@ public: void cudaCopyProcessNeighborADZIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborADZ(int lev, unsigned int processNeighbor); - void cudaAllocFluidNodeIndices(int lev); - 
void cudaCopyFluidNodeIndices(int lev); - void cudaFreeFluidNodeIndices(int lev); - void cudaAllocFluidNodeIndicesBorder(int lev); - void cudaCopyFluidNodeIndicesBorder(int lev); - void cudaFreeFluidNodeIndicesBorder(int lev); - - // Actuator Line - void cudaAllocBladeRadii(ActuatorLine* actuatorLine); - void cudaCopyBladeRadiiHtoD(ActuatorLine* actuatorLine); - void cudaCopyBladeRadiiDtoH(ActuatorLine* actuatorLine); - void cudaFreeBladeRadii(ActuatorLine* actuatorLine); - - void cudaAllocBladeCoords(ActuatorLine* actuatorLine); - void cudaCopyBladeCoordsHtoD(ActuatorLine* actuatorLine); - void cudaCopyBladeCoordsDtoH(ActuatorLine* actuatorLine); - void cudaFreeBladeCoords(ActuatorLine* actuatorLine); - - void cudaAllocBladeIndices(ActuatorLine* actuatorLine); - void cudaCopyBladeIndicesHtoD(ActuatorLine* actuatorLine); - void cudaFreeBladeIndices(ActuatorLine* actuatorLine); - - void cudaAllocBladeVelocities(ActuatorLine* actuatorLine); - void cudaCopyBladeVelocitiesHtoD(ActuatorLine* actuatorLine); - void cudaCopyBladeVelocitiesDtoH(ActuatorLine* actuatorLine); - void cudaFreeBladeVelocities(ActuatorLine* actuatorLine); - - void cudaAllocBladeForces(ActuatorLine* actuatorLine); - void cudaCopyBladeForcesHtoD(ActuatorLine* actuatorLine); - void cudaCopyBladeForcesDtoH(ActuatorLine* actuatorLine); - void cudaFreeBladeForces(ActuatorLine* actuatorLine); - - void cudaAllocSphereIndices(ActuatorLine* actuatorLine); - void cudaCopySphereIndicesHtoD(ActuatorLine* actuatorLine); - void cudaFreeSphereIndices(ActuatorLine* actuatorLine); - + void cudaAllocTaggedFluidNodeIndices(CollisionTemplate tag, int lev); + void cudaCopyTaggedFluidNodeIndices(CollisionTemplate tag, int lev); + void cudaFreeTaggedFluidNodeIndices(CollisionTemplate tag, int lev); + + // ActuatorFarm + void cudaAllocBladeGeometries(ActuatorFarm* actuatorFarm); + void cudaCopyBladeGeometriesHtoD(ActuatorFarm* actuatorFarm); + void cudaCopyBladeGeometriesDtoH(ActuatorFarm* actuatorFarm); + void 
cudaFreeBladeGeometries(ActuatorFarm* actuatorFarm); + + void cudaAllocBladeOrientations(ActuatorFarm* actuatorFarm); + void cudaCopyBladeOrientationsHtoD(ActuatorFarm* actuatorFarm); + void cudaCopyBladeOrientationsDtoH(ActuatorFarm* actuatorFarm); + void cudaFreeBladeOrientations(ActuatorFarm* actuatorFarm); + + void cudaAllocBladeCoords(ActuatorFarm* actuatorFarm); + void cudaCopyBladeCoordsHtoD(ActuatorFarm* actuatorFarm); + void cudaCopyBladeCoordsDtoH(ActuatorFarm* actuatorFarm); + void cudaFreeBladeCoords(ActuatorFarm* actuatorFarm); + + void cudaAllocBladeIndices(ActuatorFarm* actuatorFarm); + void cudaCopyBladeIndicesHtoD(ActuatorFarm* actuatorFarm); + void cudaFreeBladeIndices(ActuatorFarm* actuatorFarm); + + void cudaAllocBladeVelocities(ActuatorFarm* actuatorFarm); + void cudaCopyBladeVelocitiesHtoD(ActuatorFarm* actuatorFarm); + void cudaCopyBladeVelocitiesDtoH(ActuatorFarm* actuatorFarm); + void cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm); + + void cudaAllocBladeForces(ActuatorFarm* actuatorFarm); + void cudaCopyBladeForcesHtoD(ActuatorFarm* actuatorFarm); + void cudaCopyBladeForcesDtoH(ActuatorFarm* actuatorFarm); + void cudaFreeBladeForces(ActuatorFarm* actuatorFarm); + + void cudaAllocSphereIndices(ActuatorFarm* actuatorFarm); + void cudaCopySphereIndicesHtoD(ActuatorFarm* actuatorFarm); + void cudaFreeSphereIndices(ActuatorFarm* actuatorFarm); + // Probes void cudaAllocProbeDistances(Probe* probe, int level); void cudaCopyProbeDistancesHtoD(Probe* probe, int level); void cudaCopyProbeDistancesDtoH(Probe* probe, int level); @@ -402,6 +407,12 @@ public: void cudaCopyProbeQuantitiesAndOffsetsDtoH(Probe* probe, int level); void cudaFreeProbeQuantitiesAndOffsets(Probe* probe, int level); + //Precursor Writer + void cudaAllocPrecursorWriter(PrecursorWriter* writer, int level); + void cudaCopyPrecursorWriterIndicesHtoD(PrecursorWriter* writer, int level); + void cudaCopyPrecursorWriterOutputVariablesDtoH(PrecursorWriter* writer, int level); + 
void cudaFreePrecursorWriter(PrecursorWriter* writer, int level); + private: std::shared_ptr<Parameter> parameter; double memsizeGPU = 0.0; diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu index bbce8181d814fc8b9dbb086764becb73a86c0eda..553e1f34f7993a42682605b66d53407ede9292fd 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu @@ -21,7 +21,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -37,7 +37,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -48,63 +48,63 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - 
D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = 
&DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM 
* numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -137,33 +137,33 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real 
mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = 
(D.f[DIR_000])[k ];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + @@ -975,7 +975,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega, real* dxxUx, real* dyyUy, real* dzzUz, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { 
//////////////////////////////////////////////////////////////////////////////// @@ -989,7 +989,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -1000,63 +1000,63 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * 
numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = 
&DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * 
numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1089,33 +1089,33 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = 
(D.f[DIR_M00])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k ];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - 
real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + @@ -2016,7 +2016,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27( real omega, real* coordY, real* coordZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -2030,7 +2030,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -2041,63 +2041,63 @@ __global__ void LB_Kernel_Kum_1h_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P 
*size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * 
numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + 
D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -2158,33 +2158,33 @@ __global__ void LB_Kernel_Kum_1h_SP_27( real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ];// + c2over27 
;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ];// + c1over54 
;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k ];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + 
c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //Ship real coord0X = 281.125f;//7.5f; @@ -3238,7 +3238,7 @@ __global__ void LB_Kernel_Kum_New_SP_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -3252,7 +3252,7 @@ __global__ void LB_Kernel_Kum_New_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -3263,63 +3263,63 @@ __global__ void LB_Kernel_Kum_New_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = 
&DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = 
&DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -3380,33 +3380,33 @@ __global__ void LB_Kernel_Kum_New_SP_27( real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ];// + c1over54 
;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k ];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw ];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb ];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k ];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ];// + 
c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + @@ -4510,7 +4510,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27( real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -4524,7 +4524,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27( real omega, const unsigned k = nx*(ny*z + y) + x; 
////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -4535,63 +4535,63 @@ __global__ void LB_Kernel_Kum_Comp_SP_27( real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = 
&DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP 
*size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -4624,33 +4624,33 @@ __global__ void LB_Kernel_Kum_Comp_SP_27( real omega, unsigned int ktne = k; unsigned int 
kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f_E = (D.f[DIR_P00 ])[ke ];// + c2over27 ; - real f_W = (D.f[DIR_M00 ])[kw ];// + c2over27 ; - real f_N = (D.f[DIR_0P0 ])[kn ];// + c2over27 ; - real f_S = (D.f[DIR_0M0 ])[ks ];// + c2over27 ; - real f_T = (D.f[DIR_00P ])[kt ];// + c2over27 ; - real f_B = (D.f[DIR_00M ])[kb ];// + c2over27 ; - real f_NE = (D.f[DIR_PP0 ])[kne ];// + c1over54 ; - real f_SW = (D.f[DIR_MM0 ])[ksw ];// + c1over54 ; - real f_SE = (D.f[DIR_PM0 ])[kse ];// + c1over54 ; - real f_NW = (D.f[DIR_MP0 ])[knw ];// + c1over54 ; - real f_TE = (D.f[DIR_P0P ])[kte ];// + c1over54 ; - real f_BW = (D.f[DIR_M0M ])[kbw ];// + c1over54 ; - real f_BE = (D.f[DIR_P0M ])[kbe ];// + c1over54 ; - real f_TW = (D.f[DIR_M0P ])[ktw ];// + c1over54 ; - real f_TN = (D.f[DIR_0PP ])[ktn ];// + c1over54 ; - real f_BS = (D.f[DIR_0MM ])[kbs ];// + c1over54 ; - real f_BN = (D.f[DIR_0PM ])[kbn ];// + c1over54 ; - real f_TS = (D.f[DIR_0MP ])[kts ];// + c1over54 ; + real f_E = (D.f[DIR_P00])[ke ];// + c2over27 ; + real f_W = (D.f[DIR_M00])[kw ];// + c2over27 ; + real f_N = (D.f[DIR_0P0])[kn ];// + c2over27 ; + real f_S = (D.f[DIR_0M0])[ks ];// + c2over27 ; + real f_T = (D.f[DIR_00P])[kt ];// + c2over27 ; + real f_B = (D.f[DIR_00M])[kb ];// + c2over27 ; + real f_NE = (D.f[DIR_PP0])[kne ];// + c1over54 ; + real f_SW = (D.f[DIR_MM0])[ksw ];// + c1over54 ; + real f_SE = (D.f[DIR_PM0])[kse ];// + c1over54 ; + real f_NW = (D.f[DIR_MP0])[knw ];// + c1over54 ; + real f_TE = (D.f[DIR_P0P])[kte ];// + c1over54 ; + real f_BW = (D.f[DIR_M0M])[kbw ];// + c1over54 ; + real f_BE = (D.f[DIR_P0M])[kbe ];// + c1over54 ; + real f_TW = (D.f[DIR_M0P])[ktw ];// + c1over54 ; + real f_TN = (D.f[DIR_0PP])[ktn ];// + c1over54 ; + real f_BS = (D.f[DIR_0MM])[kbs ];// + c1over54 ; + real f_BN = (D.f[DIR_0PM])[kbn ];// + c1over54 ; + real f_TS = (D.f[DIR_0MP])[kts ];// + c1over54 ; 
real f_R = (D.f[DIR_000])[kzero];// + c8over27 ; - real f_TNE = (D.f[DIR_PPP ])[ktne ];// + c1over216; - real f_TSW = (D.f[DIR_MMP ])[ktsw ];// + c1over216; - real f_TSE = (D.f[DIR_PMP ])[ktse ];// + c1over216; - real f_TNW = (D.f[DIR_MPP ])[ktnw ];// + c1over216; - real f_BNE = (D.f[DIR_PPM ])[kbne ];// + c1over216; - real f_BSW = (D.f[DIR_MMM ])[kbsw ];// + c1over216; - real f_BSE = (D.f[DIR_PMM ])[kbse ];// + c1over216; - real f_BNW = (D.f[DIR_MPM ])[kbnw ];// + c1over216; + real f_TNE = (D.f[DIR_PPP])[ktne ];// + c1over216; + real f_TSW = (D.f[DIR_MMP])[ktsw ];// + c1over216; + real f_TSE = (D.f[DIR_PMP])[ktse ];// + c1over216; + real f_TNW = (D.f[DIR_MPP])[ktnw ];// + c1over216; + real f_BNE = (D.f[DIR_PPM])[kbne ];// + c1over216; + real f_BSW = (D.f[DIR_MMM])[kbsw ];// + c1over216; + real f_BSE = (D.f[DIR_PMM])[kbse ];// + c1over216; + real f_BNW = (D.f[DIR_MPM])[kbnw ];// + c1over216; //////////////////////////////////////////////////////////////////////////////////// real fx = c0o1; real fy = c0o1; @@ -5451,7 +5451,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -5467,7 +5467,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -5478,63 +5478,63 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = 
&DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = 
&DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + 
D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -5568,33 +5568,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw ]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = 
(D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw ]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw ]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs ]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw ]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs ]; - real mfaca = (D.f[DIR_MPM ])[kbw ]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw ]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs ]; + real mfaca = (D.f[DIR_MPM])[kbw ]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + @@ -6349,33 +6349,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; mfbbb += drho - drhoPost; //////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00 ])[k ] = mfabb; - (D.f[DIR_M00 ])[kw ] = 
mfcbb; - (D.f[DIR_0P0 ])[k ] = mfbab; - (D.f[DIR_0M0 ])[ks ] = mfbcb; - (D.f[DIR_00P ])[k ] = mfbba; - (D.f[DIR_00M ])[kb ] = mfbbc; - (D.f[DIR_PP0 ])[k ] = mfaab; - (D.f[DIR_MM0 ])[ksw ] = mfccb; - (D.f[DIR_PM0 ])[ks ] = mfacb; - (D.f[DIR_MP0 ])[kw ] = mfcab; - (D.f[DIR_P0P ])[k ] = mfaba; - (D.f[DIR_M0M ])[kbw ] = mfcbc; - (D.f[DIR_P0M ])[kb ] = mfabc; - (D.f[DIR_M0P ])[kw ] = mfcba; - (D.f[DIR_0PP ])[k ] = mfbaa; - (D.f[DIR_0MM ])[kbs ] = mfbcc; - (D.f[DIR_0PM ])[kb ] = mfbac; - (D.f[DIR_0MP ])[ks ] = mfbca; + (D.f[DIR_P00])[k ] = mfabb; + (D.f[DIR_M00])[kw ] = mfcbb; + (D.f[DIR_0P0])[k ] = mfbab; + (D.f[DIR_0M0])[ks ] = mfbcb; + (D.f[DIR_00P])[k ] = mfbba; + (D.f[DIR_00M])[kb ] = mfbbc; + (D.f[DIR_PP0])[k ] = mfaab; + (D.f[DIR_MM0])[ksw ] = mfccb; + (D.f[DIR_PM0])[ks ] = mfacb; + (D.f[DIR_MP0])[kw ] = mfcab; + (D.f[DIR_P0P])[k ] = mfaba; + (D.f[DIR_M0M])[kbw ] = mfcbc; + (D.f[DIR_P0M])[kb ] = mfabc; + (D.f[DIR_M0P])[kw ] = mfcba; + (D.f[DIR_0PP])[k ] = mfbaa; + (D.f[DIR_0MM])[kbs ] = mfbcc; + (D.f[DIR_0PM])[kb ] = mfbac; + (D.f[DIR_0MP])[ks ] = mfbca; (D.f[DIR_000])[k ] = mfbbb; - (D.f[DIR_PPP ])[k ] = mfaaa; - (D.f[DIR_PMP ])[ks ] = mfaca; - (D.f[DIR_PPM ])[kb ] = mfaac; - (D.f[DIR_PMM ])[kbs ] = mfacc; - (D.f[DIR_MPP ])[kw ] = mfcaa; - (D.f[DIR_MMP ])[ksw ] = mfcca; - (D.f[DIR_MPM ])[kbw ] = mfcac; - (D.f[DIR_MMM ])[kbsw] = mfccc; + (D.f[DIR_PPP])[k ] = mfaaa; + (D.f[DIR_PMP])[ks ] = mfaca; + (D.f[DIR_PPM])[kb ] = mfaac; + (D.f[DIR_PMM])[kbs ] = mfacc; + (D.f[DIR_MPP])[kw ] = mfcaa; + (D.f[DIR_MMP])[ksw ] = mfcca; + (D.f[DIR_MPM])[kbw ] = mfcac; + (D.f[DIR_MMM])[kbsw] = mfccc; } } } diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu index 97c1aff4d26cb85deaf1dd0d145245f28affc2e3..3706e5f929b50a2a72c107a982525ec3172eb144 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu @@ -51,7 +51,7 @@ __global__ void 
Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27( unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -67,7 +67,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27( const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -78,63 +78,63 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * 
numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = 
&DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = 
&DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -170,33 +170,33 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27( ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + 
c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + 
c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + @@ -966,7 +966,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27( unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -982,7 +982,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27( const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -993,63 +993,63 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] 
= &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + 
D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * 
numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1085,33 +1085,33 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27( ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 
;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real 
mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + 
c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + @@ -1762,7 +1762,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27( unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -1778,7 +1778,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27( const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -1789,63 +1789,63 @@ __global__ void Cumulant_One_chim_Comp_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = 
&DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = 
&DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// //index @@ -1857,33 +1857,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27( unsigned int kbs = neighborZ[ks]; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw ]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw ]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw ]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; 
+ real mfbaa = (D.f[DIR_0MM])[kbs ]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw ]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs ]; - real mfaca = (D.f[DIR_MPM ])[kbw ]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw ]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; + real mfaaa = (D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs ]; + real mfaca = (D.f[DIR_MPM])[kbw ]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + @@ -2204,33 +2204,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27( ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; mfbbb += drho - drhoPost; //////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00 ])[k ] = mfabb; - (D.f[DIR_M00 ])[kw ] = mfcbb; - (D.f[DIR_0P0 ])[k ] = mfbab; - (D.f[DIR_0M0 ])[ks ] = mfbcb; - (D.f[DIR_00P ])[k ] = mfbba; - (D.f[DIR_00M ])[kb ] = mfbbc; - (D.f[DIR_PP0 ])[k ] = mfaab; - (D.f[DIR_MM0 ])[ksw ] = mfccb; - (D.f[DIR_PM0 ])[ks ] = mfacb; - (D.f[DIR_MP0 ])[kw ] = mfcab; - (D.f[DIR_P0P ])[k ] = mfaba; - (D.f[DIR_M0M ])[kbw ] = mfcbc; - (D.f[DIR_P0M ])[kb ] = mfabc; - (D.f[DIR_M0P ])[kw ] = mfcba; - (D.f[DIR_0PP ])[k ] = mfbaa; - (D.f[DIR_0MM ])[kbs ] = mfbcc; - (D.f[DIR_0PM ])[kb ] = mfbac; - (D.f[DIR_0MP ])[ks ] = mfbca; + (D.f[DIR_P00])[k ] = mfabb; + (D.f[DIR_M00])[kw ] = mfcbb; + (D.f[DIR_0P0])[k ] = mfbab; + (D.f[DIR_0M0])[ks ] = mfbcb; + (D.f[DIR_00P])[k ] = mfbba; + 
(D.f[DIR_00M])[kb ] = mfbbc; + (D.f[DIR_PP0])[k ] = mfaab; + (D.f[DIR_MM0])[ksw ] = mfccb; + (D.f[DIR_PM0])[ks ] = mfacb; + (D.f[DIR_MP0])[kw ] = mfcab; + (D.f[DIR_P0P])[k ] = mfaba; + (D.f[DIR_M0M])[kbw ] = mfcbc; + (D.f[DIR_P0M])[kb ] = mfabc; + (D.f[DIR_M0P])[kw ] = mfcba; + (D.f[DIR_0PP])[k ] = mfbaa; + (D.f[DIR_0MM])[kbs ] = mfbcc; + (D.f[DIR_0PM])[kb ] = mfbac; + (D.f[DIR_0MP])[ks ] = mfbca; (D.f[DIR_000])[k ] = mfbbb; - (D.f[DIR_PPP ])[k ] = mfaaa; - (D.f[DIR_PMP ])[ks ] = mfaca; - (D.f[DIR_PPM ])[kb ] = mfaac; - (D.f[DIR_PMM ])[kbs ] = mfacc; - (D.f[DIR_MPP ])[kw ] = mfcaa; - (D.f[DIR_MMP ])[ksw ] = mfcca; - (D.f[DIR_MPM ])[kbw ] = mfcac; - (D.f[DIR_MMM ])[kbsw] = mfccc; + (D.f[DIR_PPP])[k ] = mfaaa; + (D.f[DIR_PMP])[ks ] = mfaca; + (D.f[DIR_PPM])[kb ] = mfaac; + (D.f[DIR_PMM])[kbs ] = mfacc; + (D.f[DIR_MPP])[kw ] = mfcaa; + (D.f[DIR_MMP])[ksw ] = mfcca; + (D.f[DIR_MPM])[kbw ] = mfcac; + (D.f[DIR_MMM])[kbsw] = mfccc; } } } diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu index 7adfd40da157d825d83c63b084bf1f855ea6dca2..c89c3cfe87560c808d47163b45d512fa0d7e494f 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu @@ -27,7 +27,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, real* vzOut, real* DDStart, real* G6, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -43,7 +43,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k < size_Mat) + if (k < numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -54,83 +54,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] 
= &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + 
D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * 
numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } Distributions6 G; if (EvenOrOdd == true) { - G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes]; } else { - G.g[DIR_M00] = &G6[DIR_P00 
*size_Mat]; - G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; + G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -1026,83 +1026,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, // Distributions27 D; // if (EvenOrOdd == true) // { -// D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; -// D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; -// D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; -// D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; -// D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; -// D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; -// D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; -// D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; -// D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; -// D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; -// D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; -// D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; -// D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; -// D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; -// D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; -// D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; -// D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; -// D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; -// D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; -// D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; -// D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; -// D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; -// D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; -// D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; -// D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; -// D.f[DIR_MPM] = 
&DDStart[DIR_MPM *size_Mat]; +// D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; +// D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; +// D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; +// D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; +// D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; +// D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; +// D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; +// D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; +// D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; +// D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; +// D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; +// D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; +// D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; +// D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; +// D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; +// D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; +// D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; +// D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; +// D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; +// D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; +// D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; +// D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; +// D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; +// D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; +// D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; // } // else // { -// D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; -// D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; -// D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; -// D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; -// D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; -// D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; -// D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; -// D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; -// D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; -// D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; -// D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; -// D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; -// D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; -// 
D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; -// D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; -// D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; -// D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; -// D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; -// D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; -// D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; -// D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; -// D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; -// D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; -// D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; -// D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; -// D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; +// D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; +// D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; +// D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; +// D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; +// D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; +// D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; +// D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; +// D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; +// D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; +// D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; +// D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; +// D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; +// D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; +// D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; +// D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; +// D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; +// D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; +// D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; +// D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; +// D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; +// D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; +// D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; +// D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; +// D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; +// D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; +// D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; // } // 
// Distributions6 G; // if (EvenOrOdd == true) // { -// G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; -// G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; -// G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; -// G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; -// G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; -// G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; +// G.g[DIR_P00] = &G6[DIR_P00 * size_Mat]; +// G.g[DIR_M00] = &G6[DIR_M00 * size_Mat]; +// G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat]; +// G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat]; +// G.g[DIR_00P] = &G6[DIR_00P * size_Mat]; +// G.g[DIR_00M] = &G6[DIR_00M * size_Mat]; // } // else // { -// G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; -// G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; -// G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; -// G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; -// G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; -// G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; +// G.g[DIR_M00] = &G6[DIR_P00 * size_Mat]; +// G.g[DIR_P00] = &G6[DIR_M00 * size_Mat]; +// G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat]; +// G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat]; +// G.g[DIR_00M] = &G6[DIR_00P * size_Mat]; +// G.g[DIR_00P] = &G6[DIR_00M * size_Mat]; // } // // //////////////////////////////////////////////////////////////////////////////// @@ -2006,83 +2006,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, // Distributions27 D; // if (EvenOrOdd == true) // { -// D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; -// D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; -// D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; -// D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; -// D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; -// D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; -// D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; -// D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; -// D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; -// D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; -// D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; -// D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; -// D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; -// D.f[DIR_M0P] = 
&DDStart[DIR_M0P *size_Mat]; -// D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; -// D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; -// D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; -// D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; -// D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; -// D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; -// D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; -// D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; -// D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; -// D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; -// D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; -// D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; +// D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; +// D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; +// D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; +// D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; +// D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; +// D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; +// D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; +// D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; +// D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; +// D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; +// D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; +// D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; +// D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; +// D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; +// D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; +// D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; +// D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; +// D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; +// D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; +// D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; +// D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; +// D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; +// D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; +// D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; +// D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; // } // else // { -// 
D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; -// D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; -// D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; -// D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; -// D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; -// D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; -// D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; -// D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; -// D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; -// D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; -// D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; -// D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; -// D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; -// D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; -// D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; -// D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; -// D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; -// D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; -// D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; -// D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; -// D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; -// D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; -// D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; -// D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; -// D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; -// D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; +// D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; +// D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; +// D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; +// D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; +// D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; +// D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; +// D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; +// D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; +// D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; +// D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; +// D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; +// D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; +// D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; +// D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; +// D.f[DIR_0MM] = 
&DDStart[DIR_0PP * size_Mat]; +// D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; +// D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; +// D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; +// D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; +// D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; +// D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; +// D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; +// D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; +// D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; +// D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; +// D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; // } // // Distributions6 G; // if (EvenOrOdd == true) // { -// G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; -// G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; -// G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; -// G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; -// G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; -// G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; +// G.g[DIR_P00] = &G6[DIR_P00 * size_Mat]; +// G.g[DIR_M00] = &G6[DIR_M00 * size_Mat]; +// G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat]; +// G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat]; +// G.g[DIR_00P] = &G6[DIR_00P * size_Mat]; +// G.g[DIR_00M] = &G6[DIR_00M * size_Mat]; // } // else // { -// G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; -// G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; -// G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; -// G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; -// G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; -// G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; +// G.g[DIR_M00] = &G6[DIR_P00 * size_Mat]; +// G.g[DIR_P00] = &G6[DIR_M00 * size_Mat]; +// G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat]; +// G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat]; +// G.g[DIR_00M] = &G6[DIR_00P * size_Mat]; +// G.g[DIR_00P] = &G6[DIR_00M * size_Mat]; // } // // //////////////////////////////////////////////////////////////////////////////// @@ -2153,33 +2153,33 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega, // real dyyuy = c1o2 * (-mgbcb + mgbab); // real dzzuz = c1o2 * (-mgbbc + mgbba); 
// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke -// real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; -// real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn -// real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; -// real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt -// real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; -// real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne -// real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; -// real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse -// real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw -// real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte -// real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; -// real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe -// real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw -// real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn -// real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; -// real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn -// real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts +// real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke +// real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; +// real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn +// real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; +// real 
mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt +// real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; +// real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne +// real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; +// real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse +// real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw +// real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte +// real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; +// real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe +// real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw +// real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn +// real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; +// real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn +// real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts // real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero -// real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne -// real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw -// real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse -// real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw -// real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne -// real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; -// real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse -// real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw +// real mfccc = 
(D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne +// real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw +// real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse +// real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw +// real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne +// real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; +// real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse +// real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw // //////////////////////////////////////////////////////////////////////////////////// // real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + // (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu index 5146242fed374a919b6dcc02774db1d8ce4f864a..0e3945829725c0614ed4da01d0bae3b99ba2720a 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu @@ -17,69 +17,69 @@ __global__ void DragLiftPost27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = 
&DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * 
numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -100,24 +100,24 @@ __global__ void DragLiftPost27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * 
numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -160,32 +160,32 @@ __global__ void DragLiftPost27( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + 
f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// double OnE = c0o1, OnW = c0o1, OnN = c0o1, OnS = c0o1, OnT = c0o1, OnB = c0o1, OnNE = c0o1, OnSW = c0o1, OnSE = c0o1, OnNW = c0o1, @@ -282,69 +282,69 @@ __global__ void DragLiftPre27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - 
D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP 
*size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -365,24 +365,24 @@ 
__global__ void DragLiftPre27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -425,32 +425,32 @@ __global__ void DragLiftPre27( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, 
f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// double OnE = c0o1, OnW = c0o1, OnN = c0o1, OnS = c0o1, OnT = c0o1, OnB = c0o1, OnNE = c0o1, OnSW = c0o1, OnSE = c0o1, OnNW = c0o1, diff --git 
a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu index acd62b46c5666fc5f621c3772438e42b7ebef5c6..93879d73a32458d5403fd3fd16e68e0fcea7753d 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu @@ -13,7 +13,7 @@ #include <iomanip> -//#include "Core/Logger/Logger.h" +#include "cuda/CudaGrid.h" #include "Parameter/Parameter.h" // includes, kernels @@ -22,7 +22,7 @@ using namespace vf::lbm::constant; -__global__ void enstrophyKernel ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat ); +__global__ void enstrophyKernel ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes ); __host__ __device__ inline void enstrophyFunction( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint index ); @@ -32,55 +32,40 @@ bool EnstrophyAnalyzer::run(uint iter) { if( iter % this->analyzeIter != 0 ) return false; - int lev = 0; - int size_Mat = this->para->getParD(lev)->numberOfNodes; - - thrust::device_vector<real> enstrophy( size_Mat, c0o1 ); - thrust::device_vector<uint> isFluid ( size_Mat, 0); - - unsigned int numberOfThreads = 128; - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX, - para->getParD(lev)->velocityY, - para->getParD(lev)->velocityZ, - para->getParD(lev)->rho, - para->getParD(lev)->pressure, - 
para->getParD(lev)->typeOfGridNode, - para->getParD(lev)->neighborX, - para->getParD(lev)->neighborY, - para->getParD(lev)->neighborZ, - para->getParD(lev)->numberOfNodes, - para->getParD(lev)->distributions.f[0], - para->getParD(lev)->isEvenTimestep); - //cudaDeviceSynchronize(); - getLastCudaError("LBCalcMacSP27 execution failed"); - - enstrophyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX, - para->getParD(lev)->velocityY, - para->getParD(lev)->velocityZ, - para->getParD(lev)->rho, - para->getParD(lev)->neighborX, - para->getParD(lev)->neighborY, - para->getParD(lev)->neighborZ, - para->getParD(lev)->neighborInverse, - para->getParD(lev)->typeOfGridNode, - enstrophy.data().get(), - isFluid.data().get(), - size_Mat); + int lev = 0; + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes); + + thrust::device_vector<real> enstrophy( this->para->getParD(lev)->numberOfNodes, c0o1); + thrust::device_vector<uint> isFluid ( this->para->getParD(lev)->numberOfNodes, 0); + + LBCalcMacCompSP27<<< grid.grid, grid.threads >>>( + para->getParD(lev)->velocityX, + para->getParD(lev)->velocityY, + para->getParD(lev)->velocityZ, + para->getParD(lev)->rho, + para->getParD(lev)->pressure, + para->getParD(lev)->typeOfGridNode, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + para->getParD(lev)->numberOfNodes, + para->getParD(lev)->distributions.f[0], + para->getParD(lev)->isEvenTimestep); + getLastCudaError("LBCalcMacCompSP27 execution failed"); + + enstrophyKernel<<< grid.grid, grid.threads >>>( + para->getParD(lev)->velocityX, + para->getParD(lev)->velocityY, + para->getParD(lev)->velocityZ, + para->getParD(lev)->rho, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + para->getParD(lev)->neighborInverse, + para->getParD(lev)->typeOfGridNode, + enstrophy.data().get(), + isFluid.data().get(), + 
para->getParD(lev)->numberOfNodes); cudaDeviceSynchronize(); getLastCudaError("enstrophyKernel execution failed"); @@ -97,7 +82,7 @@ bool EnstrophyAnalyzer::run(uint iter) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat) +__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes) { ////////////////////////////////////////////////////////////////////////// const uint x = threadIdx.x; // Globaler x-Index @@ -113,7 +98,7 @@ __global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho //if( index % 34 == 0 || index % 34 == 33 ) return; - if( index >= size_Mat) return; + if( index >= (uint)numberOfLBnodes) return; unsigned int BC; BC = geo[index]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu index 5470da46342c85e57370227313c8c82674a17e6e..4ced64c0152bdbbd9752f736e2edca2c51fbc2ff 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu @@ -14,7 +14,7 @@ __global__ void getSendFsPost27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -65,150 +65,150 @@ __global__ void getSendFsPost27(real* DD, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 
*size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + 
D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = 
&DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Fs Distributions27 Dbuff; - Dbuff.f[DIR_P00 ] = &bufferFs[DIR_P00 *buffmax]; - Dbuff.f[DIR_M00 ] = &bufferFs[DIR_M00 *buffmax]; - Dbuff.f[DIR_0P0 ] = &bufferFs[DIR_0P0 *buffmax]; - Dbuff.f[DIR_0M0 ] = &bufferFs[DIR_0M0 *buffmax]; - Dbuff.f[DIR_00P ] = &bufferFs[DIR_00P *buffmax]; - Dbuff.f[DIR_00M ] = &bufferFs[DIR_00M *buffmax]; - Dbuff.f[DIR_PP0 ] = &bufferFs[DIR_PP0 *buffmax]; - Dbuff.f[DIR_MM0 ] = &bufferFs[DIR_MM0 *buffmax]; - Dbuff.f[DIR_PM0 ] = &bufferFs[DIR_PM0 *buffmax]; - Dbuff.f[DIR_MP0 ] = &bufferFs[DIR_MP0 *buffmax]; - Dbuff.f[DIR_P0P ] = &bufferFs[DIR_P0P *buffmax]; - Dbuff.f[DIR_M0M ] = &bufferFs[DIR_M0M *buffmax]; - Dbuff.f[DIR_P0M ] = &bufferFs[DIR_P0M *buffmax]; - Dbuff.f[DIR_M0P ] = &bufferFs[DIR_M0P *buffmax]; - Dbuff.f[DIR_0PP ] = &bufferFs[DIR_0PP *buffmax]; - Dbuff.f[DIR_0MM ] = &bufferFs[DIR_0MM *buffmax]; - Dbuff.f[DIR_0PM ] = &bufferFs[DIR_0PM *buffmax]; - Dbuff.f[DIR_0MP ] = &bufferFs[DIR_0MP *buffmax]; - Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax]; - 
Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax]; - Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax]; - Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax]; - Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax]; - Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax]; - Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax]; - Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax]; - Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax]; + Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax]; + Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax]; + Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax]; + Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax]; + Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax]; + Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax]; + Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax]; + Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax]; + Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax]; + Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax]; + Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax]; + Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax]; + Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax]; + Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax]; + Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax]; + Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax]; + Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax]; + Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax]; + Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax]; + Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax]; + Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax]; + Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax]; + Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax]; + Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax]; + Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax]; + Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax]; + Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy to buffer - //(Dbuff.f[DIR_P00 ])[k] = (D.f[DIR_P00 ])[ke 
]; - //(Dbuff.f[DIR_M00 ])[k] = (D.f[DIR_M00 ])[kw ]; - //(Dbuff.f[DIR_0P0 ])[k] = (D.f[DIR_0P0 ])[kn ]; - //(Dbuff.f[DIR_0M0 ])[k] = (D.f[DIR_0M0 ])[ks ]; - //(Dbuff.f[DIR_00P ])[k] = (D.f[DIR_00P ])[kt ]; - //(Dbuff.f[DIR_00M ])[k] = (D.f[DIR_00M ])[kb ]; - //(Dbuff.f[DIR_PP0 ])[k] = (D.f[DIR_PP0 ])[kne ]; - //(Dbuff.f[DIR_MM0 ])[k] = (D.f[DIR_MM0 ])[ksw ]; - //(Dbuff.f[DIR_PM0 ])[k] = (D.f[DIR_PM0 ])[kse ]; - //(Dbuff.f[DIR_MP0 ])[k] = (D.f[DIR_MP0 ])[knw ]; - //(Dbuff.f[DIR_P0P ])[k] = (D.f[DIR_P0P ])[kte ]; - //(Dbuff.f[DIR_M0M ])[k] = (D.f[DIR_M0M ])[kbw ]; - //(Dbuff.f[DIR_P0M ])[k] = (D.f[DIR_P0M ])[kbe ]; - //(Dbuff.f[DIR_M0P ])[k] = (D.f[DIR_M0P ])[ktw ]; - //(Dbuff.f[DIR_0PP ])[k] = (D.f[DIR_0PP ])[ktn ]; - //(Dbuff.f[DIR_0MM ])[k] = (D.f[DIR_0MM ])[kbs ]; - //(Dbuff.f[DIR_0PM ])[k] = (D.f[DIR_0PM ])[kbn ]; - //(Dbuff.f[DIR_0MP ])[k] = (D.f[DIR_0MP ])[kts ]; + //(Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke ]; + //(Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw ]; + //(Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn ]; + //(Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks ]; + //(Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt ]; + //(Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb ]; + //(Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne ]; + //(Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw ]; + //(Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse ]; + //(Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw ]; + //(Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte ]; + //(Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw ]; + //(Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe ]; + //(Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw ]; + //(Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn ]; + //(Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs ]; + //(Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn ]; + //(Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts ]; //(Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero]; - //(Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ]; - //(Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ]; - //(Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ]; - 
//(Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ]; - //(Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ]; - //(Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ]; - //(Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ]; - //(Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ]; - (Dbuff.f[DIR_P00 ])[k] = (D.f[DIR_M00 ])[kw ]; - (Dbuff.f[DIR_M00 ])[k] = (D.f[DIR_P00 ])[ke ]; - (Dbuff.f[DIR_0P0 ])[k] = (D.f[DIR_0M0 ])[ks ]; - (Dbuff.f[DIR_0M0 ])[k] = (D.f[DIR_0P0 ])[kn ]; - (Dbuff.f[DIR_00P ])[k] = (D.f[DIR_00M ])[kb ]; - (Dbuff.f[DIR_00M ])[k] = (D.f[DIR_00P ])[kt ]; - (Dbuff.f[DIR_PP0 ])[k] = (D.f[DIR_MM0 ])[ksw ]; - (Dbuff.f[DIR_MM0 ])[k] = (D.f[DIR_PP0 ])[kne ]; - (Dbuff.f[DIR_PM0 ])[k] = (D.f[DIR_MP0 ])[knw ]; - (Dbuff.f[DIR_MP0 ])[k] = (D.f[DIR_PM0 ])[kse ]; - (Dbuff.f[DIR_P0P ])[k] = (D.f[DIR_M0M ])[kbw ]; - (Dbuff.f[DIR_M0M ])[k] = (D.f[DIR_P0P ])[kte ]; - (Dbuff.f[DIR_P0M ])[k] = (D.f[DIR_M0P ])[ktw ]; - (Dbuff.f[DIR_M0P ])[k] = (D.f[DIR_P0M ])[kbe ]; - (Dbuff.f[DIR_0PP ])[k] = (D.f[DIR_0MM ])[kbs ]; - (Dbuff.f[DIR_0MM ])[k] = (D.f[DIR_0PP ])[ktn ]; - (Dbuff.f[DIR_0PM ])[k] = (D.f[DIR_0MP ])[kts ]; - (Dbuff.f[DIR_0MP ])[k] = (D.f[DIR_0PM ])[kbn ]; + //(Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ]; + //(Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ]; + //(Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ]; + //(Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ]; + //(Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ]; + //(Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ]; + //(Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ]; + //(Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ]; + (Dbuff.f[DIR_P00])[k] = (D.f[DIR_M00])[kw ]; + (Dbuff.f[DIR_M00])[k] = (D.f[DIR_P00])[ke ]; + (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0M0])[ks ]; + (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0P0])[kn ]; + (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00M])[kb ]; + (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00P])[kt ]; + (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_MM0])[ksw ]; + (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_PP0])[kne ]; + (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_MP0])[knw ]; + 
(Dbuff.f[DIR_MP0])[k] = (D.f[DIR_PM0])[kse ]; + (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_M0M])[kbw ]; + (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_P0P])[kte ]; + (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_M0P])[ktw ]; + (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_P0M])[kbe ]; + (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0MM])[kbs ]; + (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0PP])[ktn ]; + (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0MP])[kts ]; + (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0PM])[kbn ]; (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero]; - (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_MMM ])[kbsw ]; - (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_PPM ])[kbne ]; - (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_MPM ])[kbnw ]; - (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_PMM ])[kbse ]; - (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_MMP ])[ktsw ]; - (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_PPP ])[ktne ]; - (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_MPP ])[ktnw ]; - (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_PMP ])[ktse ]; + (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_MMM])[kbsw ]; + (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_PPM])[kbne ]; + (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_MPM])[kbnw ]; + (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_PMM])[kbse ]; + (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_MMP])[ktsw ]; + (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_PPP])[ktne ]; + (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_MPP])[ktnw ]; + (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_PMP])[ktse ]; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -250,7 +250,7 @@ __global__ void setRecvFsPost27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -301,150 +301,150 @@ __global__ void setRecvFsPost27(real* DD, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = 
&DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * 
numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + 
D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Fs Distributions27 Dbuff; - Dbuff.f[DIR_P00 ] = &bufferFs[DIR_P00 *buffmax]; - Dbuff.f[DIR_M00 ] = &bufferFs[DIR_M00 *buffmax]; - Dbuff.f[DIR_0P0 ] = &bufferFs[DIR_0P0 *buffmax]; - Dbuff.f[DIR_0M0 ] = &bufferFs[DIR_0M0 *buffmax]; - Dbuff.f[DIR_00P ] = &bufferFs[DIR_00P *buffmax]; - Dbuff.f[DIR_00M ] = &bufferFs[DIR_00M *buffmax]; - Dbuff.f[DIR_PP0 ] = &bufferFs[DIR_PP0 *buffmax]; - Dbuff.f[DIR_MM0 ] = &bufferFs[DIR_MM0 *buffmax]; - Dbuff.f[DIR_PM0 ] = &bufferFs[DIR_PM0 *buffmax]; - Dbuff.f[DIR_MP0 ] = &bufferFs[DIR_MP0 *buffmax]; - Dbuff.f[DIR_P0P ] = &bufferFs[DIR_P0P *buffmax]; - Dbuff.f[DIR_M0M ] = &bufferFs[DIR_M0M *buffmax]; - Dbuff.f[DIR_P0M ] = &bufferFs[DIR_P0M *buffmax]; - Dbuff.f[DIR_M0P ] = &bufferFs[DIR_M0P *buffmax]; - Dbuff.f[DIR_0PP ] = &bufferFs[DIR_0PP *buffmax]; - Dbuff.f[DIR_0MM ] = &bufferFs[DIR_0MM *buffmax]; - Dbuff.f[DIR_0PM ] = &bufferFs[DIR_0PM *buffmax]; - Dbuff.f[DIR_0MP ] = &bufferFs[DIR_0MP *buffmax]; - Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax]; - Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP 
*buffmax]; - Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax]; - Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax]; - Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax]; - Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax]; - Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax]; - Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax]; - Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax]; + Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax]; + Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax]; + Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax]; + Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax]; + Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax]; + Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax]; + Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax]; + Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax]; + Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax]; + Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax]; + Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax]; + Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax]; + Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax]; + Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax]; + Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax]; + Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax]; + Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax]; + Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax]; + Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax]; + Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax]; + Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax]; + Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax]; + Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax]; + Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax]; + Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax]; + Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax]; + Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy from buffer - //(D.f[DIR_P00 ])[ke ] = (Dbuff.f[DIR_P00 ])[k]; - //(D.f[DIR_M00 ])[kw ] = 
(Dbuff.f[DIR_M00 ])[k]; - //(D.f[DIR_0P0 ])[kn ] = (Dbuff.f[DIR_0P0 ])[k]; - //(D.f[DIR_0M0 ])[ks ] = (Dbuff.f[DIR_0M0 ])[k]; - //(D.f[DIR_00P ])[kt ] = (Dbuff.f[DIR_00P ])[k]; - //(D.f[DIR_00M ])[kb ] = (Dbuff.f[DIR_00M ])[k]; - //(D.f[DIR_PP0 ])[kne ] = (Dbuff.f[DIR_PP0 ])[k]; - //(D.f[DIR_MM0 ])[ksw ] = (Dbuff.f[DIR_MM0 ])[k]; - //(D.f[DIR_PM0 ])[kse ] = (Dbuff.f[DIR_PM0 ])[k]; - //(D.f[DIR_MP0 ])[knw ] = (Dbuff.f[DIR_MP0 ])[k]; - //(D.f[DIR_P0P ])[kte ] = (Dbuff.f[DIR_P0P ])[k]; - //(D.f[DIR_M0M ])[kbw ] = (Dbuff.f[DIR_M0M ])[k]; - //(D.f[DIR_P0M ])[kbe ] = (Dbuff.f[DIR_P0M ])[k]; - //(D.f[DIR_M0P ])[ktw ] = (Dbuff.f[DIR_M0P ])[k]; - //(D.f[DIR_0PP ])[ktn ] = (Dbuff.f[DIR_0PP ])[k]; - //(D.f[DIR_0MM ])[kbs ] = (Dbuff.f[DIR_0MM ])[k]; - //(D.f[DIR_0PM ])[kbn ] = (Dbuff.f[DIR_0PM ])[k]; - //(D.f[DIR_0MP ])[kts ] = (Dbuff.f[DIR_0MP ])[k]; + //(D.f[DIR_P00])[ke ] = (Dbuff.f[DIR_P00])[k]; + //(D.f[DIR_M00])[kw ] = (Dbuff.f[DIR_M00])[k]; + //(D.f[DIR_0P0])[kn ] = (Dbuff.f[DIR_0P0])[k]; + //(D.f[DIR_0M0])[ks ] = (Dbuff.f[DIR_0M0])[k]; + //(D.f[DIR_00P])[kt ] = (Dbuff.f[DIR_00P])[k]; + //(D.f[DIR_00M])[kb ] = (Dbuff.f[DIR_00M])[k]; + //(D.f[DIR_PP0])[kne ] = (Dbuff.f[DIR_PP0])[k]; + //(D.f[DIR_MM0])[ksw ] = (Dbuff.f[DIR_MM0])[k]; + //(D.f[DIR_PM0])[kse ] = (Dbuff.f[DIR_PM0])[k]; + //(D.f[DIR_MP0])[knw ] = (Dbuff.f[DIR_MP0])[k]; + //(D.f[DIR_P0P])[kte ] = (Dbuff.f[DIR_P0P])[k]; + //(D.f[DIR_M0M])[kbw ] = (Dbuff.f[DIR_M0M])[k]; + //(D.f[DIR_P0M])[kbe ] = (Dbuff.f[DIR_P0M])[k]; + //(D.f[DIR_M0P])[ktw ] = (Dbuff.f[DIR_M0P])[k]; + //(D.f[DIR_0PP])[ktn ] = (Dbuff.f[DIR_0PP])[k]; + //(D.f[DIR_0MM])[kbs ] = (Dbuff.f[DIR_0MM])[k]; + //(D.f[DIR_0PM])[kbn ] = (Dbuff.f[DIR_0PM])[k]; + //(D.f[DIR_0MP])[kts ] = (Dbuff.f[DIR_0MP])[k]; //(D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k]; - //(D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k]; - //(D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k]; - //(D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k]; - //(D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP 
])[k]; - //(D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k]; - //(D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k]; - //(D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k]; - //(D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k]; - (D.f[DIR_M00 ])[kw ] = (Dbuff.f[DIR_P00 ])[k]; - (D.f[DIR_P00 ])[ke ] = (Dbuff.f[DIR_M00 ])[k]; - (D.f[DIR_0M0 ])[ks ] = (Dbuff.f[DIR_0P0 ])[k]; - (D.f[DIR_0P0 ])[kn ] = (Dbuff.f[DIR_0M0 ])[k]; - (D.f[DIR_00M ])[kb ] = (Dbuff.f[DIR_00P ])[k]; - (D.f[DIR_00P ])[kt ] = (Dbuff.f[DIR_00M ])[k]; - (D.f[DIR_MM0 ])[ksw ] = (Dbuff.f[DIR_PP0 ])[k]; - (D.f[DIR_PP0 ])[kne ] = (Dbuff.f[DIR_MM0 ])[k]; - (D.f[DIR_MP0 ])[knw ] = (Dbuff.f[DIR_PM0 ])[k]; - (D.f[DIR_PM0 ])[kse ] = (Dbuff.f[DIR_MP0 ])[k]; - (D.f[DIR_M0M ])[kbw ] = (Dbuff.f[DIR_P0P ])[k]; - (D.f[DIR_P0P ])[kte ] = (Dbuff.f[DIR_M0M ])[k]; - (D.f[DIR_M0P ])[ktw ] = (Dbuff.f[DIR_P0M ])[k]; - (D.f[DIR_P0M ])[kbe ] = (Dbuff.f[DIR_M0P ])[k]; - (D.f[DIR_0MM ])[kbs ] = (Dbuff.f[DIR_0PP ])[k]; - (D.f[DIR_0PP ])[ktn ] = (Dbuff.f[DIR_0MM ])[k]; - (D.f[DIR_0MP ])[kts ] = (Dbuff.f[DIR_0PM ])[k]; - (D.f[DIR_0PM ])[kbn ] = (Dbuff.f[DIR_0MP ])[k]; + //(D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k]; + //(D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k]; + //(D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k]; + //(D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k]; + //(D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k]; + //(D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k]; + //(D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k]; + //(D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k]; + (D.f[DIR_M00])[kw ] = (Dbuff.f[DIR_P00])[k]; + (D.f[DIR_P00])[ke ] = (Dbuff.f[DIR_M00])[k]; + (D.f[DIR_0M0])[ks ] = (Dbuff.f[DIR_0P0])[k]; + (D.f[DIR_0P0])[kn ] = (Dbuff.f[DIR_0M0])[k]; + (D.f[DIR_00M])[kb ] = (Dbuff.f[DIR_00P])[k]; + (D.f[DIR_00P])[kt ] = (Dbuff.f[DIR_00M])[k]; + (D.f[DIR_MM0])[ksw ] = (Dbuff.f[DIR_PP0])[k]; + (D.f[DIR_PP0])[kne ] = (Dbuff.f[DIR_MM0])[k]; + (D.f[DIR_MP0])[knw ] = (Dbuff.f[DIR_PM0])[k]; + (D.f[DIR_PM0])[kse ] = (Dbuff.f[DIR_MP0])[k]; + 
(D.f[DIR_M0M])[kbw ] = (Dbuff.f[DIR_P0P])[k]; + (D.f[DIR_P0P])[kte ] = (Dbuff.f[DIR_M0M])[k]; + (D.f[DIR_M0P])[ktw ] = (Dbuff.f[DIR_P0M])[k]; + (D.f[DIR_P0M])[kbe ] = (Dbuff.f[DIR_M0P])[k]; + (D.f[DIR_0MM])[kbs ] = (Dbuff.f[DIR_0PP])[k]; + (D.f[DIR_0PP])[ktn ] = (Dbuff.f[DIR_0MM])[k]; + (D.f[DIR_0MP])[kts ] = (Dbuff.f[DIR_0PM])[k]; + (D.f[DIR_0PM])[kbn ] = (Dbuff.f[DIR_0MP])[k]; (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k]; - (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_PPP ])[k]; - (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_MMP ])[k]; - (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_PMP ])[k]; - (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_MPP ])[k]; - (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_PPM ])[k]; - (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_MMM ])[k]; - (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_PMM ])[k]; - (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_MPM ])[k]; + (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_PPP])[k]; + (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_MMP])[k]; + (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_PMP])[k]; + (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_MPP])[k]; + (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_PPM])[k]; + (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_MMM])[k]; + (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_PMM])[k]; + (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_MPM])[k]; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -485,7 +485,7 @@ __global__ void getSendFsPre27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -536,123 +536,123 @@ __global__ void getSendFsPre27(real* DD, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P 
*size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * 
numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + 
D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Fs Distributions27 Dbuff; - Dbuff.f[DIR_P00 ] = &bufferFs[DIR_P00 *buffmax]; - Dbuff.f[DIR_M00 ] = &bufferFs[DIR_M00 *buffmax]; - Dbuff.f[DIR_0P0 ] = &bufferFs[DIR_0P0 *buffmax]; - Dbuff.f[DIR_0M0 ] = &bufferFs[DIR_0M0 *buffmax]; - Dbuff.f[DIR_00P ] = &bufferFs[DIR_00P *buffmax]; - Dbuff.f[DIR_00M ] = &bufferFs[DIR_00M *buffmax]; - Dbuff.f[DIR_PP0 ] = &bufferFs[DIR_PP0 *buffmax]; - Dbuff.f[DIR_MM0 ] = &bufferFs[DIR_MM0 *buffmax]; - Dbuff.f[DIR_PM0 ] = &bufferFs[DIR_PM0 *buffmax]; - Dbuff.f[DIR_MP0 ] = &bufferFs[DIR_MP0 *buffmax]; - Dbuff.f[DIR_P0P ] = &bufferFs[DIR_P0P *buffmax]; - Dbuff.f[DIR_M0M ] = &bufferFs[DIR_M0M *buffmax]; - Dbuff.f[DIR_P0M ] = &bufferFs[DIR_P0M *buffmax]; - Dbuff.f[DIR_M0P ] = &bufferFs[DIR_M0P *buffmax]; - Dbuff.f[DIR_0PP ] = &bufferFs[DIR_0PP *buffmax]; - Dbuff.f[DIR_0MM ] = &bufferFs[DIR_0MM *buffmax]; - Dbuff.f[DIR_0PM ] = &bufferFs[DIR_0PM *buffmax]; - Dbuff.f[DIR_0MP ] = &bufferFs[DIR_0MP *buffmax]; - Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax]; - Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax]; - Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP 
*buffmax]; - Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax]; - Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax]; - Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax]; - Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax]; - Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax]; - Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax]; + Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax]; + Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax]; + Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax]; + Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax]; + Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax]; + Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax]; + Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax]; + Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax]; + Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax]; + Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax]; + Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax]; + Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax]; + Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax]; + Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax]; + Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax]; + Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax]; + Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax]; + Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax]; + Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax]; + Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax]; + Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax]; + Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax]; + Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax]; + Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax]; + Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax]; + Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax]; + Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy to buffer - (Dbuff.f[DIR_P00 ])[k] = (D.f[DIR_P00 ])[ke ]; - (Dbuff.f[DIR_M00 ])[k] = (D.f[DIR_M00 ])[kw ]; - (Dbuff.f[DIR_0P0 ])[k] = (D.f[DIR_0P0 
])[kn ]; - (Dbuff.f[DIR_0M0 ])[k] = (D.f[DIR_0M0 ])[ks ]; - (Dbuff.f[DIR_00P ])[k] = (D.f[DIR_00P ])[kt ]; - (Dbuff.f[DIR_00M ])[k] = (D.f[DIR_00M ])[kb ]; - (Dbuff.f[DIR_PP0 ])[k] = (D.f[DIR_PP0 ])[kne ]; - (Dbuff.f[DIR_MM0 ])[k] = (D.f[DIR_MM0 ])[ksw ]; - (Dbuff.f[DIR_PM0 ])[k] = (D.f[DIR_PM0 ])[kse ]; - (Dbuff.f[DIR_MP0 ])[k] = (D.f[DIR_MP0 ])[knw ]; - (Dbuff.f[DIR_P0P ])[k] = (D.f[DIR_P0P ])[kte ]; - (Dbuff.f[DIR_M0M ])[k] = (D.f[DIR_M0M ])[kbw ]; - (Dbuff.f[DIR_P0M ])[k] = (D.f[DIR_P0M ])[kbe ]; - (Dbuff.f[DIR_M0P ])[k] = (D.f[DIR_M0P ])[ktw ]; - (Dbuff.f[DIR_0PP ])[k] = (D.f[DIR_0PP ])[ktn ]; - (Dbuff.f[DIR_0MM ])[k] = (D.f[DIR_0MM ])[kbs ]; - (Dbuff.f[DIR_0PM ])[k] = (D.f[DIR_0PM ])[kbn ]; - (Dbuff.f[DIR_0MP ])[k] = (D.f[DIR_0MP ])[kts ]; + (Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke ]; + (Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw ]; + (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn ]; + (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks ]; + (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt ]; + (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb ]; + (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne ]; + (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw ]; + (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse ]; + (Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw ]; + (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte ]; + (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw ]; + (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe ]; + (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw ]; + (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn ]; + (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs ]; + (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn ]; + (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts ]; (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero]; - (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ]; - (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ]; - (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ]; - (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ]; - (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ]; - (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ]; - (Dbuff.f[DIR_PMM ])[k] = 
(D.f[DIR_PMM ])[kbse ]; - (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ]; + (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ]; + (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ]; + (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ]; + (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ]; + (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ]; + (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ]; + (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ]; + (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ]; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -694,7 +694,7 @@ __global__ void setRecvFsPre27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -745,123 +745,123 @@ __global__ void setRecvFsPre27(real* DD, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; 
- D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M 
*size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Fs Distributions27 Dbuff; - Dbuff.f[DIR_P00 ] = &bufferFs[DIR_P00 *buffmax]; - Dbuff.f[DIR_M00 ] = &bufferFs[DIR_M00 *buffmax]; - Dbuff.f[DIR_0P0 ] = &bufferFs[DIR_0P0 *buffmax]; - Dbuff.f[DIR_0M0 ] = &bufferFs[DIR_0M0 *buffmax]; - Dbuff.f[DIR_00P ] = &bufferFs[DIR_00P *buffmax]; - Dbuff.f[DIR_00M ] = &bufferFs[DIR_00M *buffmax]; - Dbuff.f[DIR_PP0 ] = &bufferFs[DIR_PP0 *buffmax]; - Dbuff.f[DIR_MM0 ] = &bufferFs[DIR_MM0 *buffmax]; - Dbuff.f[DIR_PM0 ] = &bufferFs[DIR_PM0 *buffmax]; - Dbuff.f[DIR_MP0 ] = &bufferFs[DIR_MP0 *buffmax]; - Dbuff.f[DIR_P0P ] = &bufferFs[DIR_P0P *buffmax]; - Dbuff.f[DIR_M0M ] = &bufferFs[DIR_M0M *buffmax]; - Dbuff.f[DIR_P0M ] = &bufferFs[DIR_P0M *buffmax]; - Dbuff.f[DIR_M0P ] = &bufferFs[DIR_M0P *buffmax]; - Dbuff.f[DIR_0PP ] = &bufferFs[DIR_0PP *buffmax]; - Dbuff.f[DIR_0MM ] = &bufferFs[DIR_0MM *buffmax]; - Dbuff.f[DIR_0PM ] = &bufferFs[DIR_0PM *buffmax]; - Dbuff.f[DIR_0MP ] = &bufferFs[DIR_0MP *buffmax]; - Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax]; - Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax]; - Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax]; - Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax]; - Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax]; - Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax]; - Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax]; - Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax]; - Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax]; + Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax]; + Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax]; + Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax]; + Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax]; + Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax]; + Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax]; + Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax]; + Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax]; + 
Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax]; + Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax]; + Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax]; + Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax]; + Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax]; + Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax]; + Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax]; + Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax]; + Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax]; + Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax]; + Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax]; + Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax]; + Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax]; + Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax]; + Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax]; + Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax]; + Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax]; + Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax]; + Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy from buffer - (D.f[DIR_P00 ])[ke ] = (Dbuff.f[DIR_P00 ])[k]; - (D.f[DIR_M00 ])[kw ] = (Dbuff.f[DIR_M00 ])[k]; - (D.f[DIR_0P0 ])[kn ] = (Dbuff.f[DIR_0P0 ])[k]; - (D.f[DIR_0M0 ])[ks ] = (Dbuff.f[DIR_0M0 ])[k]; - (D.f[DIR_00P ])[kt ] = (Dbuff.f[DIR_00P ])[k]; - (D.f[DIR_00M ])[kb ] = (Dbuff.f[DIR_00M ])[k]; - (D.f[DIR_PP0 ])[kne ] = (Dbuff.f[DIR_PP0 ])[k]; - (D.f[DIR_MM0 ])[ksw ] = (Dbuff.f[DIR_MM0 ])[k]; - (D.f[DIR_PM0 ])[kse ] = (Dbuff.f[DIR_PM0 ])[k]; - (D.f[DIR_MP0 ])[knw ] = (Dbuff.f[DIR_MP0 ])[k]; - (D.f[DIR_P0P ])[kte ] = (Dbuff.f[DIR_P0P ])[k]; - (D.f[DIR_M0M ])[kbw ] = (Dbuff.f[DIR_M0M ])[k]; - (D.f[DIR_P0M ])[kbe ] = (Dbuff.f[DIR_P0M ])[k]; - (D.f[DIR_M0P ])[ktw ] = (Dbuff.f[DIR_M0P ])[k]; - (D.f[DIR_0PP ])[ktn ] = (Dbuff.f[DIR_0PP ])[k]; - (D.f[DIR_0MM ])[kbs ] = (Dbuff.f[DIR_0MM ])[k]; - (D.f[DIR_0PM ])[kbn ] = (Dbuff.f[DIR_0PM ])[k]; - (D.f[DIR_0MP 
])[kts ] = (Dbuff.f[DIR_0MP ])[k]; + (D.f[DIR_P00])[ke ] = (Dbuff.f[DIR_P00])[k]; + (D.f[DIR_M00])[kw ] = (Dbuff.f[DIR_M00])[k]; + (D.f[DIR_0P0])[kn ] = (Dbuff.f[DIR_0P0])[k]; + (D.f[DIR_0M0])[ks ] = (Dbuff.f[DIR_0M0])[k]; + (D.f[DIR_00P])[kt ] = (Dbuff.f[DIR_00P])[k]; + (D.f[DIR_00M])[kb ] = (Dbuff.f[DIR_00M])[k]; + (D.f[DIR_PP0])[kne ] = (Dbuff.f[DIR_PP0])[k]; + (D.f[DIR_MM0])[ksw ] = (Dbuff.f[DIR_MM0])[k]; + (D.f[DIR_PM0])[kse ] = (Dbuff.f[DIR_PM0])[k]; + (D.f[DIR_MP0])[knw ] = (Dbuff.f[DIR_MP0])[k]; + (D.f[DIR_P0P])[kte ] = (Dbuff.f[DIR_P0P])[k]; + (D.f[DIR_M0M])[kbw ] = (Dbuff.f[DIR_M0M])[k]; + (D.f[DIR_P0M])[kbe ] = (Dbuff.f[DIR_P0M])[k]; + (D.f[DIR_M0P])[ktw ] = (Dbuff.f[DIR_M0P])[k]; + (D.f[DIR_0PP])[ktn ] = (Dbuff.f[DIR_0PP])[k]; + (D.f[DIR_0MM])[kbs ] = (Dbuff.f[DIR_0MM])[k]; + (D.f[DIR_0PM])[kbn ] = (Dbuff.f[DIR_0PM])[k]; + (D.f[DIR_0MP])[kts ] = (Dbuff.f[DIR_0MP])[k]; (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k]; - (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k]; - (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k]; - (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k]; - (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k]; - (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k]; - (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k]; - (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k]; - (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k]; + (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k]; + (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k]; + (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k]; + (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k]; + (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k]; + (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k]; + (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k]; + (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k]; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -903,7 +903,7 @@ __global__ void getSendGsF3( unsigned int* neighborX, unsigned int* neighborY, 
unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -931,31 +931,31 @@ __global__ void getSendGsF3( Distributions6 G; if (isEvenTimestep) { - G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; + G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Gs Distributions6 Dbuff; - Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 *buffmax]; - Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 *buffmax]; - Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 *buffmax]; - Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 *buffmax]; - Dbuff.g[DIR_00P] = &bufferGs[DIR_00P *buffmax]; - Dbuff.g[DIR_00M] = &bufferGs[DIR_00M *buffmax]; + Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax]; + Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax]; + Dbuff.g[DIR_0P0] = 
&bufferGs[DIR_0P0 * buffmax]; + Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax]; + Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax]; + Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //write Gs to buffer (Dbuff.g[DIR_P00])[k] = (G.g[DIR_M00])[kw]; @@ -1006,7 +1006,7 @@ __global__ void setRecvGsF3( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -1034,31 +1034,31 @@ __global__ void setRecvGsF3( Distributions6 G; if (isEvenTimestep) { - G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; + G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes]; + G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes]; + G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes]; + G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes]; + G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes]; + G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes]; } 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set Pointer for Buffer Gs Distributions6 Dbuff; - Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 *buffmax]; - Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 *buffmax]; - Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 *buffmax]; - Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 *buffmax]; - Dbuff.g[DIR_00P] = &bufferGs[DIR_00P *buffmax]; - Dbuff.g[DIR_00M] = &bufferGs[DIR_00M *buffmax]; + Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax]; + Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax]; + Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 * buffmax]; + Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax]; + Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax]; + Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //write buffer to Gs (G.g[DIR_M00])[kw] = (Dbuff.g[DIR_P00])[k]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h index ee987ae23402ef304220349db77084cc341ccd5a..ae8cbb77ec2493126d64b90a7119cbfa3efee666 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h @@ -29,7 +29,7 @@ void KernelCas27(unsigned int grid_nx, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelCasSP27(unsigned int numberOfThreads, @@ -39,7 +39,7 @@ void KernelCasSP27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelCasSPMS27(unsigned int numberOfThreads, @@ -49,7 +49,7 @@ void KernelCasSPMS27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, 
bool EvenOrOdd); void KernelCasSPMSOHM27( unsigned int numberOfThreads, @@ -59,7 +59,7 @@ void KernelCasSPMSOHM27( unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelKumCompSRTSP27( @@ -70,7 +70,7 @@ void KernelKumCompSRTSP27( unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd); @@ -82,7 +82,7 @@ void KernelCumulantD3Q27All4(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd); @@ -94,7 +94,7 @@ void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int size_Array, int level, real* forces, @@ -112,7 +112,7 @@ void KernelKum1hSP27( unsigned int numberOfThreads, real* coordY, real* coordZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelCascadeSP27(unsigned int numberOfThreads, @@ -122,7 +122,7 @@ void KernelCascadeSP27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelKumNewSP27( unsigned int numberOfThreads, @@ -132,7 +132,7 @@ void KernelKumNewSP27( unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); @@ -144,7 +144,7 @@ void CumulantOnePreconditionedErrorDiffusionChimCompSP27( unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int size_Array, int level, real* forces, @@ -158,7 +158,7 @@ void CumulantOnePreconditionedChimCompSP27( unsigned int* neighborY, 
unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int size_Array, int level, real* forces, @@ -172,7 +172,7 @@ void CumulantOneChimCompSP27( unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int size_Array, int level, real* forces, @@ -189,7 +189,7 @@ void KernelKumIsoTestSP27(unsigned int numberOfThreads, real* dxxUx, real* dyyUy, real* dzzUz, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelKumCompSP27( unsigned int numberOfThreads, @@ -199,7 +199,7 @@ void KernelKumCompSP27( unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelWaleBySoniMalavCumAA2016CompSP27( @@ -215,7 +215,7 @@ void KernelWaleBySoniMalavCumAA2016CompSP27( real* veloZ, real* DD, real* turbulentViscosity, - int size_Mat, + unsigned long long numberOfLBnodes, int size_Array, int level, real* forces, @@ -227,7 +227,7 @@ void KernelPMCumOneCompSP27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, real porosity, @@ -245,7 +245,7 @@ void KernelADincomp7( unsigned int numberOfThreads, unsigned int* neighborZ, real* DD, real* DD7, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void KernelADincomp27( unsigned int numberOfThreads, @@ -256,7 +256,7 @@ void KernelADincomp27( unsigned int numberOfThreads, unsigned int* neighborZ, real* DD, real* DD7, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); void Init27(int myid, @@ -267,7 +267,7 @@ void Init27(int myid, unsigned int* neighborY, unsigned int* neighborZ, real* vParab, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz, @@ -285,7 +285,7 @@ void 
InitNonEqPartSP27(unsigned int numberOfThreads, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, real omega, bool EvenOrOdd); @@ -300,7 +300,7 @@ void InitThS7( unsigned int numberOfThreads, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool EvenOrOdd); @@ -313,7 +313,7 @@ void InitADDev27( unsigned int numberOfThreads, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD27, bool EvenOrOdd); @@ -330,7 +330,7 @@ void PostProcessorF3_2018Fehlberg( real* vzOut, real* DDStart, real* G6, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd); @@ -343,7 +343,7 @@ void CalcMac27( real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz, @@ -359,7 +359,7 @@ void CalcMacSP27(real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -373,7 +373,7 @@ void CalcMacCompSP27(real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -383,7 +383,7 @@ void CalcMacThS7( real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD7, bool isEvenTimestep); @@ -395,7 +395,7 @@ void PlaneConcThS7(real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD7, bool isEvenTimestep); 
@@ -407,7 +407,7 @@ void PlaneConcThS27(real* Conc, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD27, bool isEvenTimestep); @@ -418,7 +418,7 @@ void CalcConcentration27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD27, bool isEvenTimestep); @@ -431,7 +431,7 @@ void CalcMedSP27( real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -445,7 +445,7 @@ void CalcMedCompSP27(real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -461,7 +461,7 @@ void CalcMedCompAD27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, real* DD_AD, @@ -477,7 +477,7 @@ void CalcMacMedSP27( real* vxD, unsigned int* neighborY, unsigned int* neighborZ, unsigned int tdiff, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, bool isEvenTimestep); @@ -487,7 +487,7 @@ void ResetMedianValuesSP27( real* vzD, real* rhoD, real* pressD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, bool isEvenTimestep); @@ -498,7 +498,7 @@ void ResetMedianValuesAD27( real* rhoD, real* pressD, real* concD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, bool isEvenTimestep); @@ -511,7 +511,7 @@ void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ, unsigned int* neighborX, unsigned int* neighborY, 
unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -525,7 +525,7 @@ void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -541,7 +541,7 @@ void Calc3rdMomentsIncompSP27(real* CUMbbb, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -557,7 +557,7 @@ void Calc3rdMomentsCompSP27(real* CUMbbb, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -576,7 +576,7 @@ void CalcHigherMomentsIncompSP27(real* CUMcbb, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -595,7 +595,7 @@ void CalcHigherMomentsCompSP27(real* CUMcbb, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, real* DD, bool isEvenTimestep); @@ -612,7 +612,7 @@ void LBCalcMeasurePoints27(real* vxMP, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, unsigned int numberOfThreads, bool isEvenTimestep); @@ -627,7 +627,7 @@ void BcPress27(int nx, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void BcVel27(int nx, @@ -641,7 +641,7 @@ void BcVel27(int nx, unsigned int* 
neighborY, unsigned int* neighborZ, real* DD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, real u0x, real om); @@ -661,7 +661,7 @@ void QDevCompThinWalls27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, unsigned int* neighborWSB, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -675,7 +675,7 @@ void QDevIncompHighNu27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QDevCompHighNu27(unsigned int numberOfThreads, @@ -687,7 +687,7 @@ void QDevCompHighNu27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -704,7 +704,7 @@ void QVelDeviceCouette27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDevice1h27( unsigned int numberOfThreads, @@ -726,7 +726,7 @@ void QVelDevice1h27( unsigned int numberOfThreads, real* coordX, real* coordY, real* coordZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -743,7 +743,7 @@ void QVelDevCompPlusSlip27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDevComp27(LBMSimulationParameter* 
parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -762,7 +762,7 @@ void QVelDevCompThinWalls27(unsigned int numberOfThreads, unsigned int* neighborY, unsigned int* neighborZ, unsigned int* neighborWSB, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -779,7 +779,7 @@ void QVelDevIncompHighNu27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVelDevCompHighNu27(unsigned int numberOfThreads, @@ -794,7 +794,7 @@ void QVelDevCompHighNu27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVeloDevEQ27(unsigned int numberOfThreads, @@ -808,7 +808,7 @@ void QVeloDevEQ27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QVeloStreetDevEQ27( @@ -848,7 +848,7 @@ void QSlipGeomDevComp27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QSlipNormDevComp27(unsigned int numberOfThreads, @@ -863,7 +863,7 @@ void QSlipNormDevComp27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QStressDevComp27(Parameter *para, QforBoundaryConditions* boundaryCondition, const int level); @@ -883,7 +883,7 @@ void QPressDevFixBackflow27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, 
unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QPressDevDirDepBot27(unsigned int numberOfThreads, @@ -895,11 +895,13 @@ void QPressDevDirDepBot27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); + void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); void QPressDevOld27(unsigned int numberOfThreads, @@ -912,7 +914,7 @@ void QPressDevOld27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -928,7 +930,7 @@ void QPressDevZero27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QPressDevFake27( unsigned int numberOfThreads, @@ -941,7 +943,7 @@ void QPressDevFake27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition); @@ -956,7 +958,7 @@ void QPressDev27_IntBB( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void 
QPressDevAntiBB27( unsigned int numberOfThreads, @@ -972,7 +974,7 @@ void QPressDevAntiBB27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void PressSchlaffer27(unsigned int numberOfThreads, @@ -989,7 +991,7 @@ void PressSchlaffer27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void VelSchlaffer27( unsigned int numberOfThreads, @@ -1004,9 +1006,17 @@ void VelSchlaffer27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); +void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void PrecursorDevEQ27(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void PrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + +void QPrecursorDevDistributions(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio); + void QADDev7(unsigned int numberOfThreads, real* DD, real* DD7, @@ -1019,7 +1029,7 @@ void QADDev7(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); ////////////////////////////////////////////////////////////////////////// @@ -1033,7 +1043,7 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel( uint* neighborZ, real* distributions, real* distributionsAD, - 
int size_Mat, + unsigned long long numberOfLBnodes, real* forces, bool isEvenTimestep); @@ -1053,7 +1063,7 @@ void ADSlipVelDevComp( uint * neighborX, uint * neighborY, uint * neighborZ, - uint size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADDirichletDev27( unsigned int numberOfThreads, @@ -1068,7 +1078,7 @@ void QADDirichletDev27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADBBDev27( unsigned int numberOfThreads, @@ -1083,7 +1093,7 @@ void QADBBDev27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADVelDev7(unsigned int numberOfThreads, @@ -1099,7 +1109,7 @@ void QADVelDev7(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); @@ -1116,7 +1126,7 @@ void QADVelDev27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADPressDev7( unsigned int numberOfThreads, @@ -1132,7 +1142,7 @@ void QADPressDev7( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADPressDev27(unsigned int numberOfThreads, @@ -1148,7 +1158,7 @@ void QADPressDev27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADPressNEQNeighborDev27( @@ -1161,7 +1171,7 @@ void QADPressNEQNeighborDev27( unsigned int* 
neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep ); @@ -1177,7 +1187,7 @@ void QNoSlipADincompDev7(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QNoSlipADincompDev27(unsigned int numberOfThreads, @@ -1192,7 +1202,7 @@ void QNoSlipADincompDev27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADVeloIncompDev7( unsigned int numberOfThreads, @@ -1208,7 +1218,7 @@ void QADVeloIncompDev7( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); @@ -1225,7 +1235,7 @@ void QADVeloIncompDev27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADPressIncompDev7( unsigned int numberOfThreads, @@ -1241,7 +1251,7 @@ void QADPressIncompDev7( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void QADPressIncompDev27( unsigned int numberOfThreads, @@ -1257,7 +1267,7 @@ void QADPressIncompDev27( unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void PropVelo( unsigned int numberOfThreads, @@ -1270,7 +1280,7 @@ void PropVelo( unsigned int numberOfThreads, real* uz, int* k_Q, unsigned int size_Prop, - unsigned int size_Mat, + unsigned long long 
numberOfLBnodes, unsigned int* bcMatD, real* DD, bool EvenOrOdd); @@ -1283,8 +1293,8 @@ void ScaleCF27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1306,8 +1316,8 @@ void ScaleFC27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1329,8 +1339,8 @@ void ScaleCFEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1353,8 +1363,8 @@ void ScaleFCEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1377,8 +1387,8 @@ void ScaleCFLast27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1401,8 +1411,8 @@ void ScaleFCLast27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* 
posFSWB, @@ -1425,8 +1435,8 @@ void ScaleCFpress27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1449,8 +1459,8 @@ void ScaleFCpress27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1473,8 +1483,8 @@ void ScaleCF_Fix_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1497,8 +1507,8 @@ void ScaleCF_Fix_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1521,8 +1531,8 @@ void ScaleCF_0817_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1547,8 +1557,8 @@ void ScaleCF_comp_D3Q27F3_2018( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ 
-1572,8 +1582,8 @@ void ScaleCF_comp_D3Q27F3(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1597,8 +1607,8 @@ void ScaleCF_staggered_time_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1624,8 +1634,8 @@ void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1649,8 +1659,8 @@ void ScaleCF_AA2016_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1674,8 +1684,8 @@ void ScaleCF_NSPress_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1698,8 +1708,8 @@ void ScaleFC_Fix_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* 
posFSWB, @@ -1722,8 +1732,8 @@ void ScaleFC_Fix_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1746,8 +1756,8 @@ void ScaleFC_0817_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1772,8 +1782,8 @@ void ScaleFC_comp_D3Q27F3_2018(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1797,8 +1807,8 @@ void ScaleFC_comp_D3Q27F3( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1822,8 +1832,8 @@ void ScaleFC_staggered_time_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1849,8 +1859,8 @@ void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, 
unsigned int* posFSWB, @@ -1874,8 +1884,8 @@ void ScaleFC_AA2016_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1899,8 +1909,8 @@ void ScaleFC_NSPress_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1925,8 +1935,8 @@ void ScaleCFThS7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1945,8 +1955,8 @@ void ScaleFCThS7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1965,8 +1975,8 @@ void ScaleCFThSMG7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1986,8 +1996,8 @@ void ScaleFCThSMG7(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -2007,8 
+2017,8 @@ void ScaleCFThS27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -2028,8 +2038,8 @@ void ScaleFCThS27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -2049,7 +2059,7 @@ void DragLiftPostD27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2063,7 +2073,7 @@ void DragLiftPreD27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2074,7 +2084,7 @@ void CalcCPtop27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2085,7 +2095,7 @@ void CalcCPbottom27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2096,7 +2106,7 @@ void GetSendFsPreDev27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads, cudaStream_t stream = CU_STREAM_LEGACY); @@ -2108,7 +2118,7 @@ void GetSendFsPostDev27(real* DD, unsigned int* neighborX, unsigned int* neighborY, 
unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads, cudaStream_t stream = CU_STREAM_LEGACY); @@ -2120,7 +2130,7 @@ void SetRecvFsPreDev27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads, cudaStream_t stream = CU_STREAM_LEGACY); @@ -2131,7 +2141,7 @@ void SetRecvFsPostDev27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads, cudaStream_t stream = CU_STREAM_LEGACY); @@ -2144,7 +2154,7 @@ void getSendGsDevF3( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2156,7 +2166,7 @@ void setRecvGsDevF3( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, unsigned int numberOfThreads); @@ -2172,7 +2182,7 @@ void WallFuncDev27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); void SetOutputWallVelocitySP27(unsigned int numberOfThreads, @@ -2190,7 +2200,7 @@ void SetOutputWallVelocitySP27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool isEvenTimestep); @@ -2204,7 +2214,7 @@ void GetVelotoForce27(unsigned int numberOfThreads, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); 
void InitParticlesDevice(real* coordX, @@ -2229,7 +2239,7 @@ void InitParticlesDevice(real* coordX, unsigned int* neighborWSB, int level, unsigned int numberOfParticles, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads); void MoveParticlesDevice(real* coordX, @@ -2257,16 +2267,16 @@ void MoveParticlesDevice(real* coordX, unsigned int timestep, unsigned int numberOfTimesteps, unsigned int numberOfParticles, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads, bool isEvenTimestep); void initRandomDevice(curandState* state, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int numberOfThreads); void generateRandomValuesDevice(curandState* state, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* randArray, unsigned int numberOfThreads); @@ -2285,7 +2295,7 @@ void CalcTurbulenceIntensityDevice( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, uint numberOfThreads); diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh index 94b9704b7ca57df4cd985f5aff9521b8a087b97f..3134db44346ee7f465a5c8f04505ee5749482fbf 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh @@ -22,7 +22,7 @@ __global__ void LB_Kernel_Casc27(real s9, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); __global__ void LB_Kernel_Casc_SP_27( real s9, @@ -31,7 +31,7 @@ __global__ void LB_Kernel_Casc_SP_27( real s9, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); __global__ void LB_Kernel_Casc_SP_MS_27( real s9, @@ -40,7 +40,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27( real s9, unsigned int* 
neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real s9, @@ -49,134 +49,134 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27( real s9, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27( - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void LB_Kernel_Kum_1h_SP_27( real omega, - real deltaPhi, - real angularVelocity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, 
- real* coordZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + real deltaPhi, + real angularVelocity, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LB_Kernel_Cascade_SP_27( real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LB_Kernel_Kum_New_SP_27( real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - real* dxxUx, - real* dyyUy, - real* dzzUz, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + real* dxxUx, + real* dyyUy, + real* dzzUz, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LB_Kernel_Kum_Comp_SP_27(real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void 
Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27( - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27( - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void Cumulant_One_chim_Comp_SP_27( - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); inline __device__ void forwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K); @@ -189,57 +189,57 @@ inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, rea __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27( - real omega_in, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - real* veloX, - real* veloY, - real* veloZ, - real* DDStart, - real* turbulentViscosity, - int size_Mat, - int level, - real* forces, - bool 
EvenOrOdd); + real omega_in, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + real* veloX, + real* veloY, + real* veloZ, + real* DDStart, + real* turbulentViscosity, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - real porosity, - real darcy, - real forchheimer, - unsigned int sizeOfPorousMedia, - unsigned int* nodeIdsPorousMedia, - bool EvenOrOdd); + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + real porosity, + real darcy, + real forchheimer, + unsigned int sizeOfPorousMedia, + unsigned int* nodeIdsPorousMedia, + bool EvenOrOdd); __global__ void LB_Kernel_AD_Incomp_7( real diffusivity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - real* DD7, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + real* DD7, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LB_Kernel_AD_Incomp_27( real diffusivity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - real* DD27, - int size_Mat, - bool EvenOrOdd); + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + real* DD27, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LBInit27( int myid, int numprocs, @@ -249,7 +249,7 @@ __global__ void LBInit27( int myid, unsigned int* neighborY, unsigned int* neighborZ, real* 
vParabel, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz, @@ -266,7 +266,7 @@ __global__ void LBInitNonEqPartSP27(unsigned int* neighborX, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, real omega, bool EvenOrOdd); @@ -279,7 +279,7 @@ __global__ void InitAD7( unsigned int* neighborX, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool EvenOrOdd); @@ -291,26 +291,26 @@ __global__ void InitAD27(unsigned int* neighborX, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD27, bool EvenOrOdd); __global__ void LB_PostProcessor_F3_2018_Fehlberg( - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* rhoOut, - real* vxOut, - real* vyOut, - real* vzOut, - real* DDStart, - real* G6, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd); + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* rhoOut, + real* vxOut, + real* vyOut, + real* vzOut, + real* DDStart, + real* G6, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd); __global__ void LBCalcMac27( real* vxD, real* vyD, @@ -320,7 +320,7 @@ __global__ void LBCalcMac27( real* vxD, unsigned int* neighborY, unsigned int* neighborZ, unsigned int* geoD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool isEvenTimestep); @@ -333,60 +333,60 @@ __global__ void LBCalcMacSP27( real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool isEvenTimestep); __global__ void LBCalcMacCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - 
unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep); + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep); __global__ void CalcConc7( real* Conc, unsigned int* geoD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool isEvenTimestep); __global__ void GetPlaneConc7(real* Conc, - int* kPC, - unsigned int numberOfPointskPC, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD7, - bool isEvenTimestep); + int* kPC, + unsigned int numberOfPointskPC, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD7, + bool isEvenTimestep); __global__ void GetPlaneConc27(real* Conc, - int* kPC, - unsigned int numberOfPointskPC, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD27, - bool isEvenTimestep); + int* kPC, + unsigned int numberOfPointskPC, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD27, + bool isEvenTimestep); __global__ void CalcConc27(real* Conc, unsigned int* geoD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD27, bool isEvenTimestep); @@ -399,38 +399,38 @@ __global__ void LBCalcMedSP27( real* vxD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* 
DD, bool isEvenTimestep); __global__ void LBCalcMedCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep); + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep); __global__ void LBCalcMedCompAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - real* DD_AD, - bool isEvenTimestep); + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + real* DD_AD, + bool isEvenTimestep); __global__ void LBCalcMacMedSP27( real* vxD, real* vyD, @@ -442,119 +442,119 @@ __global__ void LBCalcMacMedSP27( real* vxD, unsigned int* neighborY, unsigned int* neighborZ, unsigned int tdiff, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void LBResetMedianValuesSP27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int size_Mat, - bool isEvenTimestep); + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void LBResetMedianValuesAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int size_Mat, - bool isEvenTimestep); + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ 
void LBCalc2ndMomentsIncompSP27( real* kxyFromfcNEQ, - real* kyzFromfcNEQ, - real* kxzFromfcNEQ, - real* kxxMyyFromfcNEQ, - real* kxxMzzFromfcNEQ, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep); + real* kyzFromfcNEQ, + real* kxzFromfcNEQ, + real* kxxMyyFromfcNEQ, + real* kxxMzzFromfcNEQ, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep); __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ, - real* kyzFromfcNEQ, - real* kxzFromfcNEQ, - real* kxxMyyFromfcNEQ, - real* kxxMzzFromfcNEQ, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep); + real* kyzFromfcNEQ, + real* kxzFromfcNEQ, + real* kxxMyyFromfcNEQ, + real* kxxMzzFromfcNEQ, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep); __global__ void LBCalc3rdMomentsIncompSP27( real* CUMbbb, - real* CUMabc, - real* CUMbac, - real* CUMbca, - real* CUMcba, - real* CUMacb, - real* CUMcab, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + real* CUMabc, + real* CUMbac, + real* CUMbca, + real* CUMcba, + real* CUMacb, + real* CUMcab, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb, - real* CUMabc, - real* CUMbac, - real* CUMbca, - real* CUMcba, - real* CUMacb, - real* CUMcab, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned 
int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + real* CUMabc, + real* CUMbac, + real* CUMbca, + real* CUMcba, + real* CUMacb, + real* CUMcab, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LBCalcHigherMomentsIncompSP27( real* CUMcbb, - real* CUMbcb, - real* CUMbbc, - real* CUMcca, - real* CUMcac, - real* CUMacc, - real* CUMbcc, - real* CUMcbc, - real* CUMccb, - real* CUMccc, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + real* CUMbcb, + real* CUMbbc, + real* CUMcca, + real* CUMcac, + real* CUMacc, + real* CUMbcc, + real* CUMcbc, + real* CUMccb, + real* CUMccc, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb, - real* CUMbcb, - real* CUMbbc, - real* CUMcca, - real* CUMcac, - real* CUMacc, - real* CUMbcc, - real* CUMcbc, - real* CUMccb, - real* CUMccc, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd); + real* CUMbcb, + real* CUMbbc, + real* CUMcca, + real* CUMcac, + real* CUMacc, + real* CUMbcc, + real* CUMcbc, + real* CUMccb, + real* CUMccc, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd); __global__ void LBCalcMeasurePoints(real* vxMP, real* vyMP, @@ -568,7 +568,7 @@ __global__ void LBCalcMeasurePoints(real* vxMP, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, bool 
isEvenTimestep); @@ -580,7 +580,7 @@ __global__ void LB_BC_Press_East27( int nx, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) ; __global__ void LB_BC_Vel_West_27( int nx, @@ -592,7 +592,7 @@ __global__ void LB_BC_Vel_West_27( int nx, unsigned int* neighborY, unsigned int* neighborZ, real* DD, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep, real u0x, unsigned int grid_nx, @@ -608,64 +608,64 @@ __global__ void QDevice27(real* distributions, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int numberOfLBnodes, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep); + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QDeviceCompThinWallsPartOne27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); - -__global__ void QDevice3rdMomentsComp27( real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned 
int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void QDevice3rdMomentsComp27( real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QDeviceIncompHighNu27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QDeviceCompHighNu27( real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); //Velocity BCs __global__ void QVelDevPlainBB27( @@ -679,43 +679,43 @@ __global__ void QVelDevPlainBB27( uint* neighborX, uint* neighborY, uint* neighborZ, - uint numberOfLBnodes, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QVelDevCouette27(real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* 
neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDev1h27( int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - unsigned int size_Mat, - bool isEvenTimestep); + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDevice27(int inx, int iny, @@ -730,111 +730,111 @@ __global__ void QVelDevice27(int inx, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QVelDeviceCompPlusSlip27(real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDeviceComp27(real* velocityX, - real* velocityY, - real* velocityZ, - real* distribution, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep); + real* velocityY, + real* velocityZ, + 
real* distribution, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDeviceCompThinWallsPartOne27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - uint numberOfBCnodes, - real om1, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint size_Mat, - bool isEvenTimestep); + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + uint numberOfBCnodes, + real om1, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QThinWallsPartTwo27( - real* DD, - int* k_Q, - real* QQ, - uint numberOfBCnodes, - uint* geom, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighborWSB, - uint size_Mat, - bool isEvenTimestep); + real* DD, + int* k_Q, + real* QQ, + uint numberOfBCnodes, + uint* geom, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborWSB, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDeviceCompZeroPress27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distribution, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep); + real* velocityX, + real* velocityY, + real* velocityZ, + real* distribution, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDeviceIncompHighNu27(real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - 
unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVelDeviceCompHighNu27( real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QVeloDeviceEQ27(real* VeloX, - real* VeloY, - real* VeloZ, + real* VeloY, + real* VeloZ, real* DD, int* k_Q, int numberOfBCnodes, @@ -842,22 +842,22 @@ __global__ void QVeloDeviceEQ27(real* VeloX, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QVeloStreetDeviceEQ27( - real* veloXfraction, - real* veloYfraction, - int* naschVelo, - real* DD, - int* naschIndex, - int numberOfStreetNodes, - real velocityRatio, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint size_Mat, - bool isEvenTimestep); + real* veloXfraction, + real* veloYfraction, + int* naschVelo, + real* DD, + int* naschIndex, + int numberOfStreetNodes, + real velocityRatio, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); //Slip BCs __global__ void QSlipDevice27(real* DD, @@ -868,139 +868,150 @@ __global__ void QSlipDevice27(real* DD, unsigned int* 
neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QSlipDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QSlipDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, + real* distributions, + int* subgridDistanceIndices, real* subgridDistances, unsigned int numberOfBCnodes, - real omega, + real omega, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, real* turbViscosity, - unsigned int numberOfLBnodes, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QSlipPressureDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, + real* distributions, + int* subgridDistanceIndices, real* subgridDistances, unsigned int numberOfBCnodes, - real omega, + real omega, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, real* turbViscosity, - unsigned int numberOfLBnodes, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QSlipGeomDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + 
bool isEvenTimestep); __global__ void QSlipNormDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void BBSlipDeviceComp27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); // Stress BCs (wall model) __global__ void QStressDeviceComp27(real* DD, - int* k_Q, - int* k_N, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* turbViscosity, - real* vx, - real* vy, - real* vz, - real* normalX, - real* normalY, - real* normalZ, - real* vx_bc, - real* vy_bc, - real* vz_bc, - real* vx1, - real* vy1, - real* vz1, - int* samplingOffset, - real* z0, - bool hasWallModelMonitor, - real* u_star_monitor, - real* Fx_monitor, - real* Fy_monitor, - real* Fz_monitor, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + int* k_N, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* 
neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void BBStressDevice27( real* DD, - int* k_Q, - int* k_N, - real* QQ, - unsigned int numberOfBCnodes, - real* vx, - real* vy, - real* vz, - real* normalX, - real* normalY, - real* normalZ, - real* vx_bc, - real* vy_bc, - real* vz_bc, - real* vx1, - real* vy1, - real* vz1, - int* samplingOffset, - real* z0, - bool hasWallModelMonitor, - real* u_star_monitor, - real* Fx_monitor, - real* Fy_monitor, - real* Fz_monitor, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + int* k_N, + real* QQ, + unsigned int numberOfBCnodes, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_bc, + real* vy_bc, + real* vz_bc, + real* vx1, + real* vy1, + real* vz1, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void BBStressPressureDevice27( real* DD, - int* k_Q, + int* k_Q, int* k_N, real* QQ, unsigned int numberOfBCnodes, @@ -1026,7 +1037,7 @@ __global__ void BBStressPressureDevice27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); //Pressure BCs @@ -1039,23 +1050,23 @@ __global__ void QPressDevice27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDeviceAntiBB27( real* rhoBC, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* 
neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QPressDeviceFixBackflow27( real* rhoBC, real* DD, @@ -1065,7 +1076,7 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDeviceDirDepBot27( real* rhoBC, @@ -1076,32 +1087,47 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + int direction); + +__global__ void QPressZeroRhoOutflowDevice27( real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor); __global__ void QInflowScaleByPressDevice27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + 
real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QPressDeviceOld27(real* rhoBC, real* DD, @@ -1112,20 +1138,20 @@ __global__ void QPressDeviceOld27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDeviceIncompNEQ27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QPressDeviceNEQ27(real* rhoBC, real* distribution, @@ -1136,7 +1162,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDeviceEQZ27(real* rhoBC, @@ -1149,17 +1175,17 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDeviceZero27( real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QPressDeviceFake27(real* rhoBC, 
real* DD, @@ -1170,7 +1196,7 @@ __global__ void QPressDeviceFake27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void BBDevice27(real* distributions, @@ -1180,20 +1206,20 @@ __global__ void BBDevice27(real* distributions, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int numberOfLBnodes, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QPressDevice27_IntBB(real* rho, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 //Schlaffer BCs @@ -1210,7 +1236,7 @@ __global__ void PressSchlaff27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 @@ -1225,9 +1251,106 @@ __global__ void VelSchlaff27( int t, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); +__global__ void QPrecursorDeviceCompZeroPress( int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + int sizeQ, + real omega, + real* distributions, + real* subgridDistances, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weights0PP, + real* weights0PM, + 
real* weights0MP, + real* weights0MM, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real timeRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void PrecursorDeviceEQ27( int* subgridDistanceIndices, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real omega, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* vLast, + real* vCurrent, + real velocityX, + real velocityY, + real velocityZ, + real timeRatio, + real velocityRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + +__global__ void PrecursorDeviceDistributions( int* subgridDistanceIndices, + int numberOfBCNodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* fsLast, + real* fsNext, + real timeRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); +__global__ void QPrecursorDeviceDistributions( int* subgridDistanceIndices, + real* subgridDistances, + int sizeQ, + int numberOfBCNodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborsNT, + uint* neighborsNB, + uint* neighborsST, + uint* neighborsSB, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* fsLast, + real* fsNext, + real timeRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); + //Advection / Diffusion BCs __global__ void QAD7( real* DD, real* DD7, @@ -1240,68 +1363,68 @@ __global__ void QAD7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned 
int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); ////////////////////////////////////////////////////////////////////////// //! \brief \ref Advection_Diffusion_Device_Kernel : Factorized central moments for Advection Diffusion Equation __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel( - real omegaDiffusivity, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - real* distributionsAD, - int size_Mat, - real* forces, - bool isEvenTimestep); + real omegaDiffusivity, + uint* typeOfGridNode, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + real* distributions, + real* distributionsAD, + unsigned long long numberOfLBnodes, + real* forces, + bool isEvenTimestep); ////////////////////////////////////////////////////////////////////////// //! \brief \ref AD_SlipVelDeviceComp : device function for the slip-AD boundary condition __global__ void AD_SlipVelDeviceComp( - real * normalX, - real * normalY, - real * normalZ, - real * distributions, - real * distributionsAD, - int* QindexArray, - real * Qarrays, - uint numberOfBCnodes, - real omegaDiffusivity, - uint * neighborX, - uint * neighborY, - uint * neighborZ, - uint size_Mat, - bool isEvenTimestep); + real * normalX, + real * normalY, + real * normalZ, + real * distributions, + real * distributionsAD, + int* QindexArray, + real * Qarrays, + uint numberOfBCnodes, + real omegaDiffusivity, + uint * neighborX, + uint * neighborY, + uint * neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADDirichlet27( real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int 
numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADBB27( real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADVel7( real* DD, real* DD7, @@ -1315,7 +1438,7 @@ __global__ void QADVel7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QADVel27(real* DD, @@ -1330,7 +1453,7 @@ __global__ void QADVel27(real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QADPress7( real* DD, @@ -1345,7 +1468,7 @@ __global__ void QADPress7( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QADPress27( real* DD, @@ -1360,109 +1483,109 @@ __global__ void QADPress27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void QADPressNEQNeighbor27( - real* DD, - real* DD27, - int* k_Q, - int* k_N, - int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep - ); + real* 
DD, + real* DD27, + int* k_Q, + int* k_N, + int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep + ); __global__ void QNoSlipADincomp7( real* DD, - real* DD7, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD7, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QNoSlipADincomp27( real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADVeloIncomp7( real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADVeloIncomp27( real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - 
real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADPressIncomp7(real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void QADPressIncomp27( real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); //Propeller BC __global__ void PropellerBC(unsigned int* neighborX, @@ -1473,8 +1596,8 @@ __global__ void PropellerBC(unsigned int* neighborX, real* uy, real* uz, int* k_Q, - unsigned int size_Prop, - unsigned int size_Mat, + unsigned int size_Prop, + unsigned long long numberOfLBnodes, unsigned int* bcMatD, real* DD, bool EvenOrOdd); @@ -1490,19 +1613,19 @@ __global__ void scaleCF27(real* 
DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF); + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF); __global__ void scaleCFEff27(real* DC, real* DF, @@ -1512,18 +1635,18 @@ __global__ void scaleCFEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, unsigned int nyF, OffCF offCF); @@ -1535,8 +1658,8 @@ __global__ void scaleCFLast27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1558,8 +1681,8 @@ __global__ void scaleCFpress27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1581,8 +1704,8 @@ __global__ void scaleCF_Fix_27(real* DC, unsigned int* 
neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1597,233 +1720,233 @@ __global__ void scaleCF_Fix_27(real* DC, OffCF offCF); __global__ void scaleCF_Fix_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_0817_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* 
neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_comp_D3Q27F3_2018( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_comp_D3Q27F3( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned 
int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_staggered_time_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_RhoSq_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* 
neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_compressible( - real* distributionsCoarse, - real* distributionsFine, + real* distributionsCoarse, + real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, - unsigned int numberOfLBnodesCoarse, - unsigned int numberOfLBnodesFine, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, - unsigned int* indicesCoarseMMM, - unsigned int* indicesFineMMM, - unsigned int numberOfInterfaceNodes, - real omegaCoarse, - real omegaFine, + unsigned int* indicesCoarseMMM, + unsigned int* indicesFineMMM, + unsigned int numberOfInterfaceNodes, + real omegaCoarse, + real omegaFine, OffCF offsetCF); __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned 
long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_AA2016_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCF_NSPress_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffCF offCF); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + 
unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffCF offCF); __global__ void scaleCFThSMG7( real* DC, real* DF, @@ -1835,8 +1958,8 @@ __global__ void scaleCFThSMG7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1855,8 +1978,8 @@ __global__ void scaleCFThS7(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -1874,15 +1997,15 @@ __global__ void scaleCFThS27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF, real nu, real diffusivity_fine, - OffCF offCF); + OffCF offCF); //fine to coarse __global__ void scaleFC27(real* DC, @@ -1893,18 +2016,18 @@ __global__ void scaleFC27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, + real omCoarse, + real omFine, + real nu, + unsigned 
int nxC, + unsigned int nyC, + unsigned int nxF, unsigned int nyF); __global__ void scaleFCEff27(real* DC, @@ -1915,8 +2038,8 @@ __global__ void scaleFCEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1938,8 +2061,8 @@ __global__ void scaleFCLast27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1961,8 +2084,8 @@ __global__ void scaleFCpress27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1984,8 +2107,8 @@ __global__ void scaleFC_Fix_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -2000,145 +2123,145 @@ __global__ void scaleFC_Fix_27( real* DC, OffFC offFC); __global__ void scaleFC_Fix_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int 
nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_0817_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_comp_D3Q27F3_2018( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - 
unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_comp_D3Q27F3( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_staggered_time_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - 
unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_RhoSq_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_compressible( real *distributionsCoarse, @@ -2149,8 +2272,8 @@ __global__ void scaleFC_compressible( unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, - unsigned int numberOfLBnodesCoarse, - unsigned int numberOfLBnodesFine, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int 
*indicesCoarse000, unsigned int *indicesFineMMM, @@ -2160,73 +2283,73 @@ __global__ void scaleFC_compressible( OffFC offsetFC); __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_AA2016_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real 
omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFC_NSPress_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - OffFC offFC); + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC); __global__ void scaleFCThSMG7( real* DC, real* DF, @@ -2238,8 +2361,8 @@ __global__ void scaleFCThSMG7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -2258,8 +2381,8 @@ __global__ void scaleFCThS7(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -2277,242 +2400,242 @@ __global__ void scaleFCThS27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned 
int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, unsigned int kFC, real nu, real diffusivity_coarse, - OffFC offFC); + OffFC offFC); __global__ void DragLiftPost27( real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - double *DragX, - double *DragY, - double *DragZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + int numberOfBCnodes, + double *DragX, + double *DragY, + double *DragZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void DragLiftPre27( real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - double *DragX, - double *DragY, - double *DragZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* k_Q, + real* QQ, + int numberOfBCnodes, + double *DragX, + double *DragY, + double *DragZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void CalcCP27(real* DD, - int* cpIndex, - int nonCp, - double *cpPress, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* cpIndex, + int nonCp, + double *cpPress, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void getSendFsPre27(real* DD, - real* bufferFs, - int* sendIndex, + real* bufferFs, + int* sendIndex, int buffmax, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void 
getSendFsPost27(real* DD, - real* bufferFs, - int* sendIndex, + real* bufferFs, + int* sendIndex, int buffmax, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void setRecvFsPre27(real* DD, - real* bufferFs, - int* recvIndex, + real* bufferFs, + int* recvIndex, int buffmax, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void setRecvFsPost27(real* DD, - real* bufferFs, - int* recvIndex, + real* bufferFs, + int* recvIndex, int buffmax, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); __global__ void getSendGsF3( - real* G6, - real* bufferGs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* G6, + real* bufferGs, + int* sendIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void setRecvGsF3( - real* G6, - real* bufferGs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* G6, + real* bufferGs, + int* recvIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void WallFunction27( real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + real* vy, + real* vz, 
+ real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void LBSetOutputWallVelocitySP27( real* vxD, - real* vyD, - real* vzD, - real* vxWall, - real* vyWall, - real* vzWall, - int numberOfWallNodes, - int* kWallNodes, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep); + real* vyD, + real* vzD, + real* vxWall, + real* vyWall, + real* vzWall, + int numberOfWallNodes, + int* kWallNodes, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep); __global__ void GetVeloforForcing27( real* DD, - int* bcIndex, - int nonAtBC, - real* Vx, - real* Vy, - real* Vz, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep); + int* bcIndex, + int nonAtBC, + real* Vx, + real* Vy, + real* Vz, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void InitParticles( real* coordX, - real* coordY, - real* coordZ, - real* coordParticleXlocal, - real* coordParticleYlocal, - real* coordParticleZlocal, - real* coordParticleXglobal, - real* coordParticleYglobal, - real* coordParticleZglobal, - real* veloParticleX, - real* veloParticleY, - real* veloParticleZ, - real* randArray, - unsigned int* particleID, - unsigned int* cellBaseID, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - int level, - unsigned int numberOfParticles, - unsigned int size_Mat); + real* coordY, + real* 
coordZ, + real* coordParticleXlocal, + real* coordParticleYlocal, + real* coordParticleZlocal, + real* coordParticleXglobal, + real* coordParticleYglobal, + real* coordParticleZglobal, + real* veloParticleX, + real* veloParticleY, + real* veloParticleZ, + real* randArray, + unsigned int* particleID, + unsigned int* cellBaseID, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + int level, + unsigned int numberOfParticles, + unsigned long long numberOfLBnodes); __global__ void MoveParticles( real* coordX, - real* coordY, - real* coordZ, - real* coordParticleXlocal, - real* coordParticleYlocal, - real* coordParticleZlocal, - real* coordParticleXglobal, - real* coordParticleYglobal, - real* coordParticleZglobal, - real* veloParticleX, - real* veloParticleY, - real* veloParticleZ, - real* DD, - real omega, - unsigned int* particleID, - unsigned int* cellBaseID, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - int level, - unsigned int timestep, - unsigned int numberOfTimesteps, - unsigned int numberOfParticles, - unsigned int size_Mat, - bool isEvenTimestep); + real* coordY, + real* coordZ, + real* coordParticleXlocal, + real* coordParticleYlocal, + real* coordParticleZlocal, + real* coordParticleXglobal, + real* coordParticleYglobal, + real* coordParticleZglobal, + real* veloParticleX, + real* veloParticleY, + real* veloParticleZ, + real* DD, + real omega, + unsigned int* particleID, + unsigned int* cellBaseID, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + int level, + unsigned int timestep, + unsigned int numberOfTimesteps, + unsigned int numberOfParticles, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void MoveParticlesWithoutBCs( real* coordX, - real* coordY, - real* coordZ, - real* 
coordParticleXlocal, - real* coordParticleYlocal, - real* coordParticleZlocal, - real* coordParticleXglobal, - real* coordParticleYglobal, - real* coordParticleZglobal, - real* veloParticleX, - real* veloParticleY, - real* veloParticleZ, - real* DD, - real omega, - unsigned int* particleID, - unsigned int* cellBaseID, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - int level, - unsigned int timestep, - unsigned int numberOfTimesteps, - unsigned int numberOfParticles, - unsigned int size_Mat, - bool isEvenTimestep); + real* coordY, + real* coordZ, + real* coordParticleXlocal, + real* coordParticleYlocal, + real* coordParticleZlocal, + real* coordParticleXglobal, + real* coordParticleYglobal, + real* coordParticleZglobal, + real* veloParticleX, + real* veloParticleY, + real* veloParticleZ, + real* DD, + real omega, + unsigned int* particleID, + unsigned int* cellBaseID, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + int level, + unsigned int timestep, + unsigned int numberOfTimesteps, + unsigned int numberOfParticles, + unsigned long long numberOfLBnodes, + bool isEvenTimestep); __global__ void initRandom(curandState* state); __global__ void generateRandomValues(curandState* state, - real* randArray); + real* randArray); __global__ void CalcTurbulenceIntensity( real* vxx, @@ -2529,7 +2652,7 @@ __global__ void CalcTurbulenceIntensity( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep); #endif diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu index 619d68c87d7a707e70be4c56d434191994144148..641d6519669b1522430fe88990c00d0630d00e9b 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu +++ 
b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu @@ -22,8 +22,8 @@ __global__ void scaleCF_0817_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -43,33 +43,33 @@ __global__ void scaleCF_0817_comp_27( real* DC, *f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest; - fP00dest = &DF[DIR_P00 *size_MatF]; - fM00dest = &DF[DIR_M00 *size_MatF]; - f0P0dest = &DF[DIR_0P0 *size_MatF]; - f0M0dest = &DF[DIR_0M0 *size_MatF]; - f00Pdest = &DF[DIR_00P *size_MatF]; - f00Mdest = &DF[DIR_00M *size_MatF]; - fPP0dest = &DF[DIR_PP0 *size_MatF]; - fMM0dest = &DF[DIR_MM0 *size_MatF]; - fPM0dest = &DF[DIR_PM0 *size_MatF]; - fMP0dest = &DF[DIR_MP0 *size_MatF]; - fP0Pdest = &DF[DIR_P0P *size_MatF]; - fM0Mdest = &DF[DIR_M0M *size_MatF]; - fP0Mdest = &DF[DIR_P0M *size_MatF]; - fM0Pdest = &DF[DIR_M0P *size_MatF]; - f0PPdest = &DF[DIR_0PP *size_MatF]; - f0MMdest = &DF[DIR_0MM *size_MatF]; - f0PMdest = &DF[DIR_0PM *size_MatF]; - f0MPdest = &DF[DIR_0MP *size_MatF]; - f000dest = &DF[DIR_000*size_MatF]; - fMMMdest = &DF[DIR_MMM *size_MatF]; - fMMPdest = &DF[DIR_MMP *size_MatF]; - fMPPdest = &DF[DIR_MPP *size_MatF]; - fMPMdest = &DF[DIR_MPM *size_MatF]; - fPPMdest = &DF[DIR_PPM *size_MatF]; - fPPPdest = &DF[DIR_PPP *size_MatF]; - fPMPdest = &DF[DIR_PMP *size_MatF]; - fPMMdest = &DF[DIR_PMM *size_MatF]; + fP00dest = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00dest = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Pdest = &DF[DIR_00P * numberOfLBnodesFine]; + f00Mdest = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine]; + 
fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine]; + fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine]; + f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine]; + f000dest = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source, @@ -78,63 +78,63 @@ __global__ void scaleCF_0817_comp_27( real* DC, if (isEvenTimestep == true) { - fP00source = &DC[DIR_P00 *size_MatC]; - fM00source = &DC[DIR_M00 *size_MatC]; - f0P0source = &DC[DIR_0P0 *size_MatC]; - f0M0source = &DC[DIR_0M0 *size_MatC]; - f00Psource = &DC[DIR_00P *size_MatC]; - f00Msource = &DC[DIR_00M *size_MatC]; - fPP0source = &DC[DIR_PP0 *size_MatC]; - fMM0source = &DC[DIR_MM0 *size_MatC]; - fPM0source = &DC[DIR_PM0 *size_MatC]; - fMP0source = &DC[DIR_MP0 *size_MatC]; - fP0Psource = &DC[DIR_P0P *size_MatC]; - fM0Msource = &DC[DIR_M0M *size_MatC]; - fP0Msource = &DC[DIR_P0M *size_MatC]; - fM0Psource = &DC[DIR_M0P *size_MatC]; - f0PPsource = &DC[DIR_0PP *size_MatC]; - f0MMsource = &DC[DIR_0MM *size_MatC]; - f0PMsource = &DC[DIR_0PM *size_MatC]; - f0MPsource = &DC[DIR_0MP *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_MMM *size_MatC]; - fMMPsource = &DC[DIR_MMP *size_MatC]; - 
fMPPsource = &DC[DIR_MPP *size_MatC]; - fMPMsource = &DC[DIR_MPM *size_MatC]; - fPPMsource = &DC[DIR_PPM *size_MatC]; - fPPPsource = &DC[DIR_PPP *size_MatC]; - fPMPsource = &DC[DIR_PMP *size_MatC]; - fPMMsource = &DC[DIR_PMM *size_MatC]; + fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00source = &DC[DIR_M00 *size_MatC]; - fM00source = &DC[DIR_P00 *size_MatC]; - f0P0source = &DC[DIR_0M0 *size_MatC]; - f0M0source = &DC[DIR_0P0 *size_MatC]; - f00Psource = &DC[DIR_00M *size_MatC]; - f00Msource = &DC[DIR_00P *size_MatC]; - fPP0source = &DC[DIR_MM0 *size_MatC]; - fMM0source = &DC[DIR_PP0 *size_MatC]; - 
fPM0source = &DC[DIR_MP0 *size_MatC]; - fMP0source = &DC[DIR_PM0 *size_MatC]; - fP0Psource = &DC[DIR_M0M *size_MatC]; - fM0Msource = &DC[DIR_P0P *size_MatC]; - fP0Msource = &DC[DIR_M0P *size_MatC]; - fM0Psource = &DC[DIR_P0M *size_MatC]; - f0PPsource = &DC[DIR_0MM *size_MatC]; - f0MMsource = &DC[DIR_0PP *size_MatC]; - f0PMsource = &DC[DIR_0MP *size_MatC]; - f0MPsource = &DC[DIR_0PM *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_PPP *size_MatC]; - fMMPsource = &DC[DIR_PPM *size_MatC]; - fMPPsource = &DC[DIR_PMM *size_MatC]; - fMPMsource = &DC[DIR_PMP *size_MatC]; - fPPMsource = &DC[DIR_MMP *size_MatC]; - fPPPsource = &DC[DIR_MMM *size_MatC]; - fPMPsource = &DC[DIR_MPM *size_MatC]; - fPMMsource = &DC[DIR_MPP *size_MatC]; + fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMsource = 
&DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -4091,8 +4091,8 @@ __global__ void scaleCF_AA2016_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -4109,96 +4109,96 @@ __global__ void scaleCF_AA2016_comp_27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * 
numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP 
*size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P 
*size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -10974,8 +10974,8 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int 
size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -10992,96 +10992,96 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * 
numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC 
= &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * 
numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -17849,8 +17849,8 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -17867,96 +17867,96 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF 
= &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * 
numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * 
numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC 
= &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -22133,8 +22133,8 @@ __global__ void scaleCF_staggered_time_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -22151,96 +22151,96 @@ __global__ void scaleCF_staggered_time_comp_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = 
&DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = 
&DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * 
numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = 
&DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -26369,8 +26369,8 @@ __global__ void scaleCF_Fix_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -26387,96 +26387,96 @@ __global__ void scaleCF_Fix_comp_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + 
fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - 
ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - 
fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -31136,8 +31136,8 @@ __global__ void scaleCF_NSPress_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned 
int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -31154,96 +31154,96 @@ __global__ void scaleCF_NSPress_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * 
numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC 
= &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * 
numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -35080,8 +35080,8 @@ __global__ void scaleCF_Fix_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -35098,96 +35098,96 @@ __global__ void scaleCF_Fix_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = 
&DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * 
numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * 
numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC 
= &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -39338,8 +39338,8 @@ __global__ void scaleCFpress27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -39356,96 +39356,96 @@ __global__ void scaleCFpress27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = 
&DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = 
&DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * 
numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = 
&DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -41012,8 +41012,8 @@ __global__ void scaleCFLast27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -41030,96 +41030,96 @@ __global__ void scaleCFLast27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 
* numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP 
*size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP 
*size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -43249,8 +43249,8 @@ __global__ void scaleCFThSMG7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long 
long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -43261,128 +43261,128 @@ __global__ void scaleCFThSMG7( real* DC, { real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * 
numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + //fzeroF = &DF[DIR_000 * size_MatF]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = 
&DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * 
numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions7 D7F; - D7F.f[0] = &DD7F[0*size_MatF]; - D7F.f[1] = &DD7F[1*size_MatF]; - D7F.f[2] = &DD7F[2*size_MatF]; - D7F.f[3] = &DD7F[3*size_MatF]; - D7F.f[4] = &DD7F[4*size_MatF]; - D7F.f[5] = &DD7F[5*size_MatF]; - D7F.f[6] = &DD7F[6*size_MatF]; + D7F.f[0] = &DD7F[0*numberOfLBnodesFine]; + D7F.f[1] = &DD7F[1*numberOfLBnodesFine]; + D7F.f[2] = &DD7F[2*numberOfLBnodesFine]; + D7F.f[3] = &DD7F[3*numberOfLBnodesFine]; + D7F.f[4] = &DD7F[4*numberOfLBnodesFine]; + D7F.f[5] = &DD7F[5*numberOfLBnodesFine]; + D7F.f[6] = &DD7F[6*numberOfLBnodesFine]; Distributions7 D7C; if (isEvenTimestep==true) { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[1] = &DD7C[1*size_MatC]; - D7C.f[2] = &DD7C[2*size_MatC]; - D7C.f[3] = &DD7C[3*size_MatC]; - D7C.f[4] = &DD7C[4*size_MatC]; - D7C.f[5] = &DD7C[5*size_MatC]; - 
D7C.f[6] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse]; } else { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[2] = &DD7C[1*size_MatC]; - D7C.f[1] = &DD7C[2*size_MatC]; - D7C.f[4] = &DD7C[3*size_MatC]; - D7C.f[3] = &DD7C[4*size_MatC]; - D7C.f[6] = &DD7C[5*size_MatC]; - D7C.f[5] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -44476,8 +44476,8 @@ __global__ void scaleCFThS7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -44487,128 +44487,128 @@ __global__ void scaleCFThS7( real* DC, { real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M 
*size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + //fzeroF = &DF[DIR_000 * size_MatF]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 
*size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * 
numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + fbswC = 
&DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions7 D7F; - D7F.f[0] = &DD7F[0*size_MatF]; - D7F.f[1] = &DD7F[1*size_MatF]; - D7F.f[2] = &DD7F[2*size_MatF]; - D7F.f[3] = &DD7F[3*size_MatF]; - D7F.f[4] = &DD7F[4*size_MatF]; - D7F.f[5] = &DD7F[5*size_MatF]; - D7F.f[6] = &DD7F[6*size_MatF]; + D7F.f[0] = &DD7F[0*numberOfLBnodesFine]; + D7F.f[1] = &DD7F[1*numberOfLBnodesFine]; + D7F.f[2] = &DD7F[2*numberOfLBnodesFine]; + D7F.f[3] = &DD7F[3*numberOfLBnodesFine]; + D7F.f[4] = &DD7F[4*numberOfLBnodesFine]; + D7F.f[5] = &DD7F[5*numberOfLBnodesFine]; + D7F.f[6] = &DD7F[6*numberOfLBnodesFine]; Distributions7 D7C; if (isEvenTimestep==true) { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[1] = &DD7C[1*size_MatC]; - D7C.f[2] = &DD7C[2*size_MatC]; - D7C.f[3] = &DD7C[3*size_MatC]; - D7C.f[4] = &DD7C[4*size_MatC]; - D7C.f[5] = &DD7C[5*size_MatC]; - D7C.f[6] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse]; } else { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[2] = &DD7C[1*size_MatC]; - D7C.f[1] = &DD7C[2*size_MatC]; - D7C.f[4] = &DD7C[3*size_MatC]; - D7C.f[3] = &DD7C[4*size_MatC]; - D7C.f[6] = &DD7C[5*size_MatC]; - D7C.f[5] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[6] = 
&DD7C[5*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -45599,8 +45599,8 @@ __global__ void scaleCFThS27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -45611,188 +45611,188 @@ __global__ void scaleCFThS27( real* DC, { real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * 
numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + //fzeroF = &DF[DIR_000 * size_MatF]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM 
*size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - 
fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + //fzeroC = &DC[DIR_000 * size_MatC]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions27 D27F; - D27F.f[DIR_P00 ] = &DD27F[DIR_P00 *size_MatF]; - D27F.f[DIR_M00 ] = &DD27F[DIR_M00 *size_MatF]; - D27F.f[DIR_0P0 ] = &DD27F[DIR_0P0 *size_MatF]; - D27F.f[DIR_0M0 ] = &DD27F[DIR_0M0 *size_MatF]; - D27F.f[DIR_00P ] = &DD27F[DIR_00P *size_MatF]; - D27F.f[DIR_00M ] = &DD27F[DIR_00M *size_MatF]; - D27F.f[DIR_PP0 ] = &DD27F[DIR_PP0 *size_MatF]; - D27F.f[DIR_MM0 ] = &DD27F[DIR_MM0 *size_MatF]; - D27F.f[DIR_PM0 ] = &DD27F[DIR_PM0 
*size_MatF]; - D27F.f[DIR_MP0 ] = &DD27F[DIR_MP0 *size_MatF]; - D27F.f[DIR_P0P ] = &DD27F[DIR_P0P *size_MatF]; - D27F.f[DIR_M0M ] = &DD27F[DIR_M0M *size_MatF]; - D27F.f[DIR_P0M ] = &DD27F[DIR_P0M *size_MatF]; - D27F.f[DIR_M0P ] = &DD27F[DIR_M0P *size_MatF]; - D27F.f[DIR_0PP ] = &DD27F[DIR_0PP *size_MatF]; - D27F.f[DIR_0MM ] = &DD27F[DIR_0MM *size_MatF]; - D27F.f[DIR_0PM ] = &DD27F[DIR_0PM *size_MatF]; - D27F.f[DIR_0MP ] = &DD27F[DIR_0MP *size_MatF]; - D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF]; - D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF]; - D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF]; - D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF]; - D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF]; - D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF]; - D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF]; - D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF]; - D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF]; + D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine]; + D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine]; + D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine]; + D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine]; + D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine]; + D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine]; + D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine]; + D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine]; + D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine]; + D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * numberOfLBnodesFine]; + D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine]; + D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine]; + D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine]; + D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine]; + D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine]; + D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine]; + D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine]; + D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine]; + D27F.f[DIR_000] = &DD27F[DIR_000 * 
numberOfLBnodesFine]; + D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine]; + D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine]; + D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine]; + D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine]; + D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine]; + D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine]; + D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine]; + D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine]; Distributions27 D27C; if (isEvenTimestep==true) { - D27C.f[DIR_P00 ] = &DD27C[DIR_P00 *size_MatC]; - D27C.f[DIR_M00 ] = &DD27C[DIR_M00 *size_MatC]; - D27C.f[DIR_0P0 ] = &DD27C[DIR_0P0 *size_MatC]; - D27C.f[DIR_0M0 ] = &DD27C[DIR_0M0 *size_MatC]; - D27C.f[DIR_00P ] = &DD27C[DIR_00P *size_MatC]; - D27C.f[DIR_00M ] = &DD27C[DIR_00M *size_MatC]; - D27C.f[DIR_PP0 ] = &DD27C[DIR_PP0 *size_MatC]; - D27C.f[DIR_MM0 ] = &DD27C[DIR_MM0 *size_MatC]; - D27C.f[DIR_PM0 ] = &DD27C[DIR_PM0 *size_MatC]; - D27C.f[DIR_MP0 ] = &DD27C[DIR_MP0 *size_MatC]; - D27C.f[DIR_P0P ] = &DD27C[DIR_P0P *size_MatC]; - D27C.f[DIR_M0M ] = &DD27C[DIR_M0M *size_MatC]; - D27C.f[DIR_P0M ] = &DD27C[DIR_P0M *size_MatC]; - D27C.f[DIR_M0P ] = &DD27C[DIR_M0P *size_MatC]; - D27C.f[DIR_0PP ] = &DD27C[DIR_0PP *size_MatC]; - D27C.f[DIR_0MM ] = &DD27C[DIR_0MM *size_MatC]; - D27C.f[DIR_0PM ] = &DD27C[DIR_0PM *size_MatC]; - D27C.f[DIR_0MP ] = &DD27C[DIR_0MP *size_MatC]; - D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC]; - D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC]; - D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC]; - D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC]; - D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC]; - D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC]; - D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC]; - D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC]; - D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC]; + D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse]; + D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse]; + 
D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse]; + D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse]; + D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse]; + D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse]; + D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse]; + D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse]; + D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse]; + D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse]; + D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse]; + D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse]; + D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse]; + D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse]; + D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse]; + D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse]; + D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse]; + D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse]; + D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse]; + D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse]; + D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse]; + D27C.f[DIR_PMM] = &DD27C[DIR_PMM * numberOfLBnodesCoarse]; + D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse]; } else { - D27C.f[DIR_M00 ] = &DD27C[DIR_P00 *size_MatC]; - D27C.f[DIR_P00 ] = &DD27C[DIR_M00 *size_MatC]; - D27C.f[DIR_0M0 ] = &DD27C[DIR_0P0 *size_MatC]; - D27C.f[DIR_0P0 ] = &DD27C[DIR_0M0 *size_MatC]; - D27C.f[DIR_00M ] = &DD27C[DIR_00P *size_MatC]; - D27C.f[DIR_00P ] = &DD27C[DIR_00M *size_MatC]; - D27C.f[DIR_MM0 ] = &DD27C[DIR_PP0 *size_MatC]; - D27C.f[DIR_PP0 ] = &DD27C[DIR_MM0 *size_MatC]; - D27C.f[DIR_MP0 ] = &DD27C[DIR_PM0 *size_MatC]; - D27C.f[DIR_PM0 ] = 
&DD27C[DIR_MP0 *size_MatC]; - D27C.f[DIR_M0M ] = &DD27C[DIR_P0P *size_MatC]; - D27C.f[DIR_P0P ] = &DD27C[DIR_M0M *size_MatC]; - D27C.f[DIR_M0P ] = &DD27C[DIR_P0M *size_MatC]; - D27C.f[DIR_P0M ] = &DD27C[DIR_M0P *size_MatC]; - D27C.f[DIR_0MM ] = &DD27C[DIR_0PP *size_MatC]; - D27C.f[DIR_0PP ] = &DD27C[DIR_0MM *size_MatC]; - D27C.f[DIR_0MP ] = &DD27C[DIR_0PM *size_MatC]; - D27C.f[DIR_0PM ] = &DD27C[DIR_0MP *size_MatC]; - D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC]; - D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC]; - D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC]; - D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC]; - D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC]; - D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC]; - D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC]; - D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC]; - D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC]; + D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse]; + D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse]; + D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse]; + D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse]; + D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse]; + D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse]; + D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PM0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse]; + D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse]; + D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse]; + D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse]; + D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse]; + D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse]; + D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse]; + D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse]; + D27C.f[DIR_000] = &DD27C[DIR_000 * 
numberOfLBnodesCoarse]; + D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse]; + D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse]; + D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse]; + D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse]; + D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse]; + D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse]; + D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse]; + D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -45892,33 +45892,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw 
];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -45979,33 +45979,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = 
(D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46066,33 +46066,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - 
f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46153,33 +46153,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = 
fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = 
(D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46250,33 +46250,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP 
])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46337,33 +46337,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = 
(D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46424,33 +46424,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = (D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP 
])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46511,33 +46511,33 @@ __global__ void scaleCFThS27( real* DC, f_BSE = fbseC[kbs]; f_BNW = fbnwC[kbw]; //////////////////////////////////////////////////////////////////////////////// - f27E = (D27C.f[DIR_P00 ])[kzero];//ke - f27W = (D27C.f[DIR_M00 ])[kw ]; - f27N = (D27C.f[DIR_0P0 ])[kzero];//kn - f27S = 
(D27C.f[DIR_0M0 ])[ks ]; - f27T = (D27C.f[DIR_00P ])[kzero];//kt - f27B = (D27C.f[DIR_00M ])[kb ]; - f27NE = (D27C.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27C.f[DIR_MM0 ])[ksw ]; - f27SE = (D27C.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27C.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27C.f[DIR_P0P ])[kzero];//kte - f27BW = (D27C.f[DIR_M0M ])[kbw ]; - f27BE = (D27C.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27C.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27C.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27C.f[DIR_0MM ])[kbs ]; - f27BN = (D27C.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27C.f[DIR_0MP ])[ks ];//kts + f27E = (D27C.f[DIR_P00])[kzero];//ke + f27W = (D27C.f[DIR_M00])[kw ]; + f27N = (D27C.f[DIR_0P0])[kzero];//kn + f27S = (D27C.f[DIR_0M0])[ks ]; + f27T = (D27C.f[DIR_00P])[kzero];//kt + f27B = (D27C.f[DIR_00M])[kb ]; + f27NE = (D27C.f[DIR_PP0])[kzero];//kne + f27SW = (D27C.f[DIR_MM0])[ksw ]; + f27SE = (D27C.f[DIR_PM0])[ks ];//kse + f27NW = (D27C.f[DIR_MP0])[kw ];//knw + f27TE = (D27C.f[DIR_P0P])[kzero];//kte + f27BW = (D27C.f[DIR_M0M])[kbw ]; + f27BE = (D27C.f[DIR_P0M])[kb ];//kbe + f27TW = (D27C.f[DIR_M0P])[kw ];//ktw + f27TN = (D27C.f[DIR_0PP])[kzero];//ktn + f27BS = (D27C.f[DIR_0MM])[kbs ]; + f27BN = (D27C.f[DIR_0PM])[kb ];//kbn + f27TS = (D27C.f[DIR_0MP])[ks ];//kts f27ZERO = (D27C.f[DIR_000])[kzero];//kzero - f27TNE = (D27C.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27C.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27C.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27C.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27C.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27C.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27C.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27C.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27C.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27C.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27C.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27C.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27C.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27C.f[DIR_MMM])[kbsw ]; + f27BSE = (D27C.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27C.f[DIR_MPM])[kbw ];//kbnw Conc_C_NEB = f27E + f27W + f27N + f27S 
+ f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -46656,32 +46656,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* 
(c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( 
vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -46734,32 +46734,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 
])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My 
+Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + 
(D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -46812,32 +46812,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( 
vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My 
+Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My 
-Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -46890,32 +46890,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* 
(c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( 
Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My 
-Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -46978,32 +46978,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( 
-vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + 
(D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -47056,32 +47056,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = 
c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - 
(D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx 
+Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -47134,32 +47134,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb 
] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM 
])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + 
(D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); @@ -47212,32 +47212,32 @@ __global__ void scaleCFThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27F.f[DIR_000])[kzero] = c8o27* Conc_F*(c1o1-cu_sq); - (D27F.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27F.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27F.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27F.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27F.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27F.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27F.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27F.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27F.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27F.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 
)*(-vx1+vx2 )-cu_sq)); - (D27F.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27F.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); - (D27F.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27F.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27F.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27F.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27F.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27F.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27F.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27F.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27F.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27F.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27F.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_F*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27F.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_F*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27F.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_F*(c1o1+c9o2*( 
vx2 )*( vx2 )-cu_sq)); + (D27F.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_F*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27F.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_F*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + (D27F.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_F*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27F.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_F*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27F.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_F*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27F.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_F*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27F.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_F*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27F.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_F*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27F.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_F*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27F.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_F*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27F.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_F*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27F.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_F*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27F.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_F*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27F.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_F*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27F.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_F*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27F.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27F.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_PMP])[ks ] = 
c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27F.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + (D27F.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27F.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -47287,8 +47287,8 @@ __global__ void scaleCFEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -47305,96 +47305,96 @@ __global__ void scaleCFEff27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = 
&DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P 
*size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P 
*size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + 
ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -48997,8 +48997,8 @@ __global__ void scaleCF27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -49014,96 +49014,96 @@ __global__ void scaleCF27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * 
numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; 
- fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - 
fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu index cb8bd2a322cc9176cd0aa31625ee386e1f62d63d..386493280fd71fff93c117483e754a248bb0830d 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu @@ -23,8 +23,8 @@ __global__ void 
scaleCF_comp_D3Q27F3_2018(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -44,33 +44,33 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC, *f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest; - fP00dest = &DF[DIR_P00 *size_MatF]; - fM00dest = &DF[DIR_M00 *size_MatF]; - f0P0dest = &DF[DIR_0P0 *size_MatF]; - f0M0dest = &DF[DIR_0M0 *size_MatF]; - f00Pdest = &DF[DIR_00P *size_MatF]; - f00Mdest = &DF[DIR_00M *size_MatF]; - fPP0dest = &DF[DIR_PP0 *size_MatF]; - fMM0dest = &DF[DIR_MM0 *size_MatF]; - fPM0dest = &DF[DIR_PM0 *size_MatF]; - fMP0dest = &DF[DIR_MP0 *size_MatF]; - fP0Pdest = &DF[DIR_P0P *size_MatF]; - fM0Mdest = &DF[DIR_M0M *size_MatF]; - fP0Mdest = &DF[DIR_P0M *size_MatF]; - fM0Pdest = &DF[DIR_M0P *size_MatF]; - f0PPdest = &DF[DIR_0PP *size_MatF]; - f0MMdest = &DF[DIR_0MM *size_MatF]; - f0PMdest = &DF[DIR_0PM *size_MatF]; - f0MPdest = &DF[DIR_0MP *size_MatF]; - f000dest = &DF[DIR_000*size_MatF]; - fMMMdest = &DF[DIR_MMM *size_MatF]; - fMMPdest = &DF[DIR_MMP *size_MatF]; - fMPPdest = &DF[DIR_MPP *size_MatF]; - fMPMdest = &DF[DIR_MPM *size_MatF]; - fPPMdest = &DF[DIR_PPM *size_MatF]; - fPPPdest = &DF[DIR_PPP *size_MatF]; - fPMPdest = &DF[DIR_PMP *size_MatF]; - fPMMdest = &DF[DIR_PMM *size_MatF]; + fP00dest = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00dest = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Pdest = &DF[DIR_00P * numberOfLBnodesFine]; + f00Mdest = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine]; + fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0dest = &DF[DIR_MP0 * 
numberOfLBnodesFine]; + fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine]; + f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine]; + f000dest = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source, @@ -79,72 +79,72 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC, if (isEvenTimestep == true) { - fP00source = &DC[DIR_P00 *size_MatC]; - fM00source = &DC[DIR_M00 *size_MatC]; - f0P0source = &DC[DIR_0P0 *size_MatC]; - f0M0source = &DC[DIR_0M0 *size_MatC]; - f00Psource = &DC[DIR_00P *size_MatC]; - f00Msource = &DC[DIR_00M *size_MatC]; - fPP0source = &DC[DIR_PP0 *size_MatC]; - fMM0source = &DC[DIR_MM0 *size_MatC]; - fPM0source = &DC[DIR_PM0 *size_MatC]; - fMP0source = &DC[DIR_MP0 *size_MatC]; - fP0Psource = &DC[DIR_P0P *size_MatC]; - fM0Msource = &DC[DIR_M0M *size_MatC]; - fP0Msource = &DC[DIR_P0M *size_MatC]; - fM0Psource = &DC[DIR_M0P *size_MatC]; - f0PPsource = &DC[DIR_0PP *size_MatC]; - f0MMsource = &DC[DIR_0MM *size_MatC]; - f0PMsource = &DC[DIR_0PM *size_MatC]; - f0MPsource = &DC[DIR_0MP *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_MMM *size_MatC]; - fMMPsource = &DC[DIR_MMP *size_MatC]; - fMPPsource = &DC[DIR_MPP *size_MatC]; - fMPMsource = &DC[DIR_MPM *size_MatC]; - 
fPPMsource = &DC[DIR_PPM *size_MatC]; - fPPPsource = &DC[DIR_PPP *size_MatC]; - fPMPsource = &DC[DIR_PMP *size_MatC]; - fPMMsource = &DC[DIR_PMM *size_MatC]; + fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00source = &DC[DIR_M00 *size_MatC]; - fM00source = &DC[DIR_P00 *size_MatC]; - f0P0source = &DC[DIR_0M0 *size_MatC]; - f0M0source = &DC[DIR_0P0 *size_MatC]; - f00Psource = &DC[DIR_00M *size_MatC]; - f00Msource = &DC[DIR_00P *size_MatC]; - fPP0source = &DC[DIR_MM0 *size_MatC]; - fMM0source = &DC[DIR_PP0 *size_MatC]; - fPM0source = &DC[DIR_MP0 *size_MatC]; - fMP0source = &DC[DIR_PM0 *size_MatC]; - 
fP0Psource = &DC[DIR_M0M *size_MatC]; - fM0Msource = &DC[DIR_P0P *size_MatC]; - fP0Msource = &DC[DIR_M0P *size_MatC]; - fM0Psource = &DC[DIR_P0M *size_MatC]; - f0PPsource = &DC[DIR_0MM *size_MatC]; - f0MMsource = &DC[DIR_0PP *size_MatC]; - f0PMsource = &DC[DIR_0MP *size_MatC]; - f0MPsource = &DC[DIR_0PM *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_PPP *size_MatC]; - fMMPsource = &DC[DIR_PPM *size_MatC]; - fMPPsource = &DC[DIR_PMM *size_MatC]; - fMPMsource = &DC[DIR_PMP *size_MatC]; - fPPMsource = &DC[DIR_MMP *size_MatC]; - fPPPsource = &DC[DIR_MMM *size_MatC]; - fPMPsource = &DC[DIR_MPM *size_MatC]; - fPMMsource = &DC[DIR_MPP *size_MatC]; + fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_MMM * 
numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; } Distributions6 G; - G.g[DIR_P00] = &G6[DIR_P00 *size_MatF]; - G.g[DIR_M00] = &G6[DIR_M00 *size_MatF]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_MatF]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_MatF]; - G.g[DIR_00P] = &G6[DIR_00P *size_MatF]; - G.g[DIR_00M] = &G6[DIR_00M *size_MatF]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine]; //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -4370,8 +4370,8 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posCSWB, unsigned int* posFSWB, @@ -4391,33 +4391,33 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC, *f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest; - fP00dest = &DF[DIR_P00 *size_MatF]; - fM00dest = &DF[DIR_M00 *size_MatF]; - f0P0dest = &DF[DIR_0P0 *size_MatF]; - f0M0dest = &DF[DIR_0M0 *size_MatF]; - f00Pdest = &DF[DIR_00P *size_MatF]; - f00Mdest = &DF[DIR_00M *size_MatF]; - fPP0dest = &DF[DIR_PP0 *size_MatF]; - fMM0dest = &DF[DIR_MM0 *size_MatF]; - fPM0dest = &DF[DIR_PM0 *size_MatF]; - fMP0dest = &DF[DIR_MP0 *size_MatF]; - fP0Pdest = &DF[DIR_P0P *size_MatF]; - fM0Mdest = &DF[DIR_M0M *size_MatF]; - fP0Mdest = &DF[DIR_P0M *size_MatF]; - fM0Pdest = &DF[DIR_M0P *size_MatF]; - f0PPdest = &DF[DIR_0PP *size_MatF]; - f0MMdest = &DF[DIR_0MM *size_MatF]; - f0PMdest = 
&DF[DIR_0PM *size_MatF]; - f0MPdest = &DF[DIR_0MP *size_MatF]; - f000dest = &DF[DIR_000*size_MatF]; - fMMMdest = &DF[DIR_MMM *size_MatF]; - fMMPdest = &DF[DIR_MMP *size_MatF]; - fMPPdest = &DF[DIR_MPP *size_MatF]; - fMPMdest = &DF[DIR_MPM *size_MatF]; - fPPMdest = &DF[DIR_PPM *size_MatF]; - fPPPdest = &DF[DIR_PPP *size_MatF]; - fPMPdest = &DF[DIR_PMP *size_MatF]; - fPMMdest = &DF[DIR_PMM *size_MatF]; + fP00dest = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00dest = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Pdest = &DF[DIR_00P * numberOfLBnodesFine]; + f00Mdest = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine]; + fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine]; + fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine]; + f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine]; + f000dest = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source, @@ -4426,72 +4426,72 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC, if (isEvenTimestep == true) { - fP00source = &DC[DIR_P00 *size_MatC]; - 
fM00source = &DC[DIR_M00 *size_MatC]; - f0P0source = &DC[DIR_0P0 *size_MatC]; - f0M0source = &DC[DIR_0M0 *size_MatC]; - f00Psource = &DC[DIR_00P *size_MatC]; - f00Msource = &DC[DIR_00M *size_MatC]; - fPP0source = &DC[DIR_PP0 *size_MatC]; - fMM0source = &DC[DIR_MM0 *size_MatC]; - fPM0source = &DC[DIR_PM0 *size_MatC]; - fMP0source = &DC[DIR_MP0 *size_MatC]; - fP0Psource = &DC[DIR_P0P *size_MatC]; - fM0Msource = &DC[DIR_M0M *size_MatC]; - fP0Msource = &DC[DIR_P0M *size_MatC]; - fM0Psource = &DC[DIR_M0P *size_MatC]; - f0PPsource = &DC[DIR_0PP *size_MatC]; - f0MMsource = &DC[DIR_0MM *size_MatC]; - f0PMsource = &DC[DIR_0PM *size_MatC]; - f0MPsource = &DC[DIR_0MP *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_MMM *size_MatC]; - fMMPsource = &DC[DIR_MMP *size_MatC]; - fMPPsource = &DC[DIR_MPP *size_MatC]; - fMPMsource = &DC[DIR_MPM *size_MatC]; - fPPMsource = &DC[DIR_PPM *size_MatC]; - fPPPsource = &DC[DIR_PPP *size_MatC]; - fPMPsource = &DC[DIR_PMP *size_MatC]; - fPMMsource = &DC[DIR_PMM *size_MatC]; + fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + 
f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00source = &DC[DIR_M00 *size_MatC]; - fM00source = &DC[DIR_P00 *size_MatC]; - f0P0source = &DC[DIR_0M0 *size_MatC]; - f0M0source = &DC[DIR_0P0 *size_MatC]; - f00Psource = &DC[DIR_00M *size_MatC]; - f00Msource = &DC[DIR_00P *size_MatC]; - fPP0source = &DC[DIR_MM0 *size_MatC]; - fMM0source = &DC[DIR_PP0 *size_MatC]; - fPM0source = &DC[DIR_MP0 *size_MatC]; - fMP0source = &DC[DIR_PM0 *size_MatC]; - fP0Psource = &DC[DIR_M0M *size_MatC]; - fM0Msource = &DC[DIR_P0P *size_MatC]; - fP0Msource = &DC[DIR_M0P *size_MatC]; - fM0Psource = &DC[DIR_P0M *size_MatC]; - f0PPsource = &DC[DIR_0MM *size_MatC]; - f0MMsource = &DC[DIR_0PP *size_MatC]; - f0PMsource = &DC[DIR_0MP *size_MatC]; - f0MPsource = &DC[DIR_0PM *size_MatC]; - f000source = &DC[DIR_000*size_MatC]; - fMMMsource = &DC[DIR_PPP *size_MatC]; - fMMPsource = &DC[DIR_PPM *size_MatC]; - fMPPsource = &DC[DIR_PMM *size_MatC]; - fMPMsource = &DC[DIR_PMP *size_MatC]; - fPPMsource = &DC[DIR_MMP *size_MatC]; - fPPPsource = &DC[DIR_MMM *size_MatC]; - fPMPsource = &DC[DIR_MPM *size_MatC]; - fPMMsource = &DC[DIR_MPP *size_MatC]; + fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0source 
= &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000source = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse]; } Distributions6 G; - G.g[DIR_P00] = &G6[DIR_P00 *size_MatF]; - G.g[DIR_M00] = &G6[DIR_M00 *size_MatF]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_MatF]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_MatF]; - G.g[DIR_00P] = &G6[DIR_00P *size_MatF]; - G.g[DIR_00M] = &G6[DIR_00M *size_MatF]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine]; //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu index f2a66876cf39e3519e22fc2b0e236514f05ce85a..b37ab44d81d15fbbde46c875c860acd7198b8041 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu 
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu @@ -22,8 +22,8 @@ __global__ void scaleFC_0817_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -43,33 +43,33 @@ __global__ void scaleFC_0817_comp_27( real* DC, *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource; - fP00source = &DF[DIR_P00 *size_MatF]; - fM00source = &DF[DIR_M00 *size_MatF]; - f0P0source = &DF[DIR_0P0 *size_MatF]; - f0M0source = &DF[DIR_0M0 *size_MatF]; - f00Psource = &DF[DIR_00P *size_MatF]; - f00Msource = &DF[DIR_00M *size_MatF]; - fPP0source = &DF[DIR_PP0 *size_MatF]; - fMM0source = &DF[DIR_MM0 *size_MatF]; - fPM0source = &DF[DIR_PM0 *size_MatF]; - fMP0source = &DF[DIR_MP0 *size_MatF]; - fP0Psource = &DF[DIR_P0P *size_MatF]; - fM0Msource = &DF[DIR_M0M *size_MatF]; - fP0Msource = &DF[DIR_P0M *size_MatF]; - fM0Psource = &DF[DIR_M0P *size_MatF]; - f0PPsource = &DF[DIR_0PP *size_MatF]; - f0MMsource = &DF[DIR_0MM *size_MatF]; - f0PMsource = &DF[DIR_0PM *size_MatF]; - f0MPsource = &DF[DIR_0MP *size_MatF]; - f000source = &DF[DIR_000*size_MatF]; - fMMMsource = &DF[DIR_MMM *size_MatF]; - fMMPsource = &DF[DIR_MMP *size_MatF]; - fMPPsource = &DF[DIR_MPP *size_MatF]; - fMPMsource = &DF[DIR_MPM *size_MatF]; - fPPMsource = &DF[DIR_PPM *size_MatF]; - fPPPsource = &DF[DIR_PPP *size_MatF]; - fPMPsource = &DF[DIR_PMP *size_MatF]; - fPMMsource = &DF[DIR_PMM *size_MatF]; + fP00source = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00source = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Psource = &DF[DIR_00P * numberOfLBnodesFine]; + f00Msource = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0source = 
&DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine]; + fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine]; + fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine]; + f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine]; + f000source = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest, @@ -78,63 +78,63 @@ __global__ void scaleFC_0817_comp_27( real* DC, if (isEvenTimestep==true) { - fP00dest = &DC[DIR_P00 *size_MatC]; - fM00dest = &DC[DIR_M00 *size_MatC]; - f0P0dest = &DC[DIR_0P0 *size_MatC]; - f0M0dest = &DC[DIR_0M0 *size_MatC]; - f00Pdest = &DC[DIR_00P *size_MatC]; - f00Mdest = &DC[DIR_00M *size_MatC]; - fPP0dest = &DC[DIR_PP0 *size_MatC]; - fMM0dest = &DC[DIR_MM0 *size_MatC]; - fPM0dest = &DC[DIR_PM0 *size_MatC]; - fMP0dest = &DC[DIR_MP0 *size_MatC]; - fP0Pdest = &DC[DIR_P0P *size_MatC]; - fM0Mdest = &DC[DIR_M0M *size_MatC]; - fP0Mdest = &DC[DIR_P0M *size_MatC]; - fM0Pdest = &DC[DIR_M0P *size_MatC]; - f0PPdest = &DC[DIR_0PP *size_MatC]; - f0MMdest = &DC[DIR_0MM *size_MatC]; - f0PMdest = &DC[DIR_0PM *size_MatC]; - f0MPdest = &DC[DIR_0MP *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = 
&DC[DIR_MMM *size_MatC]; - fMMPdest = &DC[DIR_MMP *size_MatC]; - fMPPdest = &DC[DIR_MPP *size_MatC]; - fMPMdest = &DC[DIR_MPM *size_MatC]; - fPPMdest = &DC[DIR_PPM *size_MatC]; - fPPPdest = &DC[DIR_PPP *size_MatC]; - fPMPdest = &DC[DIR_PMP *size_MatC]; - fPMMdest = &DC[DIR_PMM *size_MatC]; + fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00dest = &DC[DIR_M00 *size_MatC]; - fM00dest = &DC[DIR_P00 *size_MatC]; - f0P0dest = &DC[DIR_0M0 *size_MatC]; - f0M0dest = &DC[DIR_0P0 *size_MatC]; - f00Pdest = &DC[DIR_00M *size_MatC]; - f00Mdest = &DC[DIR_00P *size_MatC]; - fPP0dest = &DC[DIR_MM0 *size_MatC]; - fMM0dest = &DC[DIR_PP0 *size_MatC]; - fPM0dest = 
&DC[DIR_MP0 *size_MatC]; - fMP0dest = &DC[DIR_PM0 *size_MatC]; - fP0Pdest = &DC[DIR_M0M *size_MatC]; - fM0Mdest = &DC[DIR_P0P *size_MatC]; - fP0Mdest = &DC[DIR_M0P *size_MatC]; - fM0Pdest = &DC[DIR_P0M *size_MatC]; - f0PPdest = &DC[DIR_0MM *size_MatC]; - f0MMdest = &DC[DIR_0PP *size_MatC]; - f0PMdest = &DC[DIR_0MP *size_MatC]; - f0MPdest = &DC[DIR_0PM *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = &DC[DIR_PPP *size_MatC]; - fMMPdest = &DC[DIR_PPM *size_MatC]; - fMPPdest = &DC[DIR_PMM *size_MatC]; - fMPMdest = &DC[DIR_PMP *size_MatC]; - fPPMdest = &DC[DIR_MMP *size_MatC]; - fPPPdest = &DC[DIR_MMM *size_MatC]; - fPMPdest = &DC[DIR_MPM *size_MatC]; - fPMMdest = &DC[DIR_MPP *size_MatC]; + fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fPMPdest = 
&DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -1218,8 +1218,8 @@ __global__ void scaleFC_AA2016_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1236,96 +1236,96 @@ __global__ void scaleFC_AA2016_comp_27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = 
&DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - 
fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC 
= &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -5407,8 +5407,8 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long 
numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -5425,96 +5425,96 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + 
fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * 
numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + 
fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -9587,103 +9587,120 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, ////////////////////////////////////////////////////////////////////////// -__device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY, - unsigned int *neighborCZ, unsigned int *neighborFX, unsigned int *neighborFY, - unsigned int *neighborFZ, unsigned int size_MatC, unsigned int size_MatF, - bool isEvenTimestep, unsigned int *posC, unsigned int *posFSWB, unsigned int kFC, - real omCoarse, real omFine, real nu, unsigned int nxC, unsigned int nyC, - unsigned int nxF, unsigned int nyF, OffFC offFC, const unsigned k) +__device__ void 
scaleFC_RhoSq_comp_27_Calculation( + real *DC, real *DF, + unsigned int *neighborCX, + unsigned int *neighborCY, + unsigned int *neighborCZ, + unsigned int *neighborFX, + unsigned int *neighborFY, + unsigned int *neighborFZ, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, + bool isEvenTimestep, + unsigned int *posC, + unsigned int *posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + OffFC offFC, + const unsigned k) { real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 * size_MatF]; - fwF = &DF[DIR_M00 * size_MatF]; - fnF = &DF[DIR_0P0 * size_MatF]; - fsF = &DF[DIR_0M0 * size_MatF]; - ftF = &DF[DIR_00P * size_MatF]; - fbF = &DF[DIR_00M * size_MatF]; - fneF = &DF[DIR_PP0 * size_MatF]; - fswF = &DF[DIR_MM0 * size_MatF]; - fseF = &DF[DIR_PM0 * size_MatF]; - fnwF = &DF[DIR_MP0 * size_MatF]; - fteF = &DF[DIR_P0P * size_MatF]; - fbwF = &DF[DIR_M0M * size_MatF]; - fbeF = &DF[DIR_P0M * size_MatF]; - ftwF = &DF[DIR_M0P * size_MatF]; - ftnF = &DF[DIR_0PP * size_MatF]; - fbsF = &DF[DIR_0MM * size_MatF]; - fbnF = &DF[DIR_0PM * size_MatF]; - ftsF = &DF[DIR_0MP * size_MatF]; - fzeroF = &DF[DIR_000 * size_MatF]; - ftneF = &DF[DIR_PPP * size_MatF]; - ftswF = &DF[DIR_MMP * size_MatF]; - ftseF = &DF[DIR_PMP * size_MatF]; - ftnwF = &DF[DIR_MPP * size_MatF]; - fbneF = &DF[DIR_PPM * size_MatF]; - fbswF = &DF[DIR_MMM * size_MatF]; - fbseF = &DF[DIR_PMM * size_MatF]; - fbnwF = &DF[DIR_MPM * size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * 
numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep == true) { - feC = &DC[DIR_P00 * size_MatC]; - fwC = &DC[DIR_M00 * size_MatC]; - fnC = &DC[DIR_0P0 * size_MatC]; - fsC = &DC[DIR_0M0 * size_MatC]; - ftC = &DC[DIR_00P * size_MatC]; - fbC = &DC[DIR_00M * size_MatC]; - fneC = &DC[DIR_PP0 * size_MatC]; - fswC = &DC[DIR_MM0 * size_MatC]; - fseC = &DC[DIR_PM0 * size_MatC]; - fnwC = &DC[DIR_MP0 * size_MatC]; - fteC = &DC[DIR_P0P * size_MatC]; - fbwC = &DC[DIR_M0M * size_MatC]; - fbeC = &DC[DIR_P0M * size_MatC]; - ftwC = &DC[DIR_M0P * size_MatC]; - ftnC = &DC[DIR_0PP * size_MatC]; - fbsC = &DC[DIR_0MM * size_MatC]; - fbnC = &DC[DIR_0PM * size_MatC]; - ftsC = &DC[DIR_0MP * size_MatC]; - fzeroC = &DC[DIR_000 * size_MatC]; - ftneC = &DC[DIR_PPP * size_MatC]; - ftswC = &DC[DIR_MMP * size_MatC]; - ftseC = &DC[DIR_PMP * size_MatC]; - ftnwC = &DC[DIR_MPP * size_MatC]; - fbneC = &DC[DIR_PPM * 
size_MatC]; - fbswC = &DC[DIR_MMM * size_MatC]; - fbseC = &DC[DIR_PMM * size_MatC]; - fbnwC = &DC[DIR_MPM * size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 * size_MatC]; - feC = &DC[DIR_M00 * size_MatC]; - fsC = &DC[DIR_0P0 * size_MatC]; - fnC = &DC[DIR_0M0 * size_MatC]; - fbC = &DC[DIR_00P * size_MatC]; - ftC = &DC[DIR_00M * size_MatC]; - fswC = &DC[DIR_PP0 * size_MatC]; - fneC = &DC[DIR_MM0 * size_MatC]; - fnwC = &DC[DIR_PM0 * size_MatC]; - fseC = &DC[DIR_MP0 * size_MatC]; - fbwC = &DC[DIR_P0P * size_MatC]; - fteC = &DC[DIR_M0M * size_MatC]; - ftwC = &DC[DIR_P0M * size_MatC]; - fbeC = &DC[DIR_M0P * size_MatC]; - fbsC = &DC[DIR_0PP * size_MatC]; - ftnC = &DC[DIR_0MM * size_MatC]; - ftsC = &DC[DIR_0PM * size_MatC]; - fbnC = 
&DC[DIR_0MP * size_MatC]; - fzeroC = &DC[DIR_000 * size_MatC]; - fbswC = &DC[DIR_PPP * size_MatC]; - fbneC = &DC[DIR_MMP * size_MatC]; - fbnwC = &DC[DIR_PMP * size_MatC]; - fbseC = &DC[DIR_MPP * size_MatC]; - ftswC = &DC[DIR_PPM * size_MatC]; - ftneC = &DC[DIR_MMM * size_MatC]; - ftnwC = &DC[DIR_PMM * size_MatC]; - ftseC = &DC[DIR_MPM * size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -11064,8 +11081,8 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool 
isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -11091,7 +11108,7 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC, ////////////////////////////////////////////////////////////////////////// scaleFC_RhoSq_comp_27_Calculation(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, neighborFZ, - size_MatC, size_MatF, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC, + numberOfLBnodesCoarse, numberOfLBnodesFine, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC, nyC, nxF, nyF, offFC, k); } @@ -11157,8 +11174,8 @@ __global__ void scaleFC_staggered_time_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -11175,96 +11192,96 @@ __global__ void scaleFC_staggered_time_comp_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM 
*size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; 
- ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; 
- fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } 
//////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -11755,827 +11772,6 @@ __global__ void scaleFC_staggered_time_comp_27( real* DC, kxxMyyFromfcNEQ_NEB = -c3o2*omegaS *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB))); kxxMzzFromfcNEQ_NEB = -c3o2*omegaS *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB))); - // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - ////pointertausch - // if (isEvenTimestep==false) - // { - // feC = &DC[DIR_P00 *size_MatC]; - // fwC = &DC[DIR_M00 *size_MatC]; - // fnC = &DC[DIR_0P0 *size_MatC]; - // fsC = &DC[DIR_0M0 *size_MatC]; - // ftC = &DC[DIR_00P *size_MatC]; - // fbC = &DC[DIR_00M *size_MatC]; - // fneC = &DC[DIR_PP0 *size_MatC]; - // fswC = &DC[DIR_MM0 *size_MatC]; - // fseC = &DC[DIR_PM0 *size_MatC]; - // fnwC = &DC[DIR_MP0 *size_MatC]; - // fteC = &DC[DIR_P0P *size_MatC]; - // fbwC = &DC[DIR_M0M *size_MatC]; - // fbeC = &DC[DIR_P0M *size_MatC]; - // ftwC = &DC[DIR_M0P *size_MatC]; - // ftnC = &DC[DIR_0PP *size_MatC]; - // fbsC = &DC[DIR_0MM *size_MatC]; - // fbnC = &DC[DIR_0PM *size_MatC]; - // ftsC = &DC[DIR_0MP *size_MatC]; - // fzeroC = &DC[DIR_000*size_MatC]; - // ftneC = &DC[DIR_PPP *size_MatC]; - // ftswC = &DC[DIR_MMP *size_MatC]; - // ftseC = &DC[DIR_PMP *size_MatC]; - // ftnwC = &DC[DIR_MPP *size_MatC]; - // fbneC = &DC[DIR_PPM *size_MatC]; - // fbswC = &DC[DIR_MMM *size_MatC]; - // fbseC = &DC[DIR_PMM *size_MatC]; - // fbnwC = &DC[DIR_MPM *size_MatC]; - // } - // else - // { - // fwC = &DC[DIR_P00 *size_MatC]; - // feC = &DC[DIR_M00 *size_MatC]; - // fsC = &DC[DIR_0P0 *size_MatC]; - // fnC = &DC[DIR_0M0 *size_MatC]; - // fbC = &DC[DIR_00P *size_MatC]; - // ftC = &DC[DIR_00M *size_MatC]; - // fswC = &DC[DIR_PP0 *size_MatC]; - // 
fneC = &DC[DIR_MM0 *size_MatC]; - // fnwC = &DC[DIR_PM0 *size_MatC]; - // fseC = &DC[DIR_MP0 *size_MatC]; - // fbwC = &DC[DIR_P0P *size_MatC]; - // fteC = &DC[DIR_M0M *size_MatC]; - // ftwC = &DC[DIR_P0M *size_MatC]; - // fbeC = &DC[DIR_M0P *size_MatC]; - // fbsC = &DC[DIR_0PP *size_MatC]; - // ftnC = &DC[DIR_0MM *size_MatC]; - // ftsC = &DC[DIR_0PM *size_MatC]; - // fbnC = &DC[DIR_0MP *size_MatC]; - // fzeroC = &DC[DIR_000*size_MatC]; - // fbswC = &DC[DIR_PPP *size_MatC]; - // fbneC = &DC[DIR_MMP *size_MatC]; - // fbnwC = &DC[DIR_PMP *size_MatC]; - // fbseC = &DC[DIR_MPP *size_MatC]; - // ftswC = &DC[DIR_PPM *size_MatC]; - // ftneC = &DC[DIR_MMM *size_MatC]; - // ftnwC = &DC[DIR_PMM *size_MatC]; - // ftseC = &DC[DIR_MPM *size_MatC]; - // } - - // real rho_tmp; - //real vx1_tmp; - //real vx2_tmp; - //real vx3_tmp; - - // ////////////////////////////////////////////////////////////////////////// - // xoff = offFC.xOffFC[k]; - // yoff = offFC.yOffFC[k]; - // zoff = offFC.zOffFC[k]; - // xoff_sq = xoff * xoff; - // yoff_sq = yoff * yoff; - // zoff_sq = zoff * zoff; - // ////////////////////////////////////////////////////////////////////////// - // //SWB// - // ////////////////////////////////////////////////////////////////////////// - // //index 0 - // k0zero= posFSWB[k]; - // k0w = neighborFX[k0zero]; - // k0s = neighborFY[k0zero]; - // k0b = neighborFZ[k0zero]; - // k0sw = neighborFY[k0w]; - // k0bw = neighborFZ[k0w]; - // k0bs = neighborFZ[k0s]; - // k0bsw = neighborFZ[k0sw]; - // ////////////////////////////////////////////////////////////////////////// - // //index - // kzero= k0zero; - // kw = k0w; - // ks = k0s; - // kb = k0b; - // ksw = k0sw; - // kbw = k0bw; - // kbs = k0bs; - // kbsw = k0bsw; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // 
f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_SWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_SWB = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWB); - ////vx2_SWB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWB); - ////vx3_SWB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWB); - - // //kxyFromfcNEQ_SWB = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx2_SWB))); - // //kyzFromfcNEQ_SWB = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWB) - ((vx2_SWB*vx3_SWB))); - // //kxzFromfcNEQ_SWB = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx3_SWB))); - // //kxxMyyFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx2_SWB*vx2_SWB))); - // //kxxMzzFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx3_SWB*vx3_SWB))); - - //rho_tmp = 
f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_SWB += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_SWB += vx1_tmp; - //vx2_SWB += vx2_tmp; - //vx3_SWB += vx3_tmp; - - // drho_SWB *= c1o2; - // vx1_SWB *= c1o2; - //vx2_SWB *= c1o2; - //vx3_SWB *= c1o2; - - // kxyFromfcNEQ_SWB += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_SWB += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_SWB += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_SWB *= c1o2; - //kyzFromfcNEQ_SWB *= c1o2; - //kxzFromfcNEQ_SWB *= c1o2; - //kxxMyyFromfcNEQ_SWB *= c1o2; - //kxxMzzFromfcNEQ_SWB *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //SWT// - // ////////////////////////////////////////////////////////////////////////// - // 
//index - // kzero= kb; - // kw = kbw; - // ks = kbs; - // kb = neighborFZ[kb]; - // ksw = kbsw; - // kbw = neighborFZ[kbw]; - // kbs = neighborFZ[kbs]; - // kbsw = neighborFZ[kbsw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_SWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_SWT = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWT); - ////vx2_SWT = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWT); - ////vx3_SWT = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWT); - - // //kxyFromfcNEQ_SWT = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx2_SWT))); - // //kyzFromfcNEQ_SWT = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWT) - ((vx2_SWT*vx3_SWT))); - // //kxzFromfcNEQ_SWT = 
-three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx3_SWT))); - // //kxxMyyFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx2_SWT*vx2_SWT))); - // //kxxMzzFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx3_SWT*vx3_SWT))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_SWT += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_SWT += vx1_tmp; - //vx2_SWT += vx2_tmp; - //vx3_SWT += vx3_tmp; - - // drho_SWT *= c1o2; - // vx1_SWT *= c1o2; - //vx2_SWT *= c1o2; - //vx3_SWT *= c1o2; - - // kxyFromfcNEQ_SWT += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_SWT += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_SWT += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // 
kxxMzzFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_SWT *= c1o2; - //kyzFromfcNEQ_SWT *= c1o2; - //kxzFromfcNEQ_SWT *= c1o2; - //kxxMyyFromfcNEQ_SWT *= c1o2; - //kxxMzzFromfcNEQ_SWT *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //SET// - // ////////////////////////////////////////////////////////////////////////// - // //index - // kzero= kw; - // kw = neighborFX[kw]; - // ks = ksw; - // kb = kbw; - // ksw = neighborFX[ksw]; - // kbw = neighborFX[kbw]; - // kbs = kbsw; - // kbsw = neighborFX[kbsw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_SET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_SET = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SET); - ////vx2_SET = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SET); - ////vx3_SET = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + 
(((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SET); - - // //kxyFromfcNEQ_SET = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx2_SET))); - // //kyzFromfcNEQ_SET = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SET) - ((vx2_SET*vx3_SET))); - // //kxzFromfcNEQ_SET = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx3_SET))); - // //kxxMyyFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx2_SET*vx2_SET))); - // //kxxMzzFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx3_SET*vx3_SET))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_SET += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_SET += vx1_tmp; - //vx2_SET += vx2_tmp; - //vx3_SET += vx3_tmp; - - // drho_SET *= c1o2; - // vx1_SET *= c1o2; - //vx2_SET *= c1o2; - //vx3_SET *= c1o2; - - // kxyFromfcNEQ_SET += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_SET += 
-three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_SET += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_SET *= c1o2; - //kyzFromfcNEQ_SET *= c1o2; - //kxzFromfcNEQ_SET *= c1o2; - //kxxMyyFromfcNEQ_SET *= c1o2; - //kxxMzzFromfcNEQ_SET *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //SEB// - // ////////////////////////////////////////////////////////////////////////// - // //index - // kb = kzero; - // kbw = kw; - // kbs = ks; - // kbsw = ksw; - // kzero= k0w; - // kw = neighborFX[k0w]; - // ks = k0sw; - // ksw = neighborFX[k0sw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_SEB = 
f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_SEB = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SEB); - ////vx2_SEB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SEB); - ////vx3_SEB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SEB); - - // //kxyFromfcNEQ_SEB = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx2_SEB))); - // //kyzFromfcNEQ_SEB = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SEB) - ((vx2_SEB*vx3_SEB))); - // //kxzFromfcNEQ_SEB = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx3_SEB))); - // //kxxMyyFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx2_SEB*vx2_SEB))); - // //kxxMzzFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx3_SEB*vx3_SEB))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_SEB += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - 
//vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_SEB += vx1_tmp; - //vx2_SEB += vx2_tmp; - //vx3_SEB += vx3_tmp; - - // drho_SEB *= c1o2; - // vx1_SEB *= c1o2; - //vx2_SEB *= c1o2; - //vx3_SEB *= c1o2; - - // kxyFromfcNEQ_SEB += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_SEB += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_SEB += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_SEB *= c1o2; - //kyzFromfcNEQ_SEB *= c1o2; - //kxzFromfcNEQ_SEB *= c1o2; - //kxxMyyFromfcNEQ_SEB *= c1o2; - //kxxMzzFromfcNEQ_SEB *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //NWB// - // ////////////////////////////////////////////////////////////////////////// - // //index 0 - // k0zero= k0s; - // k0w = k0sw; - // k0s = neighborFY[k0s]; - // k0b = k0bs; - // k0sw = neighborFY[k0sw]; - // k0bw = k0bsw; - // k0bs = neighborFY[k0bs]; - // k0bsw = neighborFY[k0bsw]; - // ////////////////////////////////////////////////////////////////////////// - // //index - // kzero= k0zero; - // kw = k0w; - // ks = k0s; - // kb = k0b; - // ksw = k0sw; - // kbw = k0bw; - // kbs = k0bs; - // kbsw = k0bsw; - // 
//////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_NWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_NWB = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWB); - ////vx2_NWB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWB); - ////vx3_NWB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWB); - - // //kxyFromfcNEQ_NWB = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx2_NWB))); - // //kyzFromfcNEQ_NWB = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWB) - ((vx2_NWB*vx3_NWB))); - // //kxzFromfcNEQ_NWB = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx3_NWB))); - // //kxxMyyFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + 
drho_NWB) - ((vx1_NWB*vx1_NWB-vx2_NWB*vx2_NWB))); - // //kxxMzzFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_NWB) - ((vx1_NWB*vx1_NWB-vx3_NWB*vx3_NWB))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_NWB += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_NWB += vx1_tmp; - //vx2_NWB += vx2_tmp; - //vx3_NWB += vx3_tmp; - - // drho_NWB *= c1o2; - // vx1_NWB *= c1o2; - //vx2_NWB *= c1o2; - //vx3_NWB *= c1o2; - - // kxyFromfcNEQ_NWB += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_NWB += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_NWB += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_NWB *= c1o2; - //kyzFromfcNEQ_NWB *= c1o2; - //kxzFromfcNEQ_NWB *= c1o2; - 
//kxxMyyFromfcNEQ_NWB *= c1o2; - //kxxMzzFromfcNEQ_NWB *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //NWT// - // ////////////////////////////////////////////////////////////////////////// - // //index - // kzero= kb; - // kw = kbw; - // ks = kbs; - // kb = neighborFZ[kb]; - // ksw = kbsw; - // kbw = neighborFZ[kbw]; - // kbs = neighborFZ[kbs]; - // kbsw = neighborFZ[kbsw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_NWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_NWT = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWT); - ////vx2_NWT = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWT); - ////vx3_NWT = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWT); - - // //kxyFromfcNEQ_NWT = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx2_NWT))); - // //kyzFromfcNEQ_NWT = 
-three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWT) - ((vx2_NWT*vx3_NWT))); - // //kxzFromfcNEQ_NWT = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx3_NWT))); - // //kxxMyyFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx2_NWT*vx2_NWT))); - // //kxxMzzFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx3_NWT*vx3_NWT))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_NWT += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_NWT += vx1_tmp; - //vx2_NWT += vx2_tmp; - //vx3_NWT += vx3_tmp; - - // drho_NWT *= c1o2; - // vx1_NWT *= c1o2; - //vx2_NWT *= c1o2; - //vx3_NWT *= c1o2; - - // kxyFromfcNEQ_NWT += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_NWT += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_NWT += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // 
kxxMyyFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_NWT *= c1o2; - //kyzFromfcNEQ_NWT *= c1o2; - //kxzFromfcNEQ_NWT *= c1o2; - //kxxMyyFromfcNEQ_NWT *= c1o2; - //kxxMzzFromfcNEQ_NWT *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //NET// - // ////////////////////////////////////////////////////////////////////////// - // //index - // kzero= kw; - // kw = neighborFX[kw]; - // ks = ksw; - // kb = kbw; - // ksw = neighborFX[ksw]; - // kbw = neighborFX[kbw]; - // kbs = kbsw; - // kbsw = neighborFX[kbsw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_NET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_NET = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NET); - ////vx2_NET = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + 
(((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NET); - ////vx3_NET = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NET); - - // //kxyFromfcNEQ_NET = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx2_NET))); - // //kyzFromfcNEQ_NET = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NET) - ((vx2_NET*vx3_NET))); - // //kxzFromfcNEQ_NET = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx3_NET))); - // //kxxMyyFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx2_NET*vx2_NET))); - // //kxxMzzFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx3_NET*vx3_NET))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_NET += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_NET += vx1_tmp; - //vx2_NET += vx2_tmp; - //vx3_NET += vx3_tmp; - - // drho_NET *= c1o2; - // vx1_NET *= c1o2; - //vx2_NET *= c1o2; - //vx3_NET *= c1o2; - - // kxyFromfcNEQ_NET += 
-three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_NET += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_NET += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_NET *= c1o2; - //kyzFromfcNEQ_NET *= c1o2; - //kxzFromfcNEQ_NET *= c1o2; - //kxxMyyFromfcNEQ_NET *= c1o2; - //kxxMzzFromfcNEQ_NET *= c1o2; - - // ////////////////////////////////////////////////////////////////////////// - // //NEB// - // ////////////////////////////////////////////////////////////////////////// - // //index - // kb = kzero; - // kbw = kw; - // kbs = ks; - // kbsw = ksw; - // kzero= k0w; - // kw = neighborFX[k0w]; - // ks = k0sw; - // ksw = neighborFX[k0sw]; - // //////////////////////////////////////////////////////////////////////////////// - // f_E = fwF[kw]; - // f_W = feF[kzero]; - // f_N = fsF[ks]; - // f_S = fnF[kzero]; - // f_T = fbF[kb]; - // f_B = ftF[kzero]; - // f_NE = fswF[ksw]; - // f_SW = fneF[kzero]; - // f_SE = fnwF[kw]; - // f_NW = fseF[ks]; - // f_TE = fbwF[kbw]; - // f_BW = fteF[kzero]; - // f_BE = ftwF[kw]; - // f_TW = fbeF[kb]; - // f_TN = fbsF[kbs]; - // f_BS = ftnF[kzero]; - // f_BN = ftsF[ks]; - // f_TS = fbnF[kb]; - // f_ZERO = fzeroF[kzero]; - // f_TNE = fbswF[kbsw]; - // f_TSW = fbneF[kb]; - // f_TSE = fbnwF[kbw]; - // f_TNW = fbseF[kbs]; - // f_BNE = ftswF[ksw]; - // f_BSW = ftneF[kzero]; - // 
f_BSE = ftnwF[kw]; - // f_BNW = ftseF[ks]; - - // //drho_NEB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // //vx1_NEB = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NEB); - ////vx2_NEB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NEB); - ////vx3_NEB = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NEB); - - // //kxyFromfcNEQ_NEB = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx2_NEB))); - // //kyzFromfcNEQ_NEB = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NEB) - ((vx2_NEB*vx3_NEB))); - // //kxzFromfcNEQ_NEB = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx3_NEB))); - // //kxxMyyFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB))); - // //kxxMzzFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB))); - - //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; - // - //drho_NEB += rho_tmp; - - //vx1_tmp = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp); - //vx2_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + 
(((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp); - //vx3_tmp = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp); - - // vx1_NEB += vx1_tmp; - //vx2_NEB += vx2_tmp; - //vx3_NEB += vx3_tmp; - - // drho_NEB *= c1o2; - // vx1_NEB *= c1o2; - //vx2_NEB *= c1o2; - //vx3_NEB *= c1o2; - - // kxyFromfcNEQ_NEB += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp))); - // kyzFromfcNEQ_NEB += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp))); - // kxzFromfcNEQ_NEB += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp))); - // kxxMyyFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp))); - // kxxMzzFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp))); - - //kxyFromfcNEQ_NEB *= c1o2; - //kyzFromfcNEQ_NEB *= c1o2; - //kxzFromfcNEQ_NEB *= c1o2; - //kxxMyyFromfcNEQ_NEB *= c1o2; - //kxxMzzFromfcNEQ_NEB *= c1o2; - // - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //kxyFromfcNEQ_SWB = zero; - //kyzFromfcNEQ_SWB = zero; - //kxzFromfcNEQ_SWB = zero; - //kxxMyyFromfcNEQ_SWB = zero; - //kxxMzzFromfcNEQ_SWB = zero; - //kxyFromfcNEQ_SWT = zero; - //kyzFromfcNEQ_SWT = zero; - //kxzFromfcNEQ_SWT = zero; - //kxxMyyFromfcNEQ_SWT = zero; - //kxxMzzFromfcNEQ_SWT = zero; - //kxyFromfcNEQ_SET = zero; - //kyzFromfcNEQ_SET = zero; - //kxzFromfcNEQ_SET = 
zero; - //kxxMyyFromfcNEQ_SET = zero; - //kxxMzzFromfcNEQ_SET = zero; - //kxyFromfcNEQ_SEB = zero; - //kyzFromfcNEQ_SEB = zero; - //kxzFromfcNEQ_SEB = zero; - //kxxMyyFromfcNEQ_SEB = zero; - //kxxMzzFromfcNEQ_SEB = zero; - //kxyFromfcNEQ_NWB = zero; - //kyzFromfcNEQ_NWB = zero; - //kxzFromfcNEQ_NWB = zero; - //kxxMyyFromfcNEQ_NWB = zero; - //kxxMzzFromfcNEQ_NWB = zero; - //kxyFromfcNEQ_NWT = zero; - //kyzFromfcNEQ_NWT = zero; - //kxzFromfcNEQ_NWT = zero; - //kxxMyyFromfcNEQ_NWT = zero; - //kxxMzzFromfcNEQ_NWT = zero; - //kxyFromfcNEQ_NET = zero; - //kyzFromfcNEQ_NET = zero; - //kxzFromfcNEQ_NET = zero; - //kxxMyyFromfcNEQ_NET = zero; - //kxxMzzFromfcNEQ_NET = zero; - //kxyFromfcNEQ_NEB = zero; - //kyzFromfcNEQ_NEB = zero; - //kxzFromfcNEQ_NEB = zero; - //kxxMyyFromfcNEQ_NEB = zero; - //kxxMzzFromfcNEQ_NEB = zero; ////////////////////////////////////////////////////////////////////////// //3 ////////////////////////////////////////////////////////////////////////// @@ -13278,8 +12474,8 @@ __global__ void scaleFC_Fix_comp_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -13296,96 +12492,96 @@ __global__ void scaleFC_Fix_comp_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M 
*size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 
*size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = 
&DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * 
numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -15138,8 +14334,8 @@ __global__ void scaleFC_NSPress_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -15156,96 +14352,96 @@ __global__ void scaleFC_NSPress_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = 
&DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P 
*size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 
*size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } 
//////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -16344,8 +15540,8 @@ __global__ void scaleFC_Fix_27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -16362,96 +15558,96 @@ __global__ void scaleFC_Fix_27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * 
numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; 
- fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - 
fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -17704,8 +16900,8 @@ __global__ void scaleFCpress27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ 
-17722,96 +16918,96 @@ __global__ void scaleFCpress27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = 
&DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * 
numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC 
= &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -18629,8 +17825,8 @@ __global__ void scaleFCLast27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -18647,96 +17843,96 @@ __global__ void scaleFCLast27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = 
&DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, 
*fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * 
numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = 
&DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -20027,8 +19223,8 @@ __global__ void scaleFCThSMG7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -20040,127 +19236,124 @@ __global__ void scaleFCThSMG7( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - 
ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P 
*size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; 
- fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions7 D7F; - D7F.f[0] = 
&DD7F[0*size_MatF]; - D7F.f[1] = &DD7F[1*size_MatF]; - D7F.f[2] = &DD7F[2*size_MatF]; - D7F.f[3] = &DD7F[3*size_MatF]; - D7F.f[4] = &DD7F[4*size_MatF]; - D7F.f[5] = &DD7F[5*size_MatF]; - D7F.f[6] = &DD7F[6*size_MatF]; + D7F.f[0] = &DD7F[0*numberOfLBnodesFine]; + D7F.f[1] = &DD7F[1*numberOfLBnodesFine]; + D7F.f[2] = &DD7F[2*numberOfLBnodesFine]; + D7F.f[3] = &DD7F[3*numberOfLBnodesFine]; + D7F.f[4] = &DD7F[4*numberOfLBnodesFine]; + D7F.f[5] = &DD7F[5*numberOfLBnodesFine]; + D7F.f[6] = &DD7F[6*numberOfLBnodesFine]; Distributions7 D7C; if (isEvenTimestep==true) { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[1] = &DD7C[1*size_MatC]; - D7C.f[2] = &DD7C[2*size_MatC]; - D7C.f[3] = &DD7C[3*size_MatC]; - D7C.f[4] = &DD7C[4*size_MatC]; - D7C.f[5] = &DD7C[5*size_MatC]; - D7C.f[6] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse]; } else { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[2] = &DD7C[1*size_MatC]; - D7C.f[1] = &DD7C[2*size_MatC]; - D7C.f[4] = &DD7C[3*size_MatC]; - D7C.f[3] = &DD7C[4*size_MatC]; - D7C.f[6] = &DD7C[5*size_MatC]; - D7C.f[5] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -20900,8 +20093,8 @@ __global__ void scaleFCThS7( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned 
long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -20912,127 +20105,124 @@ __global__ void scaleFCThS7( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; 
+ fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 
* numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + 
ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions7 D7F; - D7F.f[0] = &DD7F[0*size_MatF]; - D7F.f[1] = &DD7F[1*size_MatF]; - D7F.f[2] = &DD7F[2*size_MatF]; - D7F.f[3] = &DD7F[3*size_MatF]; - D7F.f[4] = &DD7F[4*size_MatF]; - D7F.f[5] = &DD7F[5*size_MatF]; - D7F.f[6] = &DD7F[6*size_MatF]; + D7F.f[0] = &DD7F[0*numberOfLBnodesFine]; + D7F.f[1] = &DD7F[1*numberOfLBnodesFine]; + D7F.f[2] = &DD7F[2*numberOfLBnodesFine]; + D7F.f[3] = &DD7F[3*numberOfLBnodesFine]; + D7F.f[4] = &DD7F[4*numberOfLBnodesFine]; + D7F.f[5] = &DD7F[5*numberOfLBnodesFine]; + D7F.f[6] = &DD7F[6*numberOfLBnodesFine]; Distributions7 D7C; if (isEvenTimestep==true) { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[1] = &DD7C[1*size_MatC]; - D7C.f[2] = &DD7C[2*size_MatC]; - D7C.f[3] = &DD7C[3*size_MatC]; - D7C.f[4] = &DD7C[4*size_MatC]; - D7C.f[5] = &DD7C[5*size_MatC]; - D7C.f[6] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[3] = 
&DD7C[3*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse]; } else { - D7C.f[0] = &DD7C[0*size_MatC]; - D7C.f[2] = &DD7C[1*size_MatC]; - D7C.f[1] = &DD7C[2*size_MatC]; - D7C.f[4] = &DD7C[3*size_MatC]; - D7C.f[3] = &DD7C[4*size_MatC]; - D7C.f[6] = &DD7C[5*size_MatC]; - D7C.f[5] = &DD7C[6*size_MatC]; + D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse]; + D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse]; + D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse]; + D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse]; + D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse]; + D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse]; + D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -21691,8 +20881,8 @@ __global__ void scaleFCThS27( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -21704,187 +20894,184 @@ __global__ void scaleFCThS27( real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = 
&DF[DIR_0MP *size_MatF]; - //fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 
*size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 
*size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - //fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * 
numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } Distributions27 D27F; - D27F.f[DIR_P00 ] = &DD27F[DIR_P00 *size_MatF]; - D27F.f[DIR_M00 ] = &DD27F[DIR_M00 *size_MatF]; - D27F.f[DIR_0P0 ] = &DD27F[DIR_0P0 *size_MatF]; - D27F.f[DIR_0M0 ] = &DD27F[DIR_0M0 *size_MatF]; - D27F.f[DIR_00P ] = &DD27F[DIR_00P *size_MatF]; - D27F.f[DIR_00M ] = &DD27F[DIR_00M *size_MatF]; - D27F.f[DIR_PP0 ] = &DD27F[DIR_PP0 *size_MatF]; - D27F.f[DIR_MM0 ] = &DD27F[DIR_MM0 *size_MatF]; - D27F.f[DIR_PM0 ] = &DD27F[DIR_PM0 *size_MatF]; - D27F.f[DIR_MP0 ] = &DD27F[DIR_MP0 *size_MatF]; - D27F.f[DIR_P0P ] = &DD27F[DIR_P0P *size_MatF]; - D27F.f[DIR_M0M ] = &DD27F[DIR_M0M *size_MatF]; - D27F.f[DIR_P0M ] = &DD27F[DIR_P0M *size_MatF]; - D27F.f[DIR_M0P ] = &DD27F[DIR_M0P *size_MatF]; - D27F.f[DIR_0PP ] = &DD27F[DIR_0PP *size_MatF]; - D27F.f[DIR_0MM ] = &DD27F[DIR_0MM *size_MatF]; - D27F.f[DIR_0PM ] = &DD27F[DIR_0PM *size_MatF]; - D27F.f[DIR_0MP ] = &DD27F[DIR_0MP *size_MatF]; - D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF]; - D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF]; - D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF]; - D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF]; - D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF]; - D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF]; - D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF]; - D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF]; - D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF]; + D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine]; + D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine]; + D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine]; + D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine]; + D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine]; + D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine]; + D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine]; + D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine]; + D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine]; + D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * 
numberOfLBnodesFine]; + D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine]; + D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine]; + D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine]; + D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine]; + D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine]; + D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine]; + D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine]; + D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine]; + D27F.f[DIR_000] = &DD27F[DIR_000 * numberOfLBnodesFine]; + D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine]; + D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine]; + D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine]; + D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine]; + D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine]; + D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine]; + D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine]; + D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine]; Distributions27 D27C; if (isEvenTimestep==true) { - D27C.f[DIR_P00 ] = &DD27C[DIR_P00 *size_MatC]; - D27C.f[DIR_M00 ] = &DD27C[DIR_M00 *size_MatC]; - D27C.f[DIR_0P0 ] = &DD27C[DIR_0P0 *size_MatC]; - D27C.f[DIR_0M0 ] = &DD27C[DIR_0M0 *size_MatC]; - D27C.f[DIR_00P ] = &DD27C[DIR_00P *size_MatC]; - D27C.f[DIR_00M ] = &DD27C[DIR_00M *size_MatC]; - D27C.f[DIR_PP0 ] = &DD27C[DIR_PP0 *size_MatC]; - D27C.f[DIR_MM0 ] = &DD27C[DIR_MM0 *size_MatC]; - D27C.f[DIR_PM0 ] = &DD27C[DIR_PM0 *size_MatC]; - D27C.f[DIR_MP0 ] = &DD27C[DIR_MP0 *size_MatC]; - D27C.f[DIR_P0P ] = &DD27C[DIR_P0P *size_MatC]; - D27C.f[DIR_M0M ] = &DD27C[DIR_M0M *size_MatC]; - D27C.f[DIR_P0M ] = &DD27C[DIR_P0M *size_MatC]; - D27C.f[DIR_M0P ] = &DD27C[DIR_M0P *size_MatC]; - D27C.f[DIR_0PP ] = &DD27C[DIR_0PP *size_MatC]; - D27C.f[DIR_0MM ] = &DD27C[DIR_0MM *size_MatC]; - D27C.f[DIR_0PM ] = &DD27C[DIR_0PM *size_MatC]; - D27C.f[DIR_0MP ] = &DD27C[DIR_0MP *size_MatC]; - D27C.f[DIR_000] = 
&DD27C[DIR_000*size_MatC]; - D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC]; - D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC]; - D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC]; - D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC]; - D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC]; - D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC]; - D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC]; - D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC]; + D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse]; + D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse]; + D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse]; + D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse]; + D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse]; + D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse]; + D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse]; + D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse]; + D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse]; + D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse]; + D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse]; + D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse]; + D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse]; + D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse]; + D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse]; + D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse]; + D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse]; + D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse]; + D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse]; + D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse]; + D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse]; + D27C.f[DIR_PMM] = &DD27C[DIR_PMM * 
numberOfLBnodesCoarse]; + D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse]; } else { - D27C.f[DIR_M00 ] = &DD27C[DIR_P00 *size_MatC]; - D27C.f[DIR_P00 ] = &DD27C[DIR_M00 *size_MatC]; - D27C.f[DIR_0M0 ] = &DD27C[DIR_0P0 *size_MatC]; - D27C.f[DIR_0P0 ] = &DD27C[DIR_0M0 *size_MatC]; - D27C.f[DIR_00M ] = &DD27C[DIR_00P *size_MatC]; - D27C.f[DIR_00P ] = &DD27C[DIR_00M *size_MatC]; - D27C.f[DIR_MM0 ] = &DD27C[DIR_PP0 *size_MatC]; - D27C.f[DIR_PP0 ] = &DD27C[DIR_MM0 *size_MatC]; - D27C.f[DIR_MP0 ] = &DD27C[DIR_PM0 *size_MatC]; - D27C.f[DIR_PM0 ] = &DD27C[DIR_MP0 *size_MatC]; - D27C.f[DIR_M0M ] = &DD27C[DIR_P0P *size_MatC]; - D27C.f[DIR_P0P ] = &DD27C[DIR_M0M *size_MatC]; - D27C.f[DIR_M0P ] = &DD27C[DIR_P0M *size_MatC]; - D27C.f[DIR_P0M ] = &DD27C[DIR_M0P *size_MatC]; - D27C.f[DIR_0MM ] = &DD27C[DIR_0PP *size_MatC]; - D27C.f[DIR_0PP ] = &DD27C[DIR_0MM *size_MatC]; - D27C.f[DIR_0MP ] = &DD27C[DIR_0PM *size_MatC]; - D27C.f[DIR_0PM ] = &DD27C[DIR_0MP *size_MatC]; - D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC]; - D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC]; - D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC]; - D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC]; - D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC]; - D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC]; - D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC]; - D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC]; - D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC]; + D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse]; + D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse]; + D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse]; + D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse]; + D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse]; + D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse]; + D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse]; + D27C.f[DIR_PM0] = &DD27C[DIR_MP0 
* numberOfLBnodesCoarse]; + D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse]; + D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse]; + D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse]; + D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse]; + D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse]; + D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse]; + D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse]; + D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse]; + D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse]; + D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse]; + D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse]; + D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse]; + D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse]; + D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse]; + D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse]; + D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse]; + D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -21980,33 +21167,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS 
= (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22067,33 +21254,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE 
= (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + 
f27TS + f27ZERO + @@ -22154,33 +21341,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = 
(D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22241,33 +21428,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb 
];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22338,33 +21525,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = 
(D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22425,33 +21612,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw 
]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22512,33 +21699,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = 
(D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP ])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE 
= (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22599,33 +21786,33 @@ __global__ void scaleFCThS27( real* DC, f_BSE = fbseF[kbs]; f_BNW = fbnwF[kbw]; ////////////////////////////////////////////////////////////////////////////////// - f27E = (D27F.f[DIR_P00 ])[kzero];//ke - f27W = (D27F.f[DIR_M00 ])[kw ]; - f27N = (D27F.f[DIR_0P0 ])[kzero];//kn - f27S = (D27F.f[DIR_0M0 ])[ks ]; - f27T = (D27F.f[DIR_00P ])[kzero];//kt - f27B = (D27F.f[DIR_00M ])[kb ]; - f27NE = (D27F.f[DIR_PP0 ])[kzero];//kne - f27SW = (D27F.f[DIR_MM0 ])[ksw ]; - f27SE = (D27F.f[DIR_PM0 ])[ks ];//kse - f27NW = (D27F.f[DIR_MP0 ])[kw ];//knw - f27TE = (D27F.f[DIR_P0P ])[kzero];//kte - f27BW = (D27F.f[DIR_M0M ])[kbw ]; - f27BE = (D27F.f[DIR_P0M ])[kb ];//kbe - f27TW = (D27F.f[DIR_M0P ])[kw ];//ktw - f27TN = (D27F.f[DIR_0PP ])[kzero];//ktn - f27BS = (D27F.f[DIR_0MM ])[kbs ]; - f27BN = (D27F.f[DIR_0PM ])[kb ];//kbn - f27TS = (D27F.f[DIR_0MP ])[ks ];//kts + f27E = (D27F.f[DIR_P00])[kzero];//ke + f27W = (D27F.f[DIR_M00])[kw ]; + f27N = (D27F.f[DIR_0P0])[kzero];//kn + f27S = (D27F.f[DIR_0M0])[ks ]; + f27T = (D27F.f[DIR_00P])[kzero];//kt + f27B = (D27F.f[DIR_00M])[kb ]; + f27NE = (D27F.f[DIR_PP0])[kzero];//kne + f27SW = (D27F.f[DIR_MM0])[ksw ]; + f27SE = (D27F.f[DIR_PM0])[ks ];//kse + f27NW = (D27F.f[DIR_MP0])[kw ];//knw + f27TE = (D27F.f[DIR_P0P])[kzero];//kte + f27BW = (D27F.f[DIR_M0M])[kbw ]; + f27BE = (D27F.f[DIR_P0M])[kb ];//kbe + f27TW = (D27F.f[DIR_M0P])[kw ];//ktw + f27TN = (D27F.f[DIR_0PP])[kzero];//ktn + f27BS = (D27F.f[DIR_0MM])[kbs ]; + f27BN = (D27F.f[DIR_0PM])[kb ];//kbn + f27TS = (D27F.f[DIR_0MP])[ks ];//kts f27ZERO = (D27F.f[DIR_000])[kzero];//kzero - f27TNE = (D27F.f[DIR_PPP ])[kzero];//ktne - f27TSW = (D27F.f[DIR_MMP ])[ksw ];//ktsw - f27TSE = (D27F.f[DIR_PMP ])[ks ];//ktse - f27TNW = (D27F.f[DIR_MPP 
])[kw ];//ktnw - f27BNE = (D27F.f[DIR_PPM ])[kb ];//kbne - f27BSW = (D27F.f[DIR_MMM ])[kbsw ]; - f27BSE = (D27F.f[DIR_PMM ])[kbs ];//kbse - f27BNW = (D27F.f[DIR_MPM ])[kbw ];//kbnw + f27TNE = (D27F.f[DIR_PPP])[kzero];//ktne + f27TSW = (D27F.f[DIR_MMP])[ksw ];//ktsw + f27TSE = (D27F.f[DIR_PMP])[ks ];//ktse + f27TNW = (D27F.f[DIR_MPP])[kw ];//ktnw + f27BNE = (D27F.f[DIR_PPM])[kb ];//kbne + f27BSW = (D27F.f[DIR_MMM])[kbsw ]; + f27BSE = (D27F.f[DIR_PMM])[kbs ];//kbse + f27BNW = (D27F.f[DIR_MPM])[kbw ];//kbnw Conc_F_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + @@ -22739,32 +21926,32 @@ __global__ void scaleFCThS27( real* DC, cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D27C.f[DIR_000])[kzero] = c8o27* Conc_C*(c1o1-cu_sq); - (D27C.f[DIR_P00 ])[kzero] = c2o27* (c3o1*( Mx )+Conc_C*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); - (D27C.f[DIR_M00 ])[kw ] = c2o27* (c3o1*(-Mx )+Conc_C*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); - (D27C.f[DIR_0P0 ])[kzero] = c2o27* (c3o1*( My )+Conc_C*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); - (D27C.f[DIR_0M0 ])[ks ] = c2o27* (c3o1*( -My )+Conc_C*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); - (D27C.f[DIR_00P ])[kzero] = c2o27* (c3o1*( Mz)+Conc_C*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); - (D27C.f[DIR_00M ])[kb ] = c2o27* (c3o1*( -Mz)+Conc_C*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); - (D27C.f[DIR_PP0 ])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_C*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); - (D27C.f[DIR_MM0 ])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_C*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); - (D27C.f[DIR_PM0 ])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_C*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); - (D27C.f[DIR_MP0 ])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_C*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); - (D27C.f[DIR_P0P ])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_C*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); - (D27C.f[DIR_M0M ])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_C*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 
-vx3)-cu_sq)); - (D27C.f[DIR_P0M ])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_C*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); - (D27C.f[DIR_M0P ])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_C*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); - (D27C.f[DIR_0PP ])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_C*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); - (D27C.f[DIR_0MM ])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_C*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); - (D27C.f[DIR_0PM ])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_C*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); - (D27C.f[DIR_0MP ])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_C*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); - (D27C.f[DIR_PPP ])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); - (D27C.f[DIR_MMM ])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); - (D27C.f[DIR_PPM ])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); - (D27C.f[DIR_MMP ])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); - (D27C.f[DIR_PMP ])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); - (D27C.f[DIR_MPM ])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); - (D27C.f[DIR_PMM ])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); - (D27C.f[DIR_MPP ])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); + (D27C.f[DIR_P00])[kzero] = c2o27* (c3o1*( Mx )+Conc_C*(c1o1+c9o2*( vx1 )*( vx1 )-cu_sq)); + (D27C.f[DIR_M00])[kw ] = c2o27* (c3o1*(-Mx )+Conc_C*(c1o1+c9o2*(-vx1 )*(-vx1 )-cu_sq)); + (D27C.f[DIR_0P0])[kzero] = c2o27* (c3o1*( My )+Conc_C*(c1o1+c9o2*( vx2 )*( vx2 )-cu_sq)); + (D27C.f[DIR_0M0])[ks ] = c2o27* (c3o1*( -My )+Conc_C*(c1o1+c9o2*( -vx2 )*( -vx2 )-cu_sq)); + (D27C.f[DIR_00P])[kzero] = c2o27* (c3o1*( Mz)+Conc_C*(c1o1+c9o2*( vx3)*( vx3)-cu_sq)); + 
(D27C.f[DIR_00M])[kb ] = c2o27* (c3o1*( -Mz)+Conc_C*(c1o1+c9o2*( -vx3)*( -vx3)-cu_sq)); + (D27C.f[DIR_PP0])[kzero] = c1o54* (c3o1*( Mx +My )+Conc_C*(c1o1+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq)); + (D27C.f[DIR_MM0])[ksw ] = c1o54* (c3o1*(-Mx -My )+Conc_C*(c1o1+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq)); + (D27C.f[DIR_PM0])[ks ] = c1o54* (c3o1*( Mx -My )+Conc_C*(c1o1+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq)); + (D27C.f[DIR_MP0])[kw ] = c1o54* (c3o1*(-Mx +My )+Conc_C*(c1o1+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq)); + (D27C.f[DIR_P0P])[kzero] = c1o54* (c3o1*( Mx +Mz)+Conc_C*(c1o1+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq)); + (D27C.f[DIR_M0M])[kbw ] = c1o54* (c3o1*(-Mx -Mz)+Conc_C*(c1o1+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq)); + (D27C.f[DIR_P0M])[kb ] = c1o54* (c3o1*( Mx -Mz)+Conc_C*(c1o1+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq)); + (D27C.f[DIR_M0P])[kw ] = c1o54* (c3o1*(-Mx +Mz)+Conc_C*(c1o1+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq)); + (D27C.f[DIR_0PP])[kzero] = c1o54* (c3o1*( My +Mz)+Conc_C*(c1o1+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq)); + (D27C.f[DIR_0MM])[kbs ] = c1o54* (c3o1*( -My -Mz)+Conc_C*(c1o1+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq)); + (D27C.f[DIR_0PM])[kb ] = c1o54* (c3o1*( My -Mz)+Conc_C*(c1o1+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq)); + (D27C.f[DIR_0MP])[ks ] = c1o54* (c3o1*( -My +Mz)+Conc_C*(c1o1+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq)); + (D27C.f[DIR_PPP])[kzero] = c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq)); + (D27C.f[DIR_MMM])[kbsw ] = c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq)); + (D27C.f[DIR_PPM])[kb ] = c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq)); + (D27C.f[DIR_MMP])[ksw ] = c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq)); + (D27C.f[DIR_PMP])[ks ] = c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq)); + (D27C.f[DIR_MPM])[kbw ] = c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq)); + 
(D27C.f[DIR_PMM])[kbs ] = c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq)); + (D27C.f[DIR_MPP])[kw ] = c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq)); } } @@ -22812,8 +21999,8 @@ __global__ void scaleFCEff27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -22830,96 +22017,96 @@ __global__ void scaleFCEff27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * 
numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = &DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = 
&DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = 
&DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index @@ -23791,8 +22978,8 @@ __global__ void scaleFC27(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long 
long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -23808,96 +22995,96 @@ __global__ void scaleFC27(real* DC, real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF; - feF = &DF[DIR_P00 *size_MatF]; - fwF = &DF[DIR_M00 *size_MatF]; - fnF = &DF[DIR_0P0 *size_MatF]; - fsF = &DF[DIR_0M0 *size_MatF]; - ftF = &DF[DIR_00P *size_MatF]; - fbF = &DF[DIR_00M *size_MatF]; - fneF = &DF[DIR_PP0 *size_MatF]; - fswF = &DF[DIR_MM0 *size_MatF]; - fseF = &DF[DIR_PM0 *size_MatF]; - fnwF = &DF[DIR_MP0 *size_MatF]; - fteF = &DF[DIR_P0P *size_MatF]; - fbwF = &DF[DIR_M0M *size_MatF]; - fbeF = &DF[DIR_P0M *size_MatF]; - ftwF = &DF[DIR_M0P *size_MatF]; - ftnF = &DF[DIR_0PP *size_MatF]; - fbsF = &DF[DIR_0MM *size_MatF]; - fbnF = &DF[DIR_0PM *size_MatF]; - ftsF = &DF[DIR_0MP *size_MatF]; - fzeroF = &DF[DIR_000*size_MatF]; - ftneF = &DF[DIR_PPP *size_MatF]; - ftswF = &DF[DIR_MMP *size_MatF]; - ftseF = &DF[DIR_PMP *size_MatF]; - ftnwF = &DF[DIR_MPP *size_MatF]; - fbneF = &DF[DIR_PPM *size_MatF]; - fbswF = &DF[DIR_MMM *size_MatF]; - fbseF = &DF[DIR_PMM *size_MatF]; - fbnwF = &DF[DIR_MPM *size_MatF]; + feF = &DF[DIR_P00 * numberOfLBnodesFine]; + fwF = &DF[DIR_M00 * numberOfLBnodesFine]; + fnF = &DF[DIR_0P0 * numberOfLBnodesFine]; + fsF = &DF[DIR_0M0 * numberOfLBnodesFine]; + ftF = &DF[DIR_00P * numberOfLBnodesFine]; + fbF = &DF[DIR_00M * numberOfLBnodesFine]; + fneF = &DF[DIR_PP0 * numberOfLBnodesFine]; + fswF = &DF[DIR_MM0 * numberOfLBnodesFine]; + fseF = &DF[DIR_PM0 * numberOfLBnodesFine]; + fnwF = &DF[DIR_MP0 * numberOfLBnodesFine]; + fteF = &DF[DIR_P0P * numberOfLBnodesFine]; + fbwF = &DF[DIR_M0M * numberOfLBnodesFine]; + fbeF = &DF[DIR_P0M * numberOfLBnodesFine]; + ftwF = &DF[DIR_M0P * numberOfLBnodesFine]; + ftnF = &DF[DIR_0PP * numberOfLBnodesFine]; + fbsF = &DF[DIR_0MM * numberOfLBnodesFine]; + fbnF = 
&DF[DIR_0PM * numberOfLBnodesFine]; + ftsF = &DF[DIR_0MP * numberOfLBnodesFine]; + fzeroF = &DF[DIR_000 * numberOfLBnodesFine]; + ftneF = &DF[DIR_PPP * numberOfLBnodesFine]; + ftswF = &DF[DIR_MMP * numberOfLBnodesFine]; + ftseF = &DF[DIR_PMP * numberOfLBnodesFine]; + ftnwF = &DF[DIR_MPP * numberOfLBnodesFine]; + fbneF = &DF[DIR_PPM * numberOfLBnodesFine]; + fbswF = &DF[DIR_MMM * numberOfLBnodesFine]; + fbseF = &DF[DIR_PMM * numberOfLBnodesFine]; + fbnwF = &DF[DIR_MPM * numberOfLBnodesFine]; real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC; if (isEvenTimestep==true) { - feC = &DC[DIR_P00 *size_MatC]; - fwC = &DC[DIR_M00 *size_MatC]; - fnC = &DC[DIR_0P0 *size_MatC]; - fsC = &DC[DIR_0M0 *size_MatC]; - ftC = &DC[DIR_00P *size_MatC]; - fbC = &DC[DIR_00M *size_MatC]; - fneC = &DC[DIR_PP0 *size_MatC]; - fswC = &DC[DIR_MM0 *size_MatC]; - fseC = &DC[DIR_PM0 *size_MatC]; - fnwC = &DC[DIR_MP0 *size_MatC]; - fteC = &DC[DIR_P0P *size_MatC]; - fbwC = &DC[DIR_M0M *size_MatC]; - fbeC = &DC[DIR_P0M *size_MatC]; - ftwC = &DC[DIR_M0P *size_MatC]; - ftnC = &DC[DIR_0PP *size_MatC]; - fbsC = &DC[DIR_0MM *size_MatC]; - fbnC = &DC[DIR_0PM *size_MatC]; - ftsC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - ftneC = &DC[DIR_PPP *size_MatC]; - ftswC = &DC[DIR_MMP *size_MatC]; - ftseC = &DC[DIR_PMP *size_MatC]; - ftnwC = &DC[DIR_MPP *size_MatC]; - fbneC = &DC[DIR_PPM *size_MatC]; - fbswC = &DC[DIR_MMM *size_MatC]; - fbseC = &DC[DIR_PMM *size_MatC]; - fbnwC = &DC[DIR_MPM *size_MatC]; + feC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fwC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fnC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00P * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fneC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fswC = &DC[DIR_MM0 * 
numberOfLBnodesCoarse]; + fseC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fteC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } else { - fwC = &DC[DIR_P00 *size_MatC]; - feC = &DC[DIR_M00 *size_MatC]; - fsC = &DC[DIR_0P0 *size_MatC]; - fnC = &DC[DIR_0M0 *size_MatC]; - fbC = &DC[DIR_00P *size_MatC]; - ftC = &DC[DIR_00M *size_MatC]; - fswC = &DC[DIR_PP0 *size_MatC]; - fneC = &DC[DIR_MM0 *size_MatC]; - fnwC = &DC[DIR_PM0 *size_MatC]; - fseC = &DC[DIR_MP0 *size_MatC]; - fbwC = &DC[DIR_P0P *size_MatC]; - fteC = &DC[DIR_M0M *size_MatC]; - ftwC = &DC[DIR_P0M *size_MatC]; - fbeC = &DC[DIR_M0P *size_MatC]; - fbsC = &DC[DIR_0PP *size_MatC]; - ftnC = &DC[DIR_0MM *size_MatC]; - ftsC = &DC[DIR_0PM *size_MatC]; - fbnC = &DC[DIR_0MP *size_MatC]; - fzeroC = &DC[DIR_000*size_MatC]; - fbswC = &DC[DIR_PPP *size_MatC]; - fbneC = &DC[DIR_MMP *size_MatC]; - fbnwC = &DC[DIR_PMP *size_MatC]; - fbseC = &DC[DIR_MPP *size_MatC]; - ftswC = &DC[DIR_PPM *size_MatC]; - ftneC = &DC[DIR_MMM *size_MatC]; - ftnwC = &DC[DIR_PMM *size_MatC]; - ftseC = &DC[DIR_MPM *size_MatC]; + fwC = &DC[DIR_P00 * numberOfLBnodesCoarse]; + feC = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fsC = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + 
fnC = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + fbC = &DC[DIR_00P * numberOfLBnodesCoarse]; + ftC = &DC[DIR_00M * numberOfLBnodesCoarse]; + fswC = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fneC = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fnwC = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fseC = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fbwC = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fteC = &DC[DIR_M0M * numberOfLBnodesCoarse]; + ftwC = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fbeC = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fbsC = &DC[DIR_0PP * numberOfLBnodesCoarse]; + ftnC = &DC[DIR_0MM * numberOfLBnodesCoarse]; + ftsC = &DC[DIR_0PM * numberOfLBnodesCoarse]; + fbnC = &DC[DIR_0MP * numberOfLBnodesCoarse]; + fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse]; + fbswC = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fbneC = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fbnwC = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fbseC = &DC[DIR_MPP * numberOfLBnodesCoarse]; + ftswC = &DC[DIR_PPM * numberOfLBnodesCoarse]; + ftneC = &DC[DIR_MMM * numberOfLBnodesCoarse]; + ftnwC = &DC[DIR_PMM * numberOfLBnodesCoarse]; + ftseC = &DC[DIR_MPM * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// const unsigned ix = threadIdx.x; // Globaler x-Index diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu index e7fe8b50637e97b9c8cc34025216f4d02e684c55..3b108ad4ae43bd63698f3516a207630214695797 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu @@ -23,8 +23,8 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -44,33 +44,33 @@ 
__global__ void scaleFC_comp_D3Q27F3_2018(real* DC, *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource; - fP00source = &DF[DIR_P00 *size_MatF]; - fM00source = &DF[DIR_M00 *size_MatF]; - f0P0source = &DF[DIR_0P0 *size_MatF]; - f0M0source = &DF[DIR_0M0 *size_MatF]; - f00Psource = &DF[DIR_00P *size_MatF]; - f00Msource = &DF[DIR_00M *size_MatF]; - fPP0source = &DF[DIR_PP0 *size_MatF]; - fMM0source = &DF[DIR_MM0 *size_MatF]; - fPM0source = &DF[DIR_PM0 *size_MatF]; - fMP0source = &DF[DIR_MP0 *size_MatF]; - fP0Psource = &DF[DIR_P0P *size_MatF]; - fM0Msource = &DF[DIR_M0M *size_MatF]; - fP0Msource = &DF[DIR_P0M *size_MatF]; - fM0Psource = &DF[DIR_M0P *size_MatF]; - f0PPsource = &DF[DIR_0PP *size_MatF]; - f0MMsource = &DF[DIR_0MM *size_MatF]; - f0PMsource = &DF[DIR_0PM *size_MatF]; - f0MPsource = &DF[DIR_0MP *size_MatF]; - f000source = &DF[DIR_000*size_MatF]; - fMMMsource = &DF[DIR_MMM *size_MatF]; - fMMPsource = &DF[DIR_MMP *size_MatF]; - fMPPsource = &DF[DIR_MPP *size_MatF]; - fMPMsource = &DF[DIR_MPM *size_MatF]; - fPPMsource = &DF[DIR_PPM *size_MatF]; - fPPPsource = &DF[DIR_PPP *size_MatF]; - fPMPsource = &DF[DIR_PMP *size_MatF]; - fPMMsource = &DF[DIR_PMM *size_MatF]; + fP00source = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00source = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Psource = &DF[DIR_00P * numberOfLBnodesFine]; + f00Msource = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine]; + fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine]; + fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPsource = &DF[DIR_0PP * 
numberOfLBnodesFine]; + f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine]; + f000source = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest, @@ -79,83 +79,83 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC, if (isEvenTimestep==true) { - fP00dest = &DC[DIR_P00 *size_MatC]; - fM00dest = &DC[DIR_M00 *size_MatC]; - f0P0dest = &DC[DIR_0P0 *size_MatC]; - f0M0dest = &DC[DIR_0M0 *size_MatC]; - f00Pdest = &DC[DIR_00P *size_MatC]; - f00Mdest = &DC[DIR_00M *size_MatC]; - fPP0dest = &DC[DIR_PP0 *size_MatC]; - fMM0dest = &DC[DIR_MM0 *size_MatC]; - fPM0dest = &DC[DIR_PM0 *size_MatC]; - fMP0dest = &DC[DIR_MP0 *size_MatC]; - fP0Pdest = &DC[DIR_P0P *size_MatC]; - fM0Mdest = &DC[DIR_M0M *size_MatC]; - fP0Mdest = &DC[DIR_P0M *size_MatC]; - fM0Pdest = &DC[DIR_M0P *size_MatC]; - f0PPdest = &DC[DIR_0PP *size_MatC]; - f0MMdest = &DC[DIR_0MM *size_MatC]; - f0PMdest = &DC[DIR_0PM *size_MatC]; - f0MPdest = &DC[DIR_0MP *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = &DC[DIR_MMM *size_MatC]; - fMMPdest = &DC[DIR_MMP *size_MatC]; - fMPPdest = &DC[DIR_MPP *size_MatC]; - fMPMdest = &DC[DIR_MPM *size_MatC]; - fPPMdest = &DC[DIR_PPM *size_MatC]; - fPPPdest = &DC[DIR_PPP *size_MatC]; - fPMPdest = &DC[DIR_PMP *size_MatC]; - fPMMdest = &DC[DIR_PMM *size_MatC]; + fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0P0 * 
numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00dest = &DC[DIR_M00 *size_MatC]; - fM00dest = &DC[DIR_P00 *size_MatC]; - f0P0dest = &DC[DIR_0M0 *size_MatC]; - f0M0dest = &DC[DIR_0P0 *size_MatC]; - f00Pdest = &DC[DIR_00M *size_MatC]; - f00Mdest = &DC[DIR_00P *size_MatC]; - fPP0dest = &DC[DIR_MM0 *size_MatC]; - fMM0dest = &DC[DIR_PP0 *size_MatC]; - fPM0dest = &DC[DIR_MP0 *size_MatC]; - fMP0dest = &DC[DIR_PM0 *size_MatC]; - fP0Pdest = &DC[DIR_M0M *size_MatC]; - fM0Mdest = &DC[DIR_P0P *size_MatC]; - fP0Mdest = &DC[DIR_M0P *size_MatC]; - fM0Pdest = &DC[DIR_P0M *size_MatC]; - f0PPdest = &DC[DIR_0MM *size_MatC]; - f0MMdest = &DC[DIR_0PP *size_MatC]; - f0PMdest = &DC[DIR_0MP *size_MatC]; - f0MPdest = &DC[DIR_0PM *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = 
&DC[DIR_PPP *size_MatC]; - fMMPdest = &DC[DIR_PPM *size_MatC]; - fMPPdest = &DC[DIR_PMM *size_MatC]; - fMPMdest = &DC[DIR_PMP *size_MatC]; - fPPMdest = &DC[DIR_MMP *size_MatC]; - fPPPdest = &DC[DIR_MMM *size_MatC]; - fPMPdest = &DC[DIR_MPM *size_MatC]; - fPMMdest = &DC[DIR_MPP *size_MatC]; + fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; } Distributions6 G; if (isEvenTimestep == true) { - G.g[DIR_P00] = &G6[DIR_P00 *size_MatC]; - G.g[DIR_M00] = &G6[DIR_M00 *size_MatC]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_MatC]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_MatC]; - G.g[DIR_00P] = &G6[DIR_00P *size_MatC]; - G.g[DIR_00M] = &G6[DIR_00M *size_MatC]; + G.g[DIR_P00] = &G6[DIR_P00 
* numberOfLBnodesCoarse]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_MatC]; - G.g[DIR_P00] = &G6[DIR_M00 *size_MatC]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_MatC]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_MatC]; - G.g[DIR_00M] = &G6[DIR_00P *size_MatC]; - G.g[DIR_00P] = &G6[DIR_00M *size_MatC]; + G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse]; + G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodesCoarse]; + G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse]; + G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse]; + G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse]; + G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// @@ -1270,8 +1270,8 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC, unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* posC, unsigned int* posFSWB, @@ -1291,33 +1291,33 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC, *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource; - fP00source = &DF[DIR_P00 *size_MatF]; - fM00source = &DF[DIR_M00 *size_MatF]; - f0P0source = &DF[DIR_0P0 *size_MatF]; - f0M0source = &DF[DIR_0M0 *size_MatF]; - f00Psource = &DF[DIR_00P *size_MatF]; - f00Msource = &DF[DIR_00M *size_MatF]; - fPP0source = &DF[DIR_PP0 *size_MatF]; - fMM0source = &DF[DIR_MM0 *size_MatF]; - fPM0source = &DF[DIR_PM0 *size_MatF]; - fMP0source = &DF[DIR_MP0 *size_MatF]; - fP0Psource = &DF[DIR_P0P *size_MatF]; - fM0Msource = &DF[DIR_M0M *size_MatF]; - 
fP0Msource = &DF[DIR_P0M *size_MatF]; - fM0Psource = &DF[DIR_M0P *size_MatF]; - f0PPsource = &DF[DIR_0PP *size_MatF]; - f0MMsource = &DF[DIR_0MM *size_MatF]; - f0PMsource = &DF[DIR_0PM *size_MatF]; - f0MPsource = &DF[DIR_0MP *size_MatF]; - f000source = &DF[DIR_000*size_MatF]; - fMMMsource = &DF[DIR_MMM *size_MatF]; - fMMPsource = &DF[DIR_MMP *size_MatF]; - fMPPsource = &DF[DIR_MPP *size_MatF]; - fMPMsource = &DF[DIR_MPM *size_MatF]; - fPPMsource = &DF[DIR_PPM *size_MatF]; - fPPPsource = &DF[DIR_PPP *size_MatF]; - fPMPsource = &DF[DIR_PMP *size_MatF]; - fPMMsource = &DF[DIR_PMM *size_MatF]; + fP00source = &DF[DIR_P00 * numberOfLBnodesFine]; + fM00source = &DF[DIR_M00 * numberOfLBnodesFine]; + f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine]; + f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine]; + f00Psource = &DF[DIR_00P * numberOfLBnodesFine]; + f00Msource = &DF[DIR_00M * numberOfLBnodesFine]; + fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine]; + fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine]; + fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine]; + fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine]; + fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine]; + fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine]; + fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine]; + fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine]; + f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine]; + f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine]; + f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine]; + f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine]; + f000source = &DF[DIR_000 * numberOfLBnodesFine]; + fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine]; + fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine]; + fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine]; + fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine]; + fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine]; + fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine]; + fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine]; + fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine]; real *fP00dest, 
*fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest, @@ -1326,83 +1326,83 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC, if (isEvenTimestep==true) { - fP00dest = &DC[DIR_P00 *size_MatC]; - fM00dest = &DC[DIR_M00 *size_MatC]; - f0P0dest = &DC[DIR_0P0 *size_MatC]; - f0M0dest = &DC[DIR_0M0 *size_MatC]; - f00Pdest = &DC[DIR_00P *size_MatC]; - f00Mdest = &DC[DIR_00M *size_MatC]; - fPP0dest = &DC[DIR_PP0 *size_MatC]; - fMM0dest = &DC[DIR_MM0 *size_MatC]; - fPM0dest = &DC[DIR_PM0 *size_MatC]; - fMP0dest = &DC[DIR_MP0 *size_MatC]; - fP0Pdest = &DC[DIR_P0P *size_MatC]; - fM0Mdest = &DC[DIR_M0M *size_MatC]; - fP0Mdest = &DC[DIR_P0M *size_MatC]; - fM0Pdest = &DC[DIR_M0P *size_MatC]; - f0PPdest = &DC[DIR_0PP *size_MatC]; - f0MMdest = &DC[DIR_0MM *size_MatC]; - f0PMdest = &DC[DIR_0PM *size_MatC]; - f0MPdest = &DC[DIR_0MP *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = &DC[DIR_MMM *size_MatC]; - fMMPdest = &DC[DIR_MMP *size_MatC]; - fMPPdest = &DC[DIR_MPP *size_MatC]; - fMPMdest = &DC[DIR_MPM *size_MatC]; - fPPMdest = &DC[DIR_PPM *size_MatC]; - fPPPdest = &DC[DIR_PPP *size_MatC]; - fPMPdest = &DC[DIR_PMP *size_MatC]; - fPMMdest = &DC[DIR_PMM *size_MatC]; + fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0MMdest = 
&DC[DIR_0MM * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; } else { - fP00dest = &DC[DIR_M00 *size_MatC]; - fM00dest = &DC[DIR_P00 *size_MatC]; - f0P0dest = &DC[DIR_0M0 *size_MatC]; - f0M0dest = &DC[DIR_0P0 *size_MatC]; - f00Pdest = &DC[DIR_00M *size_MatC]; - f00Mdest = &DC[DIR_00P *size_MatC]; - fPP0dest = &DC[DIR_MM0 *size_MatC]; - fMM0dest = &DC[DIR_PP0 *size_MatC]; - fPM0dest = &DC[DIR_MP0 *size_MatC]; - fMP0dest = &DC[DIR_PM0 *size_MatC]; - fP0Pdest = &DC[DIR_M0M *size_MatC]; - fM0Mdest = &DC[DIR_P0P *size_MatC]; - fP0Mdest = &DC[DIR_M0P *size_MatC]; - fM0Pdest = &DC[DIR_P0M *size_MatC]; - f0PPdest = &DC[DIR_0MM *size_MatC]; - f0MMdest = &DC[DIR_0PP *size_MatC]; - f0PMdest = &DC[DIR_0MP *size_MatC]; - f0MPdest = &DC[DIR_0PM *size_MatC]; - f000dest = &DC[DIR_000*size_MatC]; - fMMMdest = &DC[DIR_PPP *size_MatC]; - fMMPdest = &DC[DIR_PPM *size_MatC]; - fMPPdest = &DC[DIR_PMM *size_MatC]; - fMPMdest = &DC[DIR_PMP *size_MatC]; - fPPMdest = &DC[DIR_MMP *size_MatC]; - fPPPdest = &DC[DIR_MMM *size_MatC]; - fPMPdest = &DC[DIR_MPM *size_MatC]; - fPMMdest = &DC[DIR_MPP *size_MatC]; + fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse]; + fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse]; + f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse]; + f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse]; + f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse]; + f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse]; + fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse]; + 
fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse]; + fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse]; + fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse]; + fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse]; + fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse]; + fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse]; + fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse]; + f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse]; + f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse]; + f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse]; + f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse]; + f000dest = &DC[DIR_000 * numberOfLBnodesCoarse]; + fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse]; + fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse]; + fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse]; + fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse]; + fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse]; + fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse]; + fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse]; + fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse]; } Distributions6 G; if (isEvenTimestep == true) { - G.g[DIR_P00] = &G6[DIR_P00 *size_MatC]; - G.g[DIR_M00] = &G6[DIR_M00 *size_MatC]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_MatC]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_MatC]; - G.g[DIR_00P] = &G6[DIR_00P *size_MatC]; - G.g[DIR_00M] = &G6[DIR_00M *size_MatC]; + G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesCoarse]; + G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse]; + G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse]; + G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse]; + G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse]; + G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_MatC]; - G.g[DIR_P00] = &G6[DIR_M00 *size_MatC]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_MatC]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_MatC]; - G.g[DIR_00M] = &G6[DIR_00P *size_MatC]; - G.g[DIR_00P] = &G6[DIR_00M *size_MatC]; + G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse]; + G.g[DIR_P00] = &G6[DIR_M00 * 
numberOfLBnodesCoarse]; + G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse]; + G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse]; + G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse]; + G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu index f4160b89c047a7e6244a5579baae03d30b3c89cb..0724002cffa3a47820664851ffefd1c35dbe0235 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu @@ -32,12 +32,13 @@ //======================================================================================= #include "DataTypes.h" -#include "Kernel/Utilities/DistributionHelper.cuh" -#include "Kernel/Utilities/ChimeraTransformation.h" -#include "Kernel/Utilities/ScalingHelperFunctions.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" +#include "LBM/GPUHelperFunctions/ScalingUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////// //! \brief Calculate the interpolated distributions on the fine destination nodes @@ -226,8 +227,8 @@ __global__ void scaleCF_compressible( unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, - unsigned int numberOfLBnodesCoarse, - unsigned int numberOfLBnodesFine, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, @@ -237,13 +238,13 @@ __global__ void scaleCF_compressible( OffCF offsetCF) { //////////////////////////////////////////////////////////////////////////////// - //! 
- Get the thread index coordinates from threadId_100, blockId_100, blockDim and gridDim. + //! - Get the node index coordinates from threadId_100, blockId_100, blockDim and gridDim. //! - const unsigned k_thread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Return for non-interface node - if (k_thread >= numberOfInterfaceNodes) + if (nodeIndex >= numberOfInterfaceNodes) return; ////////////////////////////////////////////////////////////////////////// @@ -252,8 +253,9 @@ __global__ void scaleCF_compressible( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 distFine = vf::gpu::getDistributionReferences27(distributionsFine, numberOfLBnodesFine, true); - Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); + Distributions27 distFine, distCoarse; + getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true); + getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! 
- declare local variables for source nodes @@ -289,7 +291,7 @@ __global__ void scaleCF_compressible( // source node BSW = MMM //////////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - unsigned int k_base_000 = indicesCoarseMMM[k_thread]; + unsigned int k_base_000 = indicesCoarseMMM[nodeIndex]; unsigned int k_base_M00 = neighborXcoarse [k_base_000]; unsigned int k_base_0M0 = neighborYcoarse [k_base_000]; unsigned int k_base_00M = neighborZcoarse [k_base_000]; @@ -452,119 +454,240 @@ __global__ void scaleCF_compressible( real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111; real d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111; - a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP - - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP + - c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + - c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP + - c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP + - c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP + - c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + - c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c64o1; - b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + 
c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM + - c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP - - c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP - - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP + - kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP + - c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP + - c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP + - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP + - c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM + - c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM + - c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - - c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c64o1; - c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP - - c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM + - c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP - - c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM - - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM - - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM - - c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * 
kyzFromfcNEQ_PMM + - c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM + - c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - - c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - - c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM + - c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) / - c64o1; - a_100 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; - b_100 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; - c_100 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; - a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP + - kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + - kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM + - c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + - c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + - c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c16o1; - b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM + - kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP + - c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) / - c8o1; - c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM + - kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - - c2o1 * 
vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) / - c8o1; - a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; - b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; - c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; - a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM - - kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP + - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - c8o1; - b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM - - c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP + - c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP + - kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP - - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM + - c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM - - c2o1 * vx3_MMP) / - c16o1; - c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM - - kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) / - c8o1; - a_001 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1; - b_001 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1; - c_001 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + 
vx3_PMP - vx3_MMM + vx3_MMP) / c4o1; - a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM + - kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP - - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - c8o1; - b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM + - kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP + - c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - c8o1; - c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP + - c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM - - c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP + - c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP + - c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP - - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP + - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - c16o1; - a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1; - b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1; - c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1; - a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; - a_011 = (-vx1_PPM + vx1_PPP 
- vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; - - a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP; - b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP; - c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP; + // a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - + // kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP - + // kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - + // kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - + // c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP + + // c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + + // c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP + + // c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP + + // c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP + + // c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - + // c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + + // c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / + // c64o1; + a_000 = + c1o64 * (c2o1 * (((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) + + 
((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + + ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) + + c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) + + ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); + + // b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM + + // c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP - + // c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP - + // kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP + + // kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP + + // c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP + + // c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP + + // c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP + + // c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - + // c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM + + // c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM + + // c8o1 * 
vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - + // c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / + // c64o1; + b_000 = + c1o64 * (c2o1 * (((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + + ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + + ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) + + ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + + ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) + + ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + + c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); + + // c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + + // kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP - + // c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM + + // c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP - + // c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM - + // c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM - + // c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM - + // c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM + 
+ // c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM + + // c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - + // c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - + // c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM + + // c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) / + // c64o1; + c_000 = + c1o64 * (c2o1 * (((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) + + ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + + ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + + ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + + ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) + + ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) + + ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) + + c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) + + ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP))); + + // a_100 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; + a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM))); + + // b_100 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; + b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM))); + + // 
c_100 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; + c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM))); + + // a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + + // kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP + + // kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + + // kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM + + // c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + + // c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + + // c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / + // c16o1; + a_200 = + c1o16 * (c2o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) + + ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); + + // b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM + + // kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP + + // c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) / + // c8o1; + b_200 = + c1o8 * (c2o1 * (-((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) + + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - 
kxyFromfcNEQ_MMP)) + + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM))); + + // c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM + + // kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - c2o1 * + // vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) / + // c8o1; + c_200 = c1o8 * (c2o1 * (((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) + + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM))); + + // a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; + a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP))); + + // b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; + b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP))); + + // c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; + c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP))); + + // a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM - + // kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP + + // c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / + // c8o1; + a_020 = + c1o8 * (c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + + ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP))); + + // b_020 = (-c2o1 * 
kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM - + // c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP + + // c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP + + // kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP - + // kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - + // c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM + + // c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM - + // c2o1 * vx3_MMP) / + // c16o1; + b_020 = + c1o16 * (c2o1 * (((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + + ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); + + // c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM - + // kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + c2o1 * + // vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) / + // c8o1; + c_020 = c1o8 * (c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) + + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + + ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP))); + + // a_001 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM 
+ vx1_PMP - vx1_MMM + vx1_MMP) / c4o1; + a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM))); + + // b_001 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1; + b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM))); + + // c_001 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1; + c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM))); + + // a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM + + // kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP - + // c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / + // c8o1; + a_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) + + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + + ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM))); + + // b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM + + // kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP + c2o1 * + // vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / + // c8o1; + b_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) + + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + + ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM))); + + // c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - + // kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + 
kxxMyyFromfcNEQ_MMP + + // c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM - + // c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP + + // c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP + + // c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP - + // c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP + + // c2o1 * vx2_MMM - c2o1 * vx2_MMP) / + // c16o1; + c_002 = + c1o16 * (c2o1 * (((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) + + ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) + + ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) + + ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM))); + + // a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1; + // b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1; + // c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1; + a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP))); + b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP))); + c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP))); + + // a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; + // b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - 
vx2_MMP) / c2o1; + // c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; + a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP))); + b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP))); + c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))); + + // a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; + // b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; + // c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; + a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP))); + b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))); + c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP))); + + // a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP; + // b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP; + // c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP; + a_111 = ((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPM - vx1_PMP) + (vx1_PMM - vx1_MPP)); + b_111 = ((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPM - vx2_PMP) + (vx2_PMM - vx2_MPP)); + c_111 = ((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPM - vx3_PMP) + (vx3_PMM - vx3_MPP)); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -618,9 +741,9 @@ __global__ void scaleCF_compressible( //////////////////////////////////////////////////////////////////////////////// //! - Set the relative position of the offset cell {-1, 0, 1} //! 
- real xoff = offsetCF.xOffCF[k_thread]; - real yoff = offsetCF.yOffCF[k_thread]; - real zoff = offsetCF.zOffCF[k_thread]; + real xoff = offsetCF.xOffCF[nodeIndex]; + real yoff = offsetCF.yOffCF[nodeIndex]; + real zoff = offsetCF.zOffCF[nodeIndex]; real xoff_sq = xoff * xoff; real yoff_sq = yoff * yoff; @@ -632,14 +755,29 @@ __global__ void scaleCF_compressible( ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1)) ? c0o1 : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001); - d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8; - d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4; - d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4; - d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4; - d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2; - d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2; - d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2; - d_111 = -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP; + // d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8; + d_000 = c1o8 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))); + + // d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4; + d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM))); + + // d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4; + d_010 = c1o4 * (((drho_PPP - drho_MMM) 
+ (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP))); + + // d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4; + d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM))); + + // d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2; + d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))); + + // d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2; + d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP))); + + // d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2; + d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM))); + + // d_111 = -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP; + d_111 = (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_PMM - drho_MPP) + (drho_MPM - drho_PMP))); ////////////////////////////////////////////////////////////////////////// //! 
- Extrapolation for refinement in to the wall (polynomial coefficients) @@ -768,7 +906,7 @@ __global__ void scaleCF_compressible( ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - k_base_000 = indicesFineMMM[k_thread]; + k_base_000 = indicesFineMMM[nodeIndex]; k_base_M00 = neighborXfine [k_base_000]; k_base_0M0 = neighborYfine [k_base_000]; k_base_00M = neighborZfine [k_base_000]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu index 3ab8b9d20279eff341ca42d20cee9fe7550a2039..e7d999d108e59bca98bf87b813f9479f1c601266 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu @@ -31,12 +31,13 @@ //! \author Martin Schoenherr, Anna Wellmann //======================================================================================= -#include "Kernel/Utilities/DistributionHelper.cuh" -#include "Kernel/Utilities/ChimeraTransformation.h" -#include "Kernel/Utilities/ScalingHelperFunctions.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ScalingUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////// //! 
\brief Interpolate from fine to coarse @@ -54,8 +55,8 @@ __global__ void scaleFC_compressible( unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, - unsigned int numberOfLBnodesCoarse, - unsigned int numberOfLBnodesFine, + unsigned long long numberOfLBnodesCoarse, + unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, @@ -65,13 +66,13 @@ __global__ void scaleFC_compressible( OffFC offsetFC) { //////////////////////////////////////////////////////////////////////////////// - //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned k_thread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Return for non-interface node - if (k_thread >= numberOfInterfaceNodes) + if (nodeIndex >= numberOfInterfaceNodes) return; ////////////////////////////////////////////////////////////////////////// @@ -80,8 +81,9 @@ __global__ void scaleFC_compressible( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 distFine = vf::gpu::getDistributionReferences27(distributionsFine, numberOfLBnodesFine, true); - Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); + Distributions27 distFine, distCoarse; + getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true); + getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! 
- declare local variables for source nodes @@ -117,7 +119,7 @@ __global__ void scaleFC_compressible( // source node BSW = MMM ////////////////////////////////////////////////////////////////////////// // index of the base node and its neighbors - unsigned int k_base_000 = indicesFineMMM[k_thread]; + unsigned int k_base_000 = indicesFineMMM[nodeIndex]; unsigned int k_base_M00 = neighborXfine [k_base_000]; unsigned int k_base_0M0 = neighborYfine [k_base_000]; unsigned int k_base_00M = neighborZfine [k_base_000]; @@ -278,115 +280,120 @@ __global__ void scaleFC_compressible( real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011; real d_000, d_100, d_010, d_001, d_110, d_101, d_011; - a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP - - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP + - c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + - c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP + - c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP + - c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP + - c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + - c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c64o1; - b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + 
c2o1 * kxxMyyFromfcNEQ_MPM + - c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP - - c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP - - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP + - kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP + - c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP + - c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP + - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP + - c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM + - c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM + - c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - - c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c64o1; - c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP - - c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM + - c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP - - c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM - - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM - - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM - - c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM + - c2o1 * 
kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM + - c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - - c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - - c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM + - c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) / - c64o1; - a_100 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; - b_100 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; - c_100 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; - a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP + - kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP + - kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + - kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM + - c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + - c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + - c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) / - c16o1; - b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM + - kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP + - c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) / - c8o1; - c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM + - kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - - c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 
* vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) / - c8o1; - a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1; - b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1; - c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1; - a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM - - kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP + - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - c8o1; - b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM - - c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP + - c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP + - kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP - - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM - - c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM + - c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM - - c2o1 * vx3_MMP) / - c16o1; - c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM - - kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) / - c8o1; - a_001 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1; - b_001 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1; - c_001 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / 
c4o1; - a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM + - kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP - - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - c8o1; - b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM + - kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP + - c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) / - c8o1; - c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP - - kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP + - c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM - - c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP + - c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP + - c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP - - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP + - c2o1 * vx2_MMM - c2o1 * vx2_MMP) / - c16o1; - a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1; - b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1; - c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1; - a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; - a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM 
- vx1_PMP + vx1_MMM - vx1_MMP) / c2o1; - b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1; - c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1; + a_000 = c1o64 * ( + c2o1 * ( + ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) + + ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + + ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) + + c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) + + ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); + b_000 = c1o64 * ( + c2o1 * ( + ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + + ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + + ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) + + ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + + ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) + + ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - 
(vx3_MPM + vx3_PMP))) + + c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); + c_000 = c1o64 * ( + c2o1 * ( + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) + + ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + + ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + + ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + + ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) + + ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) + + ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) + + c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) + + ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP))); + + a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM))); + b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM))); + c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM))); + + a_200 = c1o16 * ( + c2o1 * ( + ((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) + + ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - 
kxxMyyFromfcNEQ_MMP)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM))); + b_200 = c1o8 * ( + c2o1 * ( + -((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) + + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM))); + c_200 = c1o8 * ( + c2o1 * ( + ((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) + + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM))); + + a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP))); + b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP))); + c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP))); + + a_020 = c1o8 * ( + c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + + ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + + ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP))); + b_020 = c1o16 * ( + c2o1 * ( + ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + + ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) + + ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + + ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + 
(kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP))); + c_020 = c1o8 * ( + c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) + + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + + ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP))); + + a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM))); + b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM))); + c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM))); + + a_002 = c1o8 * ( + c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) + + ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + + ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM))); + b_002 = c1o8 * ( + c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) + + ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + + ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM))); + c_002 = c1o16 * ( + c2o1 * ( + ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + + ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) + + ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) + + ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) + + ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + + ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM))); + + a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + 
(vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP))); + b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP))); + c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP))); + + a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP))); + b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP))); + c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))); + + a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP))); + b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))); + c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP))); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -399,9 +406,9 @@ __global__ void scaleFC_compressible( //////////////////////////////////////////////////////////////////////////////// //! - Set the relative position of the offset cell {-1, 0, 1} //! - real xoff = offsetFC.xOffFC[k_thread]; - real yoff = offsetFC.yOffFC[k_thread]; - real zoff = offsetFC.zOffFC[k_thread]; + real xoff = offsetFC.xOffFC[nodeIndex]; + real yoff = offsetFC.yOffFC[nodeIndex]; + real zoff = offsetFC.zOffFC[nodeIndex]; real xoff_sq = xoff * xoff; real yoff_sq = yoff * yoff; @@ -412,15 +419,14 @@ __global__ void scaleFC_compressible( //! real LaplaceRho = ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1)) - ? 
c0o1 - : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001); - d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP - c2o1 * LaplaceRho) * c1o8; - d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4; - d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4; - d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4; - d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2; - d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2; - d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2; + ? c0o1 : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001); + d_000 = c1o8 * ((((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))) - c2o1 * LaplaceRho); + d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM))); + d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP))); + d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM))); + d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))); + d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP))); + d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM))); ////////////////////////////////////////////////////////////////////////// @@ -639,7 +645,7 @@ __global__ void scaleFC_compressible( 
//////////////////////////////////////////////////////////////////////////////////// // index of the destination node and its neighbors - k_000 = indicesCoarse000[k_thread]; + k_000 = indicesCoarse000[nodeIndex]; k_M00 = neighborXcoarse [k_000]; k_0M0 = neighborYcoarse [k_000]; k_00M = neighborZcoarse [k_000]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu index 6d497d2a1ab7ec305bec4f1ad1ed2e2d63c4dc27..23666fdcf6714d30b40b4750c52f129cc472761c 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu @@ -15,7 +15,7 @@ __global__ void LBInit27( int myid, unsigned int* neighborY, unsigned int* neighborZ, real* vParabel, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz, @@ -24,33 +24,33 @@ __global__ void LBInit27( int myid, int maxlev) { Distributions27 D; - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = 
&DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; //////////////////////////////////////////////////////////////////////////////// unsigned int k; // Zugriff auf arrays im device // @@ -142,32 +142,32 @@ __global__ void LBInit27( int myid, real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D.f[DIR_000])[kzero] = c8o27* (drho-cu_sq); - (D.f[DIR_P00 ])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); - (D.f[DIR_M00 ])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); - (D.f[DIR_0P0 ])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); - (D.f[DIR_0M0 ])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); - 
(D.f[DIR_00P ])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); - (D.f[DIR_00M ])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); - (D.f[DIR_PP0 ])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - (D.f[DIR_MM0 ])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - (D.f[DIR_PM0 ])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - (D.f[DIR_MP0 ])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - (D.f[DIR_P0P ])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - (D.f[DIR_M0M ])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - (D.f[DIR_P0M ])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); - (D.f[DIR_M0P ])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - (D.f[DIR_0PP ])[ktn ] = c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); - (D.f[DIR_0MM ])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - (D.f[DIR_0PM ])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); - (D.f[DIR_0MP ])[kts ] = c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - (D.f[DIR_PPP ])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); - (D.f[DIR_MMM ])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - (D.f[DIR_PPM ])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - (D.f[DIR_MMP ])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - (D.f[DIR_PMP ])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - (D.f[DIR_MPM ])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - (D.f[DIR_PMM ])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); - 
(D.f[DIR_MPP ])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + (D.f[DIR_P00])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_M00])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); + (D.f[DIR_0P0])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_0M0])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_00P])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_00M])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_PP0])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + (D.f[DIR_MM0])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + (D.f[DIR_PM0])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + (D.f[DIR_MP0])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + (D.f[DIR_P0P])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + (D.f[DIR_M0M])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + (D.f[DIR_P0M])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + (D.f[DIR_M0P])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + (D.f[DIR_0PP])[ktn ] = c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); + (D.f[DIR_0MM])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + (D.f[DIR_0PM])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); + (D.f[DIR_0MP])[kts ] = c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + (D.f[DIR_PPP])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + (D.f[DIR_MMM])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + (D.f[DIR_PPM])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + 
(D.f[DIR_MMP])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + (D.f[DIR_PMP])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + (D.f[DIR_MPM])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + (D.f[DIR_PMM])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + (D.f[DIR_MPP])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); } //////////////////////////////////////////////////////////////////////////////// @@ -191,7 +191,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD, real omega, bool EvenOrOdd) @@ -207,7 +207,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -218,63 +218,63 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = 
&DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] 
= &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + 
D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// real drho = rho[k];//0.0f;// @@ -397,62 +397,62 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX, real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D.f[DIR_000])[kzero] = c8o27* (drho-cu_sq); - (D.f[DIR_P00 ])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); - (D.f[DIR_M00 ])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); - (D.f[DIR_0P0 ])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); - (D.f[DIR_0M0 ])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); - (D.f[DIR_00P ])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); - (D.f[DIR_00M ])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); - (D.f[DIR_PP0 ])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - (D.f[DIR_MM0 ])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - (D.f[DIR_PM0 ])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - (D.f[DIR_MP0 ])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - (D.f[DIR_P0P ])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - (D.f[DIR_M0M ])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - (D.f[DIR_P0M ])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); - (D.f[DIR_M0P ])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - (D.f[DIR_0PP ])[ktn ] = c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); - (D.f[DIR_0MM ])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - (D.f[DIR_0PM ])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); - (D.f[DIR_0MP ])[kts ] = 
c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - (D.f[DIR_PPP ])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); - (D.f[DIR_MMM ])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - (D.f[DIR_PPM ])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - (D.f[DIR_MMP ])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - (D.f[DIR_PMP ])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - (D.f[DIR_MPM ])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - (D.f[DIR_PMM ])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); - (D.f[DIR_MPP ])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + (D.f[DIR_P00])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_M00])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); + (D.f[DIR_0P0])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_0M0])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_00P])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_00M])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_PP0])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + (D.f[DIR_MM0])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + (D.f[DIR_PM0])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + (D.f[DIR_MP0])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + (D.f[DIR_P0P])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + (D.f[DIR_M0M])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + (D.f[DIR_P0M])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 
-vx3)-cu_sq); + (D.f[DIR_M0P])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + (D.f[DIR_0PP])[ktn ] = c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); + (D.f[DIR_0MM])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + (D.f[DIR_0PM])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); + (D.f[DIR_0MP])[kts ] = c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + (D.f[DIR_PPP])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + (D.f[DIR_MMM])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + (D.f[DIR_PPM])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + (D.f[DIR_MMP])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + (D.f[DIR_PMP])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + (D.f[DIR_MPM])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + (D.f[DIR_PMM])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + (D.f[DIR_MPP])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); ////////////////////////////////////////////////////////////////////////// (D.f[DIR_000])[kzero] += f_ZERO; - (D.f[DIR_P00 ])[ke ] += f_E ; - (D.f[DIR_M00 ])[kw ] += f_E ; - (D.f[DIR_0P0 ])[kn ] += f_N ; - (D.f[DIR_0M0 ])[ks ] += f_N ; - (D.f[DIR_00P ])[kt ] += f_T ; - (D.f[DIR_00M ])[kb ] += f_T ; - (D.f[DIR_PP0 ])[kne ] += f_NE ; - (D.f[DIR_MM0 ])[ksw ] += f_NE ; - (D.f[DIR_PM0 ])[kse ] += f_SE ; - (D.f[DIR_MP0 ])[knw ] += f_SE ; - (D.f[DIR_P0P ])[kte ] += f_TE ; - (D.f[DIR_M0M ])[kbw ] += f_TE ; - (D.f[DIR_P0M ])[kbe ] += f_BE ; - (D.f[DIR_M0P ])[ktw ] += f_BE ; - (D.f[DIR_0PP ])[ktn ] += f_TN ; - (D.f[DIR_0MM ])[kbs ] += f_TN ; - (D.f[DIR_0PM ])[kbn ] += f_BN ; - (D.f[DIR_0MP ])[kts ] += f_BN ; - 
(D.f[DIR_PPP ])[ktne ] += f_TNE ; - (D.f[DIR_MMM ])[kbsw ] += f_TNE ; - (D.f[DIR_PPM ])[kbne ] += f_TSW ; - (D.f[DIR_MMP ])[ktsw ] += f_TSW ; - (D.f[DIR_PMP ])[ktse ] += f_TSE ; - (D.f[DIR_MPM ])[kbnw ] += f_TSE ; - (D.f[DIR_PMM ])[kbse ] += f_TNW ; - (D.f[DIR_MPP ])[ktnw ] += f_TNW ; + (D.f[DIR_P00])[ke ] += f_E ; + (D.f[DIR_M00])[kw ] += f_E ; + (D.f[DIR_0P0])[kn ] += f_N ; + (D.f[DIR_0M0])[ks ] += f_N ; + (D.f[DIR_00P])[kt ] += f_T ; + (D.f[DIR_00M])[kb ] += f_T ; + (D.f[DIR_PP0])[kne ] += f_NE ; + (D.f[DIR_MM0])[ksw ] += f_NE ; + (D.f[DIR_PM0])[kse ] += f_SE ; + (D.f[DIR_MP0])[knw ] += f_SE ; + (D.f[DIR_P0P])[kte ] += f_TE ; + (D.f[DIR_M0M])[kbw ] += f_TE ; + (D.f[DIR_P0M])[kbe ] += f_BE ; + (D.f[DIR_M0P])[ktw ] += f_BE ; + (D.f[DIR_0PP])[ktn ] += f_TN ; + (D.f[DIR_0MM])[kbs ] += f_TN ; + (D.f[DIR_0PM])[kbn ] += f_BN ; + (D.f[DIR_0MP])[kts ] += f_BN ; + (D.f[DIR_PPP])[ktne ] += f_TNE ; + (D.f[DIR_MMM])[kbsw ] += f_TNE ; + (D.f[DIR_PPM])[kbne ] += f_TSW ; + (D.f[DIR_MMP])[ktsw ] += f_TSW ; + (D.f[DIR_PMP])[ktse ] += f_TSE ; + (D.f[DIR_MPM])[kbnw ] += f_TSE ; + (D.f[DIR_PMM])[kbse ] += f_TNW ; + (D.f[DIR_MPP])[ktnw ] += f_TNW ; ////////////////////////////////////////////////////////////////////////// } @@ -460,7 +460,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX, { ////////////////////////////////////////////////////////////////////////// Distributions27 D; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; ////////////////////////////////////////////////////////////////////////// (D.f[DIR_000])[k] = c96o1; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu index c091aa8b9a29017ddc0f6ea6584e805d7afc4859..7f67d1692f7e136a6537be6780fe8625adc33e22 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu +++ 
b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu @@ -47,7 +47,7 @@ __global__ void InitAD27( real* velocityX, real* velocityY, real* velocityZ, - uint size_Mat, + unsigned long long numberOfLBnodes, real* distributionsAD, bool isEvenTimestep) { @@ -68,7 +68,7 @@ __global__ void InitAD27( ////////////////////////////////////////////////////////////////////////// // run for all indices in size_Mat and fluid nodes - if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) + if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID)) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -77,63 +77,63 @@ __global__ void InitAD27( Distributions27 distAD; if (isEvenTimestep) { - distAD.f[DIR_P00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_M00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0PP ] = &distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0MP *size_Mat]; - distAD.f[DIR_000] = 
&distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat]; + distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + 
distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } else { - distAD.f[DIR_M00 ] = &distributionsAD[DIR_P00 *size_Mat]; - distAD.f[DIR_P00 ] = &distributionsAD[DIR_M00 *size_Mat]; - distAD.f[DIR_0M0 ] = &distributionsAD[DIR_0P0 *size_Mat]; - distAD.f[DIR_0P0 ] = &distributionsAD[DIR_0M0 *size_Mat]; - distAD.f[DIR_00M ] = &distributionsAD[DIR_00P *size_Mat]; - distAD.f[DIR_00P ] = &distributionsAD[DIR_00M *size_Mat]; - distAD.f[DIR_MM0 ] = &distributionsAD[DIR_PP0 *size_Mat]; - distAD.f[DIR_PP0 ] = &distributionsAD[DIR_MM0 *size_Mat]; - distAD.f[DIR_MP0 ] = &distributionsAD[DIR_PM0 *size_Mat]; - distAD.f[DIR_PM0 ] = &distributionsAD[DIR_MP0 *size_Mat]; - distAD.f[DIR_M0M ] = &distributionsAD[DIR_P0P *size_Mat]; - distAD.f[DIR_P0P ] = &distributionsAD[DIR_M0M *size_Mat]; - distAD.f[DIR_M0P ] = &distributionsAD[DIR_P0M *size_Mat]; - distAD.f[DIR_P0M ] = &distributionsAD[DIR_M0P *size_Mat]; - distAD.f[DIR_0MM ] = &distributionsAD[DIR_0PP *size_Mat]; - distAD.f[DIR_0PP ] = &distributionsAD[DIR_0MM *size_Mat]; - distAD.f[DIR_0MP ] = &distributionsAD[DIR_0PM *size_Mat]; - distAD.f[DIR_0PM ] = &distributionsAD[DIR_0MP *size_Mat]; - distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat]; - distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat]; - distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat]; - distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat]; - distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat]; - distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat]; - distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat]; - distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat]; - distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat]; + distAD.f[DIR_M00] = 
&distributionsAD[DIR_P00 * numberOfLBnodes]; + distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes]; + distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes]; + distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes]; + distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes]; + distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes]; + distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes]; + distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes]; + distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes]; + distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes]; + distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes]; + distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes]; + distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes]; + distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes]; + distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes]; + distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes]; + distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes]; + distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes]; + distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes]; + distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes]; + distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes]; + distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes]; + distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes]; + distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes]; + distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes]; + distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes]; + distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// //! 
- Set local velocities and concetration @@ -178,32 +178,32 @@ __global__ void InitAD27( real cu_sq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3); (distAD.f[DIR_000])[kzero] = c8o27 * conc * (c1o1 - cu_sq); - (distAD.f[DIR_P00 ])[ke ] = c2o27 * conc * (c1o1 + c3o1 * ( vx1 ) + c9o2 * ( vx1 ) * ( vx1 ) - cu_sq); - (distAD.f[DIR_M00 ])[kw ] = c2o27 * conc * (c1o1 + c3o1 * (-vx1 ) + c9o2 * (-vx1 ) * (-vx1 ) - cu_sq); - (distAD.f[DIR_0P0 ])[kn ] = c2o27 * conc * (c1o1 + c3o1 * ( vx2 ) + c9o2 * ( vx2 ) * ( vx2 ) - cu_sq); - (distAD.f[DIR_0M0 ])[ks ] = c2o27 * conc * (c1o1 + c3o1 * ( - vx2 ) + c9o2 * ( - vx2 ) * ( - vx2 ) - cu_sq); - (distAD.f[DIR_00P ])[kt ] = c2o27 * conc * (c1o1 + c3o1 * ( vx3) + c9o2 * ( vx3) * ( vx3) - cu_sq); - (distAD.f[DIR_00M ])[kb ] = c2o27 * conc * (c1o1 + c3o1 * ( - vx3) + c9o2 * ( - vx3) * ( - vx3) - cu_sq); - (distAD.f[DIR_PP0 ])[kne ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 + vx2 ) + c9o2 * ( vx1 + vx2 ) * ( vx1 + vx2 ) - cu_sq); - (distAD.f[DIR_MM0 ])[ksw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 - vx2 ) + c9o2 * (-vx1 - vx2 ) * (-vx1 - vx2 ) - cu_sq); - (distAD.f[DIR_PM0 ])[kse ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 - vx2 ) + c9o2 * ( vx1 - vx2 ) * ( vx1 - vx2 ) - cu_sq); - (distAD.f[DIR_MP0 ])[knw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 + vx2 ) + c9o2 * (-vx1 + vx2 ) * (-vx1 + vx2 ) - cu_sq); - (distAD.f[DIR_P0P ])[kte ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 + vx3) + c9o2 * ( vx1 + vx3) * ( vx1 + vx3) - cu_sq); - (distAD.f[DIR_M0M ])[kbw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 - vx3) + c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq); - (distAD.f[DIR_P0M ])[kbe ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 - vx3) + c9o2 * ( vx1 - vx3) * ( vx1 - vx3) - cu_sq); - (distAD.f[DIR_M0P ])[ktw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 + vx3) + c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq); - (distAD.f[DIR_0PP ])[ktn ] = c1o54 * conc * (c1o1 + c3o1 * ( vx2 + vx3) + c9o2 * ( vx2 + vx3) * ( vx2 + vx3) - cu_sq); - (distAD.f[DIR_0MM ])[kbs ] = c1o54 * conc * (c1o1 + c3o1 * ( - 
vx2 - vx3) + c9o2 * ( - vx2 - vx3) * ( - vx2 - vx3) - cu_sq); - (distAD.f[DIR_0PM ])[kbn ] = c1o54 * conc * (c1o1 + c3o1 * ( vx2 - vx3) + c9o2 * ( vx2 - vx3) * ( vx2 - vx3) - cu_sq); - (distAD.f[DIR_0MP ])[kts ] = c1o54 * conc * (c1o1 + c3o1 * ( - vx2 + vx3) + c9o2 * ( - vx2 + vx3) * ( - vx2 + vx3) - cu_sq); - (distAD.f[DIR_PPP ])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq); - (distAD.f[DIR_MMM ])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq); - (distAD.f[DIR_PPM ])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq); - (distAD.f[DIR_MMP ])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq); - (distAD.f[DIR_PMP ])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq); - (distAD.f[DIR_MPM ])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq); - (distAD.f[DIR_PMM ])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq); - (distAD.f[DIR_MPP ])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq); + (distAD.f[DIR_P00])[ke ] = c2o27 * conc * (c1o1 + c3o1 * ( vx1 ) + c9o2 * ( vx1 ) * ( vx1 ) - cu_sq); + (distAD.f[DIR_M00])[kw ] = c2o27 * conc * (c1o1 + c3o1 * (-vx1 ) + c9o2 * (-vx1 ) * (-vx1 ) - cu_sq); + (distAD.f[DIR_0P0])[kn ] = c2o27 * conc * (c1o1 + c3o1 * ( vx2 ) + c9o2 * ( vx2 ) * ( vx2 ) - cu_sq); + (distAD.f[DIR_0M0])[ks ] = c2o27 * conc * (c1o1 + c3o1 * ( - vx2 ) + c9o2 * ( - vx2 ) * ( - vx2 ) - cu_sq); + (distAD.f[DIR_00P])[kt ] = c2o27 * conc * (c1o1 + c3o1 * ( vx3) + c9o2 * ( vx3) * ( vx3) - cu_sq); + (distAD.f[DIR_00M])[kb ] = c2o27 * 
conc * (c1o1 + c3o1 * ( - vx3) + c9o2 * ( - vx3) * ( - vx3) - cu_sq); + (distAD.f[DIR_PP0])[kne ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 + vx2 ) + c9o2 * ( vx1 + vx2 ) * ( vx1 + vx2 ) - cu_sq); + (distAD.f[DIR_MM0])[ksw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 - vx2 ) + c9o2 * (-vx1 - vx2 ) * (-vx1 - vx2 ) - cu_sq); + (distAD.f[DIR_PM0])[kse ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 - vx2 ) + c9o2 * ( vx1 - vx2 ) * ( vx1 - vx2 ) - cu_sq); + (distAD.f[DIR_MP0])[knw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 + vx2 ) + c9o2 * (-vx1 + vx2 ) * (-vx1 + vx2 ) - cu_sq); + (distAD.f[DIR_P0P])[kte ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 + vx3) + c9o2 * ( vx1 + vx3) * ( vx1 + vx3) - cu_sq); + (distAD.f[DIR_M0M])[kbw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 - vx3) + c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq); + (distAD.f[DIR_P0M])[kbe ] = c1o54 * conc * (c1o1 + c3o1 * ( vx1 - vx3) + c9o2 * ( vx1 - vx3) * ( vx1 - vx3) - cu_sq); + (distAD.f[DIR_M0P])[ktw ] = c1o54 * conc * (c1o1 + c3o1 * (-vx1 + vx3) + c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq); + (distAD.f[DIR_0PP])[ktn ] = c1o54 * conc * (c1o1 + c3o1 * ( vx2 + vx3) + c9o2 * ( vx2 + vx3) * ( vx2 + vx3) - cu_sq); + (distAD.f[DIR_0MM])[kbs ] = c1o54 * conc * (c1o1 + c3o1 * ( - vx2 - vx3) + c9o2 * ( - vx2 - vx3) * ( - vx2 - vx3) - cu_sq); + (distAD.f[DIR_0PM])[kbn ] = c1o54 * conc * (c1o1 + c3o1 * ( vx2 - vx3) + c9o2 * ( vx2 - vx3) * ( vx2 - vx3) - cu_sq); + (distAD.f[DIR_0MP])[kts ] = c1o54 * conc * (c1o1 + c3o1 * ( - vx2 + vx3) + c9o2 * ( - vx2 + vx3) * ( - vx2 + vx3) - cu_sq); + (distAD.f[DIR_PPP])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq); + (distAD.f[DIR_MMM])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq); + (distAD.f[DIR_PPM])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq); + (distAD.f[DIR_MMP])[ktsw ] = c1o216 * conc 
* (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq); + (distAD.f[DIR_PMP])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq); + (distAD.f[DIR_MPM])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq); + (distAD.f[DIR_PMM])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq); + (distAD.f[DIR_MPP])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq); } } @@ -263,63 +263,63 @@ __global__ void InitAD27( // Distributions27 D27; // if (EvenOrOdd==true) // { -// D27.f[DIR_P00 ] = &DD27[DIR_P00 *size_Mat]; -// D27.f[DIR_M00 ] = &DD27[DIR_M00 *size_Mat]; -// D27.f[DIR_0P0 ] = &DD27[DIR_0P0 *size_Mat]; -// D27.f[DIR_0M0 ] = &DD27[DIR_0M0 *size_Mat]; -// D27.f[DIR_00P ] = &DD27[DIR_00P *size_Mat]; -// D27.f[DIR_00M ] = &DD27[DIR_00M *size_Mat]; -// D27.f[DIR_PP0 ] = &DD27[DIR_PP0 *size_Mat]; -// D27.f[DIR_MM0 ] = &DD27[DIR_MM0 *size_Mat]; -// D27.f[DIR_PM0 ] = &DD27[DIR_PM0 *size_Mat]; -// D27.f[DIR_MP0 ] = &DD27[DIR_MP0 *size_Mat]; -// D27.f[DIR_P0P ] = &DD27[DIR_P0P *size_Mat]; -// D27.f[DIR_M0M ] = &DD27[DIR_M0M *size_Mat]; -// D27.f[DIR_P0M ] = &DD27[DIR_P0M *size_Mat]; -// D27.f[DIR_M0P ] = &DD27[DIR_M0P *size_Mat]; -// D27.f[DIR_0PP ] = &DD27[DIR_0PP *size_Mat]; -// D27.f[DIR_0MM ] = &DD27[DIR_0MM *size_Mat]; -// D27.f[DIR_0PM ] = &DD27[DIR_0PM *size_Mat]; -// D27.f[DIR_0MP ] = &DD27[DIR_0MP *size_Mat]; -// D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; -// D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat]; -// D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat]; -// D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat]; -// D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat]; -// D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat]; -// D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat]; -// D27.f[DIR_PMM ] = &DD27[DIR_PMM 
*size_Mat]; -// D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat]; +// D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat]; +// D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat]; +// D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat]; +// D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat]; +// D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat]; +// D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat]; +// D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat]; +// D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat]; +// D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat]; +// D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat]; +// D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat]; +// D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat]; +// D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat]; +// D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat]; +// D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat]; +// D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat]; +// D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat]; +// D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat]; +// D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; +// D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat]; +// D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat]; +// D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat]; +// D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat]; +// D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat]; +// D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat]; +// D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat]; +// D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat]; // } // else // { -// D27.f[DIR_M00 ] = &DD27[DIR_P00 *size_Mat]; -// D27.f[DIR_P00 ] = &DD27[DIR_M00 *size_Mat]; -// D27.f[DIR_0M0 ] = &DD27[DIR_0P0 *size_Mat]; -// D27.f[DIR_0P0 ] = &DD27[DIR_0M0 *size_Mat]; -// D27.f[DIR_00M ] = &DD27[DIR_00P *size_Mat]; -// D27.f[DIR_00P ] = &DD27[DIR_00M *size_Mat]; -// D27.f[DIR_MM0 ] = &DD27[DIR_PP0 *size_Mat]; -// D27.f[DIR_PP0 ] = &DD27[DIR_MM0 *size_Mat]; -// D27.f[DIR_MP0 ] = &DD27[DIR_PM0 *size_Mat]; -// D27.f[DIR_PM0 ] = &DD27[DIR_MP0 *size_Mat]; -// D27.f[DIR_M0M ] = &DD27[DIR_P0P *size_Mat]; -// D27.f[DIR_P0P ] = &DD27[DIR_M0M *size_Mat]; -// D27.f[DIR_M0P ] = &DD27[DIR_P0M *size_Mat]; 
-// D27.f[DIR_P0M ] = &DD27[DIR_M0P *size_Mat]; -// D27.f[DIR_0MM ] = &DD27[DIR_0PP *size_Mat]; -// D27.f[DIR_0PP ] = &DD27[DIR_0MM *size_Mat]; -// D27.f[DIR_0MP ] = &DD27[DIR_0PM *size_Mat]; -// D27.f[DIR_0PM ] = &DD27[DIR_0MP *size_Mat]; -// D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; -// D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat]; -// D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat]; -// D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat]; -// D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat]; -// D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat]; -// D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat]; -// D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat]; -// D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat]; +// D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat]; +// D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat]; +// D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat]; +// D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat]; +// D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat]; +// D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat]; +// D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat]; +// D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat]; +// D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat]; +// D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat]; +// D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat]; +// D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat]; +// D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat]; +// D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat]; +// D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat]; +// D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat]; +// D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat]; +// D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat]; +// D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; +// D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat]; +// D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat]; +// D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat]; +// D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat]; +// D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat]; +// D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat]; +// D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat]; +// D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat]; // } // 
////////////////////////////////////////////////////////////////////////// // real ConcD = Conc[k]; @@ -391,32 +391,32 @@ __global__ void InitAD27( // real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); // (D27.f[DIR_000])[kzero] = c8o27* ConcD*(c1o1-cu_sq); -// (D27.f[DIR_P00 ])[ke ] = c2o27* ConcD*(c1o1+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); -// (D27.f[DIR_M00 ])[kw ] = c2o27* ConcD*(c1o1+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); -// (D27.f[DIR_0P0 ])[kn ] = c2o27* ConcD*(c1o1+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); -// (D27.f[DIR_0M0 ])[ks ] = c2o27* ConcD*(c1o1+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); -// (D27.f[DIR_00P ])[kt ] = c2o27* ConcD*(c1o1+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); -// (D27.f[DIR_00M ])[kb ] = c2o27* ConcD*(c1o1+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); -// (D27.f[DIR_PP0 ])[kne ] = c1o54* ConcD*(c1o1+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); -// (D27.f[DIR_MM0 ])[ksw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); -// (D27.f[DIR_PM0 ])[kse ] = c1o54* ConcD*(c1o1+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); -// (D27.f[DIR_MP0 ])[knw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); -// (D27.f[DIR_P0P ])[kte ] = c1o54* ConcD*(c1o1+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); -// (D27.f[DIR_M0M ])[kbw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); -// (D27.f[DIR_P0M ])[kbe ] = c1o54* ConcD*(c1o1+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); -// (D27.f[DIR_M0P ])[ktw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); -// (D27.f[DIR_0PP ])[ktn ] = c1o54* ConcD*(c1o1+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); -// (D27.f[DIR_0MM ])[kbs ] = c1o54* ConcD*(c1o1+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); -// (D27.f[DIR_0PM ])[kbn ] = c1o54* ConcD*(c1o1+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); -// (D27.f[DIR_0MP ])[kts ] = c1o54* ConcD*(c1o1+c3o1*( -vx2+vx3)+c9o2*( 
-vx2+vx3)*( -vx2+vx3)-cu_sq); -// (D27.f[DIR_PPP ])[ktne ] = c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); -// (D27.f[DIR_MMM ])[kbsw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); -// (D27.f[DIR_PPM ])[kbne ] = c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); -// (D27.f[DIR_MMP ])[ktsw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); -// (D27.f[DIR_PMP ])[ktse ] = c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); -// (D27.f[DIR_MPM ])[kbnw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); -// (D27.f[DIR_PMM ])[kbse ] = c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); -// (D27.f[DIR_MPP ])[ktnw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); +// (D27.f[DIR_P00])[ke ] = c2o27* ConcD*(c1o1+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); +// (D27.f[DIR_M00])[kw ] = c2o27* ConcD*(c1o1+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); +// (D27.f[DIR_0P0])[kn ] = c2o27* ConcD*(c1o1+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); +// (D27.f[DIR_0M0])[ks ] = c2o27* ConcD*(c1o1+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); +// (D27.f[DIR_00P])[kt ] = c2o27* ConcD*(c1o1+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); +// (D27.f[DIR_00M])[kb ] = c2o27* ConcD*(c1o1+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); +// (D27.f[DIR_PP0])[kne ] = c1o54* ConcD*(c1o1+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); +// (D27.f[DIR_MM0])[ksw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); +// (D27.f[DIR_PM0])[kse ] = c1o54* ConcD*(c1o1+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); +// (D27.f[DIR_MP0])[knw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); +// (D27.f[DIR_P0P])[kte ] = c1o54* ConcD*(c1o1+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); +// 
(D27.f[DIR_M0M])[kbw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); +// (D27.f[DIR_P0M])[kbe ] = c1o54* ConcD*(c1o1+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); +// (D27.f[DIR_M0P])[ktw ] = c1o54* ConcD*(c1o1+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); +// (D27.f[DIR_0PP])[ktn ] = c1o54* ConcD*(c1o1+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); +// (D27.f[DIR_0MM])[kbs ] = c1o54* ConcD*(c1o1+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); +// (D27.f[DIR_0PM])[kbn ] = c1o54* ConcD*(c1o1+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); +// (D27.f[DIR_0MP])[kts ] = c1o54* ConcD*(c1o1+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); +// (D27.f[DIR_PPP])[ktne ] = c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); +// (D27.f[DIR_MMM])[kbsw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); +// (D27.f[DIR_PPM])[kbne ] = c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); +// (D27.f[DIR_MMP])[ktsw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); +// (D27.f[DIR_PMP])[ktse ] = c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); +// (D27.f[DIR_MPM])[kbnw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); +// (D27.f[DIR_PMM])[kbse ] = c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); +// (D27.f[DIR_MPP])[ktnw ] = c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // } // } @@ -448,7 +448,7 @@ __global__ void InitAD7( unsigned int* neighborX, real* ux, real* uy, real* uz, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, real* DD7, bool EvenOrOdd) { @@ -463,7 +463,7 @@ __global__ void InitAD7( 
unsigned int* neighborX, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if(k<size_Mat) + if(k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -474,23 +474,23 @@ __global__ void InitAD7( unsigned int* neighborX, Distributions7 D7; if (EvenOrOdd==true) { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[1] = &DD7[1*size_Mat]; - D7.f[2] = &DD7[2*size_Mat]; - D7.f[3] = &DD7[3*size_Mat]; - D7.f[4] = &DD7[4*size_Mat]; - D7.f[5] = &DD7[5*size_Mat]; - D7.f[6] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[1] = &DD7[1*numberOfLBnodes]; + D7.f[2] = &DD7[2*numberOfLBnodes]; + D7.f[3] = &DD7[3*numberOfLBnodes]; + D7.f[4] = &DD7[4*numberOfLBnodes]; + D7.f[5] = &DD7[5*numberOfLBnodes]; + D7.f[6] = &DD7[6*numberOfLBnodes]; } else { - D7.f[0] = &DD7[0*size_Mat]; - D7.f[2] = &DD7[1*size_Mat]; - D7.f[1] = &DD7[2*size_Mat]; - D7.f[4] = &DD7[3*size_Mat]; - D7.f[3] = &DD7[4*size_Mat]; - D7.f[6] = &DD7[5*size_Mat]; - D7.f[5] = &DD7[6*size_Mat]; + D7.f[0] = &DD7[0*numberOfLBnodes]; + D7.f[2] = &DD7[1*numberOfLBnodes]; + D7.f[1] = &DD7[2*numberOfLBnodes]; + D7.f[4] = &DD7[3*numberOfLBnodes]; + D7.f[3] = &DD7[4*numberOfLBnodes]; + D7.f[6] = &DD7[5*numberOfLBnodes]; + D7.f[5] = &DD7[6*numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// real ConcD = Conc[k]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h deleted file mode 100644 index 2f6a11aa17398b65858508c3f94b241c16551b37..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h +++ /dev/null @@ -1,177 +0,0 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ 
/ | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ -// \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ -// -// This file is part of VirtualFluids. VirtualFluids is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \file KernelUtilities.h -//! \ingroup GPU -//! 
\author Martin Schoenherr, Anna Wellmann -//====================================================================================== -#ifndef KERNELUTILS_H -#define KERNELUTILS_H - -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include "lbm/constants/NumericConstants.h" - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; - -__inline__ __device__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep) -{ - if (isEvenTimestep) - { - dist.f[DIR_P00 ] = &distributionArray[DIR_P00 *numberOfLBnodes]; - dist.f[DIR_M00 ] = &distributionArray[DIR_M00 *numberOfLBnodes]; - dist.f[DIR_0P0 ] = &distributionArray[DIR_0P0 *numberOfLBnodes]; - dist.f[DIR_0M0 ] = &distributionArray[DIR_0M0 *numberOfLBnodes]; - dist.f[DIR_00P ] = &distributionArray[DIR_00P *numberOfLBnodes]; - dist.f[DIR_00M ] = &distributionArray[DIR_00M *numberOfLBnodes]; - dist.f[DIR_PP0 ] = &distributionArray[DIR_PP0 *numberOfLBnodes]; - dist.f[DIR_MM0 ] = &distributionArray[DIR_MM0 *numberOfLBnodes]; - dist.f[DIR_PM0 ] = &distributionArray[DIR_PM0 *numberOfLBnodes]; - dist.f[DIR_MP0 ] = &distributionArray[DIR_MP0 *numberOfLBnodes]; - dist.f[DIR_P0P ] = &distributionArray[DIR_P0P *numberOfLBnodes]; - dist.f[DIR_M0M ] = &distributionArray[DIR_M0M *numberOfLBnodes]; - dist.f[DIR_P0M ] = &distributionArray[DIR_P0M *numberOfLBnodes]; - dist.f[DIR_M0P ] = &distributionArray[DIR_M0P *numberOfLBnodes]; - dist.f[DIR_0PP ] = &distributionArray[DIR_0PP *numberOfLBnodes]; - dist.f[DIR_0MM ] = &distributionArray[DIR_0MM *numberOfLBnodes]; - dist.f[DIR_0PM ] = &distributionArray[DIR_0PM *numberOfLBnodes]; - dist.f[DIR_0MP ] = &distributionArray[DIR_0MP *numberOfLBnodes]; - dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes]; - dist.f[DIR_PPP ] = &distributionArray[DIR_PPP *numberOfLBnodes]; - dist.f[DIR_MMP ] = &distributionArray[DIR_MMP *numberOfLBnodes]; - dist.f[DIR_PMP ] = &distributionArray[DIR_PMP 
*numberOfLBnodes]; - dist.f[DIR_MPP ] = &distributionArray[DIR_MPP *numberOfLBnodes]; - dist.f[DIR_PPM ] = &distributionArray[DIR_PPM *numberOfLBnodes]; - dist.f[DIR_MMM ] = &distributionArray[DIR_MMM *numberOfLBnodes]; - dist.f[DIR_PMM ] = &distributionArray[DIR_PMM *numberOfLBnodes]; - dist.f[DIR_MPM ] = &distributionArray[DIR_MPM *numberOfLBnodes]; - } - else - { - dist.f[DIR_M00 ] = &distributionArray[DIR_P00 *numberOfLBnodes]; - dist.f[DIR_P00 ] = &distributionArray[DIR_M00 *numberOfLBnodes]; - dist.f[DIR_0M0 ] = &distributionArray[DIR_0P0 *numberOfLBnodes]; - dist.f[DIR_0P0 ] = &distributionArray[DIR_0M0 *numberOfLBnodes]; - dist.f[DIR_00M ] = &distributionArray[DIR_00P *numberOfLBnodes]; - dist.f[DIR_00P ] = &distributionArray[DIR_00M *numberOfLBnodes]; - dist.f[DIR_MM0 ] = &distributionArray[DIR_PP0 *numberOfLBnodes]; - dist.f[DIR_PP0 ] = &distributionArray[DIR_MM0 *numberOfLBnodes]; - dist.f[DIR_MP0 ] = &distributionArray[DIR_PM0 *numberOfLBnodes]; - dist.f[DIR_PM0 ] = &distributionArray[DIR_MP0 *numberOfLBnodes]; - dist.f[DIR_M0M ] = &distributionArray[DIR_P0P *numberOfLBnodes]; - dist.f[DIR_P0P ] = &distributionArray[DIR_M0M *numberOfLBnodes]; - dist.f[DIR_M0P ] = &distributionArray[DIR_P0M *numberOfLBnodes]; - dist.f[DIR_P0M ] = &distributionArray[DIR_M0P *numberOfLBnodes]; - dist.f[DIR_0MM ] = &distributionArray[DIR_0PP *numberOfLBnodes]; - dist.f[DIR_0PP ] = &distributionArray[DIR_0MM *numberOfLBnodes]; - dist.f[DIR_0MP ] = &distributionArray[DIR_0PM *numberOfLBnodes]; - dist.f[DIR_0PM ] = &distributionArray[DIR_0MP *numberOfLBnodes]; - dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes]; - dist.f[DIR_PPP ] = &distributionArray[DIR_MMM *numberOfLBnodes]; - dist.f[DIR_MMP ] = &distributionArray[DIR_PPM *numberOfLBnodes]; - dist.f[DIR_PMP ] = &distributionArray[DIR_MPM *numberOfLBnodes]; - dist.f[DIR_MPP ] = &distributionArray[DIR_PMM *numberOfLBnodes]; - dist.f[DIR_PPM ] = &distributionArray[DIR_MMP *numberOfLBnodes]; - dist.f[DIR_MMM ] = 
&distributionArray[DIR_PPP *numberOfLBnodes]; - dist.f[DIR_PMM ] = &distributionArray[DIR_MPP *numberOfLBnodes]; - dist.f[DIR_MPM ] = &distributionArray[DIR_PMP *numberOfLBnodes]; - } -} - -__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices) -{ - subgridD.q[DIR_P00 ] = &subgridDistances[DIR_P00 *numberOfSubgridIndices]; - subgridD.q[DIR_M00 ] = &subgridDistances[DIR_M00 *numberOfSubgridIndices]; - subgridD.q[DIR_0P0 ] = &subgridDistances[DIR_0P0 *numberOfSubgridIndices]; - subgridD.q[DIR_0M0 ] = &subgridDistances[DIR_0M0 *numberOfSubgridIndices]; - subgridD.q[DIR_00P ] = &subgridDistances[DIR_00P *numberOfSubgridIndices]; - subgridD.q[DIR_00M ] = &subgridDistances[DIR_00M *numberOfSubgridIndices]; - subgridD.q[DIR_PP0 ] = &subgridDistances[DIR_PP0 *numberOfSubgridIndices]; - subgridD.q[DIR_MM0 ] = &subgridDistances[DIR_MM0 *numberOfSubgridIndices]; - subgridD.q[DIR_PM0 ] = &subgridDistances[DIR_PM0 *numberOfSubgridIndices]; - subgridD.q[DIR_MP0 ] = &subgridDistances[DIR_MP0 *numberOfSubgridIndices]; - subgridD.q[DIR_P0P ] = &subgridDistances[DIR_P0P *numberOfSubgridIndices]; - subgridD.q[DIR_M0M ] = &subgridDistances[DIR_M0M *numberOfSubgridIndices]; - subgridD.q[DIR_P0M ] = &subgridDistances[DIR_P0M *numberOfSubgridIndices]; - subgridD.q[DIR_M0P ] = &subgridDistances[DIR_M0P *numberOfSubgridIndices]; - subgridD.q[DIR_0PP ] = &subgridDistances[DIR_0PP *numberOfSubgridIndices]; - subgridD.q[DIR_0MM ] = &subgridDistances[DIR_0MM *numberOfSubgridIndices]; - subgridD.q[DIR_0PM ] = &subgridDistances[DIR_0PM *numberOfSubgridIndices]; - subgridD.q[DIR_0MP ] = &subgridDistances[DIR_0MP *numberOfSubgridIndices]; - subgridD.q[DIR_000] = &subgridDistances[DIR_000 *numberOfSubgridIndices]; - subgridD.q[DIR_PPP ] = &subgridDistances[DIR_PPP *numberOfSubgridIndices]; - subgridD.q[DIR_MMP ] = &subgridDistances[DIR_MMP *numberOfSubgridIndices]; - subgridD.q[DIR_PMP ] = 
&subgridDistances[DIR_PMP *numberOfSubgridIndices]; - subgridD.q[DIR_MPP ] = &subgridDistances[DIR_MPP *numberOfSubgridIndices]; - subgridD.q[DIR_PPM ] = &subgridDistances[DIR_PPM *numberOfSubgridIndices]; - subgridD.q[DIR_MMM ] = &subgridDistances[DIR_MMM *numberOfSubgridIndices]; - subgridD.q[DIR_PMM ] = &subgridDistances[DIR_PMM *numberOfSubgridIndices]; - subgridD.q[DIR_MPM ] = &subgridDistances[DIR_MPM *numberOfSubgridIndices]; -} - -__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight) -{ - return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq); -} - -__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq, - const real& omega, const real& velocity, const real weight) -{ - - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 - + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q); -} - -__inline__ __device__ real getBounceBackDistributionForVeloBC( const real& f, - const real& velocity, const real weight) -{ - - return f - (c6o1 * weight * velocity); -} - -__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, - const real& omega) -{ - - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 - + (q * (f + fInverse)) / (c1o1 + q); -} - - -__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, - const real& omega, const real& drho, const real& velocity, const real weight) -{ - - return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 - + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho; -} - - - -#endif diff --git 
a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu index 51368bbe09e6fc43a7a1ff6b8b15387417774964..b05cb9201ce30038bd6edf52e2e95a13c6f6d7d4 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu @@ -14,7 +14,7 @@ #include <iomanip> -//#include "Core/Logger/Logger.h" +#include "cuda/CudaGrid.h" #include "Parameter/Parameter.h" // includes, kernels @@ -24,7 +24,7 @@ using namespace vf::lbm::constant; using namespace vf::lbm::dir; -__global__ void kineticEnergyKernel (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat); +__global__ void kineticEnergyKernel (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes); __host__ __device__ inline void kineticEnergyFunction(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint index); @@ -35,56 +35,42 @@ bool KineticEnergyAnalyzer::run(uint iter) if( iter % this->analyzeIter != 0 ) return false; int lev = 0; - int size_Mat = this->para->getParD(lev)->numberOfNodes; - - thrust::device_vector<real> kineticEnergy(size_Mat, c0o1); - thrust::device_vector<uint> isFluid (size_Mat, 0); - - unsigned int numberOfThreads = 128; - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX, - para->getParD(lev)->velocityY, - para->getParD(lev)->velocityZ, - para->getParD(lev)->rho, - 
para->getParD(lev)->pressure, - para->getParD(lev)->typeOfGridNode, - para->getParD(lev)->neighborX, - para->getParD(lev)->neighborY, - para->getParD(lev)->neighborZ, - para->getParD(lev)->numberOfNodes, - para->getParD(lev)->distributions.f[0], - para->getParD(lev)->isEvenTimestep); - getLastCudaError("LBCalcMacSP27 execution failed"); - - kineticEnergyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX, - para->getParD(lev)->velocityY, - para->getParD(lev)->velocityZ, - para->getParD(lev)->rho, - para->getParD(lev)->neighborX, - para->getParD(lev)->neighborY, - para->getParD(lev)->neighborZ, - para->getParD(lev)->neighborInverse, - para->getParD(lev)->typeOfGridNode, - kineticEnergy.data().get(), - isFluid.data().get(), - size_Mat); - cudaDeviceSynchronize(); - - getLastCudaError("kineticEnergyKernel execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes); + + thrust::device_vector<real> kineticEnergy( this->para->getParD(lev)->numberOfNodes, c0o1); + thrust::device_vector<uint> isFluid ( this->para->getParD(lev)->numberOfNodes, 0); + + LBCalcMacCompSP27<<< grid.grid, grid.threads >>>( + para->getParD(lev)->velocityX, + para->getParD(lev)->velocityY, + para->getParD(lev)->velocityZ, + para->getParD(lev)->rho, + para->getParD(lev)->pressure, + para->getParD(lev)->typeOfGridNode, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + para->getParD(lev)->numberOfNodes, + para->getParD(lev)->distributions.f[0], + para->getParD(lev)->isEvenTimestep); + getLastCudaError("LBCalcMacCompSP27 execution failed"); + + kineticEnergyKernel<<< grid.grid, grid.threads >>>( + para->getParD(lev)->velocityX, + para->getParD(lev)->velocityY, + para->getParD(lev)->velocityZ, + para->getParD(lev)->rho, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + para->getParD(lev)->neighborInverse, + 
para->getParD(lev)->typeOfGridNode, + kineticEnergy.data().get(), + isFluid.data().get(), + para->getParD(lev)->numberOfNodes); + cudaDeviceSynchronize(); + + getLastCudaError("kineticEnergyKernel execution failed"); real EKin = thrust::reduce(kineticEnergy.begin(), kineticEnergy.end(), c0o1, thrust::plus<real>()); uint numberOfFluidNodes = thrust::reduce(isFluid.begin(), isFluid.end(), 0, thrust::plus<uint>()); @@ -99,7 +85,7 @@ bool KineticEnergyAnalyzer::run(uint iter) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat) +__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes) { ////////////////////////////////////////////////////////////////////////// const uint x = threadIdx.x; // Globaler x-Index @@ -115,7 +101,7 @@ __global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uin //if( index % 34 == 0 || index % 34 == 33 ) return; - if( index >= size_Mat) return; + if( index >= (uint)numberOfLBnodes) return; unsigned int BC; BC = geo[index]; diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu index 63fc5be0ebe5d4a26d4662ee8c0dddbc3098247a..4faea21102b6a68dd9a0aa30e9cecc7eba6051b0 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu @@ -18,2176 +18,1644 @@ #include "Parameter/Parameter.h" ////////////////////////////////////////////////////////////////////////// -void KernelCas27( unsigned int grid_nx, - unsigned int grid_ny, - unsigned int grid_nz, - real s9, - unsigned 
int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, grid_nz ); // Gitter fuer Kollision und Propagation - - LB_Kernel_Casc27<<< grid, threads >>>( s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Casc27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelCasSP27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Casc_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelCasSPMS27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_MS_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void 
KernelCasSPMSOHM27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Casc_SP_MS_OHM_27<<< grid, threads >>>( s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed"); +void KernelCas27( + unsigned int grid_nx, + unsigned int grid_ny, + unsigned int grid_nz, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, grid_nz ); // Gitter fuer Kollision und Propagation + + LB_Kernel_Casc27<<< grid, threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Casc27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelCasSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Casc_SP_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Casc_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelCasSPMS27( + unsigned int numberOfThreads, + real 
s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Casc_SP_MS_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelCasSPMSOHM27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Casc_SP_MS_OHM_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void KernelKumCompSRTSP27( - unsigned int numberOfThreads, - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid, threads >>>( - omega, - bcMatD, - neighborX, - neighborY, - neighborZ, - DDStart, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed"); -} 
-////////////////////////////////////////////////////////////////////////// -void KernelKum1hSP27( unsigned int numberOfThreads, - real omega, - real deltaPhi, - real angularVelocity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - real* DDStart, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_1h_SP_27<<< grid, threads >>>(omega, - deltaPhi, - angularVelocity, - bcMatD, - neighborX, - neighborY, - neighborZ, - coordX, - coordY, - coordZ, - DDStart, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelCascadeSP27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Cascade_SP_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelKumNewSP27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; 
- if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_New_SP_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelKumNewCompSP27(unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - int size_Array, - int level, - real* forces, - bool EvenOrOdd) -{ - //int Grid = size_Array / numberOfThreads; - //dim3 grid(Grid, 1, 1); - //dim3 threads(numberOfThreads, 1, 1 ); - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - //LB_Kernel_Kum_New_Comp_SP_27<<< grid, threads >>>( s9, - // bcMatD, - // neighborX, - // neighborY, - // neighborZ, - // DD, - // size_Mat, - // level, - // forces, - // EvenOrOdd); - //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed"); -} - -////////////////////////////////////////////////////////////////////////// -void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - int size_Array, - int level, - real* forces, - bool EvenOrOdd) -{ - //int Grid = size_Array / numberOfThreads; - //dim3 grid(Grid, 1, 1); - //dim3 threads(numberOfThreads, 1, 1 ); - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - 
Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid, threads >>>( s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CumulantOnePreconditionedChimCompSP27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - int size_Array, - int level, - real* forces, - bool EvenOrOdd) -{ - //int Grid = size_Array / numberOfThreads; - //dim3 grid(Grid, 1, 1); - //dim3 threads(numberOfThreads, 1, 1 ); - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid, threads >>>( s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CumulantOneChimCompSP27(unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - int size_Array, - int level, - real* forces, - bool EvenOrOdd) -{ - //int Grid = size_Array / numberOfThreads; - //dim3 grid(Grid, 1, 1); - //dim3 threads(numberOfThreads, 1, 1 ); - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - 
Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - Cumulant_One_chim_Comp_SP_27 <<< grid, threads >>>( s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelKumIsoTestSP27(unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - real* dxxUx, - real* dyyUy, - real* dzzUz, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_IsoTest_SP_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - dxxUx, - dyyUy, - dzzUz, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelKumCompSP27( unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_Kum_Comp_SP_27<<< grid, threads >>>(s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelPMCumOneCompSP27(unsigned int 
numberOfThreads, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - int size_Mat, - int level, - real* forces, - real porosity, - real darcy, - real forchheimer, - unsigned int sizeOfPorousMedia, - unsigned int* nodeIdsPorousMedia, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>(omega, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - level, - forces, - porosity, - darcy, - forchheimer, - sizeOfPorousMedia, - nodeIdsPorousMedia, - EvenOrOdd); - getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed"); + unsigned int numberOfThreads, + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DDStart, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>( + omega, + bcMatD, + neighborX, + neighborY, + neighborZ, + DDStart, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelKum1hSP27( + unsigned int numberOfThreads, + real omega, + real deltaPhi, + real angularVelocity, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + real* DDStart, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Kum_1h_SP_27<<< 
grid.grid, grid.threads >>>( + omega, + deltaPhi, + angularVelocity, + bcMatD, + neighborX, + neighborY, + neighborZ, + coordX, + coordY, + coordZ, + DDStart, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Kum_1h_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelCascadeSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelKumNewSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + + LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelKumNewCompSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + int size_Array, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + //LB_Kernel_Kum_New_Comp_SP_27<<< 
grid.grid, grid.threads >>>( s9, + // bcMatD, + // neighborX, + // neighborY, + // neighborZ, + // DD, + // numberOfLBnodes, + // level, + // forces, + // EvenOrOdd); + //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed"); +} + +////////////////////////////////////////////////////////////////////////// +void CumulantOnePreconditionedErrorDiffusionChimCompSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + int size_Array, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CumulantOnePreconditionedChimCompSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + int size_Array, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CumulantOneChimCompSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, 
+ unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + int size_Array, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelKumIsoTestSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + real* dxxUx, + real* dyyUy, + real* dzzUz, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + dxxUx, + dyyUy, + dzzUz, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelKumCompSP27( + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + + LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelPMCumOneCompSP27( + unsigned int numberOfThreads, + real omega, + unsigned int* neighborX, + 
unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + int level, + real* forces, + real porosity, + real darcy, + real forchheimer, + unsigned int sizeOfPorousMedia, + unsigned int* nodeIdsPorousMedia, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>( + omega, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + level, + forces, + porosity, + darcy, + forchheimer, + sizeOfPorousMedia, + nodeIdsPorousMedia, + EvenOrOdd); + getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void KernelWaleBySoniMalavCumAA2016CompSP27( - unsigned int numberOfThreads, - real s9, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - real* veloX, - real* veloY, - real* veloZ, - real* DD, - real* turbulentViscosity, - int size_Mat, - int size_Array, - int level, - real* forces, - bool EvenOrOdd) -{ - //int Grid = size_Array / numberOfThreads; - //dim3 grid(Grid, 1, 1); - //dim3 threads(numberOfThreads, 1, 1 ); - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2, 1); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid, threads >> >( - s9, - bcMatD, - neighborX, - neighborY, - neighborZ, - neighborWSB, - veloX, - veloY, - veloZ, - DD, - turbulentViscosity, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelADincomp7( unsigned int numberOfThreads, - real 
diffusivity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - real* DD7, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_AD_Incomp_7<<< grid, threads >>>( diffusivity, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - DD7, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void KernelADincomp27( unsigned int numberOfThreads, - real diffusivity, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - real* DD27, - int size_Mat, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LB_Kernel_AD_Incomp_27<<< grid, threads >>>( diffusivity, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - DD27, - size_Mat, - EvenOrOdd); - getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void Init27( int myid, - int numprocs, - real u0, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* vParab, - unsigned int size_Mat, - unsigned int grid_nx, - unsigned int grid_ny, - unsigned int grid_nz, - real* DD, - int level, - int maxlevel) -{ - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, grid_nz ); // Gitter fuer Kollision und Propagation - - LBInit27<<< grid, threads >>> ( myid, - numprocs, - u0, - geoD, - neighborX, - 
neighborY, - neighborZ, - vParab, - size_Mat, - grid_nx, - grid_ny, - grid_nz, - DD, - level, - maxlevel); - getLastCudaError("LBInit27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void InitNonEqPartSP27( unsigned int numberOfThreads, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - unsigned int* geoD, - real* rho, - real* ux, - real* uy, - real* uz, - unsigned int size_Mat, - real* DD, - real omega, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBInitNonEqPartSP27<<< grid, threads >>>( neighborX, - neighborY, - neighborZ, - neighborWSB, - geoD, - rho, - ux, - uy, - uz, - size_Mat, - DD, - omega, - EvenOrOdd); - getLastCudaError("LBInitNonEqPartSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void InitThS7( unsigned int numberOfThreads, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* geoD, - real* Conc, - real* ux, - real* uy, - real* uz, - unsigned int size_Mat, - real* DD7, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitAD7<<< grid, threads >>>( neighborX, - neighborY, - neighborZ, - geoD, - Conc, - ux, - uy, - uz, - size_Mat, - DD7, - EvenOrOdd); - getLastCudaError("InitAD7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void InitADDev27( unsigned int numberOfThreads, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned 
int* geoD, - real* Conc, - real* ux, - real* uy, - real* uz, - unsigned int size_Mat, - real* DD27, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitAD27<<< grid, threads >>>(neighborX, - neighborY, - neighborZ, - geoD, - Conc, - ux, - uy, - uz, - size_Mat, - DD27, - EvenOrOdd); - getLastCudaError("InitAD27 execution failed"); + unsigned int numberOfThreads, + real s9, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + real* veloX, + real* veloY, + real* veloZ, + real* DD, + real* turbulentViscosity, + unsigned long long numberOfLBnodes, + int size_Array, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 <<< grid.grid, grid.threads >>>( + s9, + bcMatD, + neighborX, + neighborY, + neighborZ, + neighborWSB, + veloX, + veloY, + veloZ, + DD, + turbulentViscosity, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelADincomp7( + unsigned int numberOfThreads, + real diffusivity, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + real* DD7, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( + diffusivity, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + DD7, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_AD_Incomp_7 execution 
failed"); +} +////////////////////////////////////////////////////////////////////////// +void KernelADincomp27( + unsigned int numberOfThreads, + real diffusivity, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + real* DD27, + unsigned long long numberOfLBnodes, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( + diffusivity, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + DD27, + numberOfLBnodes, + EvenOrOdd); + getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void Init27( + int myid, + int numprocs, + real u0, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* vParab, + unsigned long long numberOfLBnodes, + unsigned int grid_nx, + unsigned int grid_ny, + unsigned int grid_nz, + real* DD, + int level, + int maxlevel) +{ + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, grid_nz ); + + LBInit27<<< grid, threads >>> ( + myid, + numprocs, + u0, + geoD, + neighborX, + neighborY, + neighborZ, + vParab, + numberOfLBnodes, + grid_nx, + grid_ny, + grid_nz, + DD, + level, + maxlevel); + getLastCudaError("LBInit27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void InitNonEqPartSP27( + unsigned int numberOfThreads, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + unsigned int* geoD, + real* rho, + real* ux, + real* uy, + real* uz, + unsigned long long numberOfLBnodes, + real* DD, + real omega, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( + neighborX, + neighborY, + neighborZ, + neighborWSB, + geoD, + rho, 
+ ux, + uy, + uz, + numberOfLBnodes, + DD, + omega, + EvenOrOdd); + getLastCudaError("LBInitNonEqPartSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void InitThS7( + unsigned int numberOfThreads, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* geoD, + real* Conc, + real* ux, + real* uy, + real* uz, + unsigned long long numberOfLBnodes, + real* DD7, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + InitAD7<<< grid.grid, grid.threads >>>( + neighborX, + neighborY, + neighborZ, + geoD, + Conc, + ux, + uy, + uz, + numberOfLBnodes, + DD7, + EvenOrOdd); + getLastCudaError("InitAD7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void InitADDev27( + unsigned int numberOfThreads, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* geoD, + real* Conc, + real* ux, + real* uy, + real* uz, + unsigned long long numberOfLBnodes, + real* DD27, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + InitAD27<<< grid.grid, grid.threads >>>( + neighborX, + neighborY, + neighborZ, + geoD, + Conc, + ux, + uy, + uz, + numberOfLBnodes, + DD27, + EvenOrOdd); + getLastCudaError("InitAD27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void PostProcessorF3_2018Fehlberg( - unsigned int numberOfThreads, - real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* rhoOut, - real* vxOut, - real* vyOut, - real* vzOut, - real* DDStart, - real* G6, - int size_Mat, - int level, - real* forces, - bool EvenOrOdd) -{ - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = 
Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_PostProcessor_F3_2018_Fehlberg <<< grid, threads >>> ( omega, - bcMatD, - neighborX, - neighborY, - neighborZ, - rhoOut, - vxOut, - vyOut, - vzOut, - DDStart, - G6, - size_Mat, - level, - forces, - EvenOrOdd); - getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMac27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int grid_nx, - unsigned int grid_ny, - unsigned int grid_nz, - real* DD, - bool isEvenTimestep) + unsigned int numberOfThreads, + real omega, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* rhoOut, + real* vxOut, + real* vyOut, + real* vzOut, + real* DDStart, + real* G6, + unsigned long long numberOfLBnodes, + int level, + real* forces, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> ( + omega, + bcMatD, + neighborX, + neighborY, + neighborZ, + rhoOut, + vxOut, + vyOut, + vzOut, + DDStart, + G6, + numberOfLBnodes, + level, + forces, + EvenOrOdd); + getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMac27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int grid_nx, + unsigned int grid_ny, + unsigned int grid_nz, + real* DD, + bool isEvenTimestep) { dim3 threads ( grid_nx, 1, 1 ); dim3 grid ( grid_ny, grid_nz ); - LBCalcMac27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - 
geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMac27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMacSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacSP27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMacSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMacCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacCompSP27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMacSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMacThS7( real* Conc, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - 
unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD7, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcConc7<<< grid, threads >>> (Conc, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD7, - isEvenTimestep); - getLastCudaError("CalcConc7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void PlaneConcThS7(real* Conc, - int* kPC, - unsigned int numberOfPointskPC, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD7, - bool isEvenTimestep) -{ - int Grid = (numberOfPointskPC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetPlaneConc7<<< grid, threads >>> ( Conc, - kPC, - numberOfPointskPC, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD7, - isEvenTimestep); - getLastCudaError("GetPlaneConc7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void PlaneConcThS27(real* Conc, - int* kPC, - unsigned int numberOfPointskPC, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD27, - bool isEvenTimestep) -{ - int Grid = (numberOfPointskPC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetPlaneConc27<<< grid, threads >>> ( Conc, - kPC, - 
numberOfPointskPC, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD27, - isEvenTimestep); - getLastCudaError("GetPlaneConc27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcConcentration27( unsigned int numberOfThreads, - real* Conc, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD27, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcConc27<<< grid, threads >>> ( Conc, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD27, - isEvenTimestep); - getLastCudaError("CalcConc27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMedSP27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMedSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMedCompSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - 
unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMedCompSP27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMedSP27 execution failed"); + LBCalcMac27<<< grid, threads >>> ( + vxD, + vyD, + vzD, + rhoD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMac27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMacSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMacSP27<<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMacSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMacCompSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMacCompSP27<<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + 
rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMacCompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMacThS7( + real* Conc, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD7, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + CalcConc7<<< grid.grid, grid.threads >>> ( + Conc, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD7, + isEvenTimestep); + getLastCudaError("CalcConc7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void PlaneConcThS7( + real* Conc, + int* kPC, + unsigned int numberOfPointskPC, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD7, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC); + + GetPlaneConc7<<< grid.grid, grid.threads >>> ( + Conc, + kPC, + numberOfPointskPC, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD7, + isEvenTimestep); + getLastCudaError("GetPlaneConc7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void PlaneConcThS27( + real* Conc, + int* kPC, + unsigned int numberOfPointskPC, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD27, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC); + + GetPlaneConc27<<< grid.grid, grid.threads >>> ( + Conc, + kPC, + 
numberOfPointskPC, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD27, + isEvenTimestep); + getLastCudaError("GetPlaneConc27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcConcentration27( + unsigned int numberOfThreads, + real* Conc, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD27, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + CalcConc27<<< grid.grid, grid.threads >>> ( + Conc, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD27, + isEvenTimestep); + getLastCudaError("CalcConc27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMedSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMedSP27<<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMedSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMedCompSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMedCompSP27<<< grid.grid, grid.threads >>> ( + 
vxD, + vyD, + vzD, + rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMedCompSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcMedCompAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - real* DD_AD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBCalcMedCompAD27 <<< grid, threads >>> ( - vxD, - vyD, - vzD, - rhoD, - pressD, - concD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - DD_AD, - isEvenTimestep); - getLastCudaError("LBCalcMedAD27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcMacMedSP27( real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int tdiff, - unsigned int size_Mat, - unsigned int numberOfThreads, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMacMedSP27<<< grid, threads >>> ( vxD, - vyD, - vzD, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - tdiff, - size_Mat, - isEvenTimestep); - getLastCudaError("LBCalcMacMedSP27 execution failed"); + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + 
unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + real* DD_AD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMedCompAD27 <<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + concD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + DD_AD, + isEvenTimestep); + getLastCudaError("LBCalcMedCompAD27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcMacMedSP27( + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int tdiff, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcMacMedSP27<<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + tdiff, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBCalcMacMedSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void ResetMedianValuesSP27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - unsigned int size_Mat, - unsigned int numberOfThreads, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBResetMedianValuesSP27 << < grid, threads >> > ( - vxD, - vyD, - vzD, - rhoD, - pressD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBResetMedianValuesSP27 execution failed"); + real* vxD, + real* 
vyD, + real* vzD, + real* rhoD, + real* pressD, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBResetMedianValuesSP27 <<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBResetMedianValuesSP27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void ResetMedianValuesAD27( - real* vxD, - real* vyD, - real* vzD, - real* rhoD, - real* pressD, - real* concD, - unsigned int size_Mat, - unsigned int numberOfThreads, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LBResetMedianValuesAD27 << < grid, threads >> > ( - vxD, - vyD, - vzD, - rhoD, - pressD, - concD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBResetMedianValuesAD27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ, - real* kyzFromfcNEQ, - real* kxzFromfcNEQ, - real* kxxMyyFromfcNEQ, - real* kxxMzzFromfcNEQ, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc2ndMomentsIncompSP27<<< grid, threads >>> ( kxyFromfcNEQ, - kyzFromfcNEQ, - kxzFromfcNEQ, - kxxMyyFromfcNEQ, - kxxMzzFromfcNEQ, - geoD, - neighborX, - neighborY, - neighborZ, - 
size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ, - real* kyzFromfcNEQ, - real* kxzFromfcNEQ, - real* kxxMyyFromfcNEQ, - real* kxxMzzFromfcNEQ, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc2ndMomentsCompSP27<<< grid, threads >>> (kxyFromfcNEQ, - kyzFromfcNEQ, - kxzFromfcNEQ, - kxxMyyFromfcNEQ, - kxxMzzFromfcNEQ, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void Calc3rdMomentsIncompSP27(real* CUMbbb, - real* CUMabc, - real* CUMbac, - real* CUMbca, - real* CUMcba, - real* CUMacb, - real* CUMcab, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc3rdMomentsIncompSP27<<< grid, threads >>> ( CUMbbb, - CUMabc, - CUMbac, - CUMbca, - CUMcba, - CUMacb, - CUMcab, - geoD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed"); -} 
-////////////////////////////////////////////////////////////////////////// -void Calc3rdMomentsCompSP27( real* CUMbbb, - real* CUMabc, - real* CUMbac, - real* CUMbca, - real* CUMcba, - real* CUMacb, - real* CUMcab, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalc3rdMomentsCompSP27<<< grid, threads >>> (CUMbbb, - CUMabc, - CUMbac, - CUMbca, - CUMcba, - CUMacb, - CUMcab, - geoD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcHigherMomentsIncompSP27(real* CUMcbb, - real* CUMbcb, - real* CUMbbc, - real* CUMcca, - real* CUMcac, - real* CUMacc, - real* CUMbcc, - real* CUMcbc, - real* CUMccb, - real* CUMccc, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcHigherMomentsIncompSP27<<< grid, threads >>> (CUMcbb, - CUMbcb, - CUMbbc, - CUMcca, - CUMcac, - CUMacc, - CUMbcc, - CUMcbc, - CUMccb, - CUMccc, - geoD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed"); -} 
-////////////////////////////////////////////////////////////////////////// -void CalcHigherMomentsCompSP27( real* CUMcbb, - real* CUMbcb, - real* CUMbbc, - real* CUMcca, - real* CUMcac, - real* CUMacc, - real* CUMbcc, - real* CUMcbc, - real* CUMccb, - real* CUMccc, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - unsigned int numberOfThreads, - real* DD, - bool isEvenTimestep) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcHigherMomentsCompSP27<<< grid, threads >>> ( CUMcbb, - CUMbcb, - CUMbbc, - CUMcca, - CUMcac, - CUMacc, - CUMbcc, - CUMcbc, - CUMccb, - CUMccc, - geoD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep); - getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void LBCalcMeasurePoints27(real* vxMP, - real* vyMP, - real* vzMP, - real* rhoMP, - unsigned int* kMP, - unsigned int numberOfPointskMP, - unsigned int MPClockCycle, - unsigned int t, - unsigned int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - unsigned int numberOfThreads, - bool isEvenTimestep) -{ - int Grid = (numberOfPointskMP / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBCalcMeasurePoints<<< grid, threads >>> (vxMP, - vyMP, - vzMP, - rhoMP, - kMP, - numberOfPointskMP, - MPClockCycle, - t, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBCalcMeasurePoints execution failed"); -} 
-////////////////////////////////////////////////////////////////////////// -void BcPress27( int nx, - int ny, - int tz, - unsigned int grid_nx, - unsigned int grid_ny, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned int size_Mat, - bool isEvenTimestep) -{ - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, 1 ); - - LB_BC_Press_East27<<< grid, threads >>> ( nx, - ny, - tz, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep); - getLastCudaError("LB_BC_Press_East27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void BcVel27(int nx, - int ny, - int nz, - int itz, - unsigned int grid_nx, - unsigned int grid_ny, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned int size_Mat, - bool isEvenTimestep, - real u0x, - real om) -{ - dim3 threads ( grid_nx, 1, 1 ); - dim3 grid ( grid_ny, 1 ); - - LB_BC_Vel_West_27<<< grid, threads >>> ( nx, - ny, - nz, - itz, - bcMatD, - neighborX, - neighborY, - neighborZ, - DD, - size_Mat, - isEvenTimestep, - u0x, - grid_nx, - grid_ny, - om); - getLastCudaError("LB_BC_Vel_West_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADPressDev7( unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPress7<<< gridQ, threads >>>( DD, - DD7, - temp, - 
velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADPress7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADPressDev27(unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPress27<<< gridQ, threads >>>( DD, - DD27, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADPress27 execution failed"); + real* vxD, + real* vyD, + real* vzD, + real* rhoD, + real* pressD, + real* concD, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBResetMedianValuesAD27 <<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + rhoD, + pressD, + concD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBResetMedianValuesAD27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void Calc2ndMomentsIncompSP27( + real* kxyFromfcNEQ, + real* kyzFromfcNEQ, + real* kxzFromfcNEQ, + real* kxxMyyFromfcNEQ, + real* kxxMzzFromfcNEQ, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + 
vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> ( + kxyFromfcNEQ, + kyzFromfcNEQ, + kxzFromfcNEQ, + kxxMyyFromfcNEQ, + kxxMzzFromfcNEQ, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void Calc2ndMomentsCompSP27( + real* kxyFromfcNEQ, + real* kyzFromfcNEQ, + real* kxzFromfcNEQ, + real* kxxMyyFromfcNEQ, + real* kxxMzzFromfcNEQ, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> ( + kxyFromfcNEQ, + kyzFromfcNEQ, + kxzFromfcNEQ, + kxxMyyFromfcNEQ, + kxxMzzFromfcNEQ, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void Calc3rdMomentsIncompSP27( + real* CUMbbb, + real* CUMabc, + real* CUMbac, + real* CUMbca, + real* CUMcba, + real* CUMacb, + real* CUMcab, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> ( + CUMbbb, + CUMabc, + CUMbac, + CUMbca, + CUMcba, + CUMacb, + CUMcab, + geoD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBCalc3rdMomentsIncompSP27 
execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void Calc3rdMomentsCompSP27( + real* CUMbbb, + real* CUMabc, + real* CUMbac, + real* CUMbca, + real* CUMcba, + real* CUMacb, + real* CUMcab, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> ( + CUMbbb, + CUMabc, + CUMbac, + CUMbca, + CUMcba, + CUMacb, + CUMcab, + geoD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcHigherMomentsIncompSP27( + real* CUMcbb, + real* CUMbcb, + real* CUMbbc, + real* CUMcca, + real* CUMcac, + real* CUMacc, + real* CUMbcc, + real* CUMcbc, + real* CUMccb, + real* CUMccc, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> ( + CUMcbb, + CUMbcb, + CUMbbc, + CUMcca, + CUMcac, + CUMacc, + CUMbcc, + CUMcbc, + CUMccb, + CUMccc, + geoD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcHigherMomentsCompSP27( + real* CUMcbb, + real* CUMbcb, + real* CUMbbc, + real* CUMcca, + real* CUMcac, + real* CUMacc, + real* CUMbcc, + real* CUMcbc, + real* CUMccb, + real* CUMccc, + unsigned 
int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + + LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> ( + CUMcbb, + CUMbcb, + CUMbbc, + CUMcca, + CUMcac, + CUMacc, + CUMbcc, + CUMcbc, + CUMccb, + CUMccc, + geoD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void LBCalcMeasurePoints27( + real* vxMP, + real* vyMP, + real* vzMP, + real* rhoMP, + unsigned int* kMP, + unsigned int numberOfPointskMP, + unsigned int MPClockCycle, + unsigned int t, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + unsigned int numberOfThreads, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP); + + LBCalcMeasurePoints<<< grid.grid, grid.threads >>> ( + vxMP, + vyMP, + vzMP, + rhoMP, + kMP, + numberOfPointskMP, + MPClockCycle, + t, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBCalcMeasurePoints execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void BcPress27( + int nx, + int ny, + int tz, + unsigned int grid_nx, + unsigned int grid_ny, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, 1 ); + + LB_BC_Press_East27<<< grid, threads >>> ( + nx, + ny, + tz, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + 
numberOfLBnodes, + isEvenTimestep); + getLastCudaError("LB_BC_Press_East27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void BcVel27( + int nx, + int ny, + int nz, + int itz, + unsigned int grid_nx, + unsigned int grid_ny, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + real u0x, + real om) +{ + dim3 threads ( grid_nx, 1, 1 ); + dim3 grid ( grid_ny, 1 ); + + LB_BC_Vel_West_27<<< grid, threads >>> ( + nx, + ny, + nz, + itz, + bcMatD, + neighborX, + neighborY, + neighborZ, + DD, + numberOfLBnodes, + isEvenTimestep, + u0x, + grid_nx, + grid_ny, + om); + getLastCudaError("LB_BC_Vel_West_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADPressDev7( + unsigned int numberOfThreads, + real* DD, + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPress7<<< grid.grid, grid.threads >>>( + DD, + DD7, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADPress7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADPressDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid 
= vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPress27<<< grid.grid, grid.threads >>>( + DD, + DD27, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADPress27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QADPressNEQNeighborDev27( - unsigned int numberOfThreads, - real* DD, - real* DD27, - int* k_Q, - int* k_N, - int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep - ) -{ - - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressNEQNeighbor27<<< gridQ, threads >>>( - DD, - DD27, - k_Q, - k_N, - numberOfBCnodes, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep - ); - getLastCudaError("QADPressNEQNeighbor27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADVelDev7(unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVel7<<< gridQ, threads >>> ( - DD, - DD7, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - 
getLastCudaError("QADVel7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADVelDev27( unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVel27<<< gridQ, threads >>> ( DD, - DD27, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADVel27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADDev7(unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QAD7<<< gridQ, threads >>> ( DD, - DD7, - temp, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QAD7 execution failed"); + unsigned int numberOfThreads, + real* DD, + real* DD27, + int* k_Q, + int* k_N, + int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long 
numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>( + DD, + DD27, + k_Q, + k_N, + numberOfBCnodes, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADPressNEQNeighbor27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADVelDev7( + unsigned int numberOfThreads, + real* DD, + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVel7<<< grid.grid, grid.threads >>> ( + DD, + DD7, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADVel7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADVelDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVel27<<< grid.grid, grid.threads >>> ( + DD, + DD27, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADVel27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADDev7( + unsigned int numberOfThreads, 
+ real* DD, + real* DD7, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QAD7<<< grid.grid, grid.threads >>> ( + DD, + DD7, + temp, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QAD7 execution failed"); } @@ -2202,1700 +1670,1430 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel( uint* neighborZ, real* distributions, real* distributionsAD, - int size_Mat, + unsigned long long numberOfLBnodes, real* forces, bool isEvenTimestep) { - int Grid = (size_Mat / numberOfThreads) + 1; - dim3 grid(Grid, 1, 1); - dim3 threads(numberOfThreads, 1, 1); + int Grid = (numberOfLBnodes / numberOfThreads) + 1; + dim3 grid(Grid, 1, 1); + dim3 threads(numberOfThreads, 1, 1); - Factorized_Central_Moments_Advection_Diffusion_Device_Kernel <<< grid, threads >>> ( - omegaDiffusivity, - typeOfGridNode, - neighborX, - neighborY, - neighborZ, - distributions, - distributionsAD, - size_Mat, - forces, - isEvenTimestep); - getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed"); + Factorized_Central_Moments_Advection_Diffusion_Device_Kernel <<< grid, threads >>> ( + omegaDiffusivity, + typeOfGridNode, + neighborX, + neighborY, + neighborZ, + distributions, + distributionsAD, + numberOfLBnodes, + forces, + isEvenTimestep); + getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed"); } ////////////////////////////////////////////////////////////////////////// void ADSlipVelDevComp( - uint numberOfThreads, - real * normalX, - real * normalY, - real * normalZ, - real * distributions, - real * distributionsAD, - int* 
QindexArray, - real * Qarrays, - uint numberOfBCnodes, - real omegaDiffusivity, - uint * neighborX, - uint * neighborY, - uint * neighborZ, - uint size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads) + 1; - dim3 gridQ(Grid, 1, 1); - dim3 threads(numberOfThreads, 1, 1); - - AD_SlipVelDeviceComp << < gridQ, threads >> > ( - normalX, - normalY, - normalZ, - distributions, - distributionsAD, - QindexArray, - Qarrays, - numberOfBCnodes, - omegaDiffusivity, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("AD_SlipVelDeviceComp execution failed"); -} -////////////////////////////////////////////////////////////////////////// - -void QADDirichletDev27( unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADDirichlet27<<< gridQ, threads >>> ( - DD, - DD27, - temp, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADDirichletDev27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADBBDev27(unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 
512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADBB27<<< gridQ, threads >>> ( DD, - DD27, - temp, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADBB27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QNoSlipADincompDev7(unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QNoSlipADincomp7<<< gridQ, threads >>> ( - DD, - DD7, - temp, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QNoSlipADincomp7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QNoSlipADincompDev27( unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QNoSlipADincomp27<<< gridQ, threads >>> ( - DD, - DD27, - temp, - diffusivity, - 
k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QNoSlipADincomp27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADVeloIncompDev7( unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVeloIncomp7<<< gridQ, threads >>> ( - DD, - DD7, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADVeloIncomp7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADVeloIncompDev27( unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADVeloIncomp27<<< gridQ, threads >>> ( - DD, - DD27, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADVeloIncomp27 execution failed"); -} 
-////////////////////////////////////////////////////////////////////////// -void QADPressIncompDev7( unsigned int numberOfThreads, - real* DD, - real* DD7, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressIncomp7<<< gridQ, threads >>>( - DD, - DD7, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADPressIncomp7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QADPressIncompDev27( unsigned int numberOfThreads, - real* DD, - real* DD27, - real* temp, - real* velo, - real diffusivity, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QADPressIncomp27<<< gridQ, threads >>>( - DD, - DD27, - temp, - velo, - diffusivity, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QADPressIncomp27 execution failed"); + uint numberOfThreads, + real * normalX, + real * normalY, + real * normalZ, + real * distributions, + real * distributionsAD, + int* QindexArray, + real * Qarrays, + uint 
numberOfBCnodes, + real omegaDiffusivity, + uint * neighborX, + uint * neighborY, + uint * neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + AD_SlipVelDeviceComp <<< grid.grid, grid.threads >>> ( + normalX, + normalY, + normalZ, + distributions, + distributionsAD, + QindexArray, + Qarrays, + numberOfBCnodes, + omegaDiffusivity, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("AD_SlipVelDeviceComp execution failed"); +} +////////////////////////////////////////////////////////////////////////// + +void QADDirichletDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADDirichlet27<<< grid.grid, grid.threads >>> ( + DD, + DD27, + temp, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADDirichletDev27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADBBDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADBB27<<< grid.grid, grid.threads >>> ( + DD, + DD27, + temp, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + 
isEvenTimestep); + getLastCudaError("QADBB27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QNoSlipADincompDev7( + unsigned int numberOfThreads, + real* DD, + real* DD7, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QNoSlipADincomp7<<< grid.grid, grid.threads >>> ( + DD, + DD7, + temp, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QNoSlipADincomp7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QNoSlipADincompDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QNoSlipADincomp27<<< grid.grid, grid.threads >>> ( + DD, + DD27, + temp, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QNoSlipADincomp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADVeloIncompDev7( + unsigned int numberOfThreads, + real* DD, + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) 
+{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVeloIncomp7<<< grid.grid, grid.threads >>> ( + DD, + DD7, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADVeloIncomp7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADVeloIncompDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADVeloIncomp27<<< grid.grid, grid.threads >>> ( + DD, + DD27, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADVeloIncomp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QADPressIncompDev7( + unsigned int numberOfThreads, + real* DD, + real* DD7, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressIncomp7<<< grid.grid, grid.threads >>>( + DD, + DD7, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADPressIncomp7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void 
QADPressIncompDev27( + unsigned int numberOfThreads, + real* DD, + real* DD27, + real* temp, + real* velo, + real diffusivity, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QADPressIncomp27<<< grid.grid, grid.threads >>>( + DD, + DD27, + temp, + velo, + diffusivity, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QADPressIncomp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - - QDevice27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + QDevice27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); getLastCudaError("QDevice27 execution failed"); } 
////////////////////////////////////////////////////////////////////////// void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - - QDeviceComp27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QDeviceComp27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QDevCompThinWalls27(unsigned int numberOfThreads, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* geom, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceCompThinWallsPartOne27 <<< gridQ, threads >>> (DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed"); - - QThinWallsPartTwo27 <<< gridQ, threads >>> ( DD, - k_Q, - QQ, - numberOfBCnodes, - geom, - neighborX, - neighborY, - neighborZ, - neighborWSB, - size_Mat, - isEvenTimestep); - getLastCudaError("QThinWallsPartTwo27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 
threads(parameterDevice->numberofthreads, 1, 1 ); + QDeviceComp27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QDeviceComp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QDevCompThinWalls27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* geom, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QDeviceCompThinWallsPartOne27 <<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed"); + + QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + geom, + neighborX, + neighborY, + neighborZ, + neighborWSB, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QThinWallsPartTwo27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1); - - QDevice3rdMomentsComp27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - 
boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1); + + QDevice3rdMomentsComp27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); getLastCudaError("QDevice3rdMomentsComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void QDevIncompHighNu27( unsigned int numberOfThreads, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceIncompHighNu27<<< gridQ, threads >>> ( - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QDeviceIncompHighNu27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QDevCompHighNu27( unsigned int numberOfThreads, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = 
(numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QDeviceCompHighNu27<<< gridQ, threads >>> ( - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QDevice27 execution failed"); +void QDevIncompHighNu27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QDeviceIncompHighNu27<<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QDeviceIncompHighNu27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QDevCompHighNu27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QDeviceCompHighNu27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, 
boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QVelDevPlainBB27<<< grid, threads >>> ( - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QVelDevicePlainBB27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVelDeviceCouette27(unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDevCouette27<<< gridQ, threads >>> ( vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDevicePlainBB27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVelDevice1h27( unsigned int numberOfThreads, - int nx, - int ny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - 
real* coordY, - real* coordZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDev1h27<<< gridQ, threads >>> (nx, - ny, - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - Phi, - angularVelocity, - neighborX, - neighborY, - neighborZ, - coordX, - coordY, - coordZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDevice27 execution failed"); + QVelDevPlainBB27<<< grid, threads >>> ( + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QVelDevicePlainBB27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDeviceCouette27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDevCouette27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDevCouette27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDevice1h27( + unsigned int numberOfThreads, + int nx, + int ny, + real* vx, + real* vy, + 
real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDev1h27<<< grid.grid, grid.threads >>> ( + nx, + ny, + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + Phi, + angularVelocity, + neighborX, + neighborY, + neighborZ, + coordX, + coordY, + coordZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDev1h27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QVelDevice27<<< grid, threads >>> ( - parameterDevice->nx, - parameterDevice->ny, - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QVelDevice27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVelDevCompPlusSlip27(unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, 
- unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompPlusSlip27<<< gridQ, threads >>> ( - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDeviceCompPlusSlip27 execution failed"); + QVelDevice27<<< grid, threads >>> ( + parameterDevice->nx, + parameterDevice->ny, + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QVelDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDevCompPlusSlip27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompPlusSlip27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDeviceCompPlusSlip27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void 
QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QVelDeviceComp27<<< grid, threads >>> ( - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); + QVelDeviceComp27<<< grid, threads >>> ( + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); getLastCudaError("QVelDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void QVelDevCompThinWalls27(unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* geom, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 
threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompThinWallsPartOne27<<< gridQ, threads >>> (vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed"); - - QThinWallsPartTwo27 <<< gridQ, threads >>> ( - DD, - k_Q, - QQ, - numberOfBCnodes, - geom, - neighborX, - neighborY, - neighborZ, - neighborWSB, - size_Mat, - isEvenTimestep); - getLastCudaError("QThinWallsPartTwo27 execution failed"); -} - -void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) +void QVelDevCompThinWalls27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* geom, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompThinWallsPartOne27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed"); + + QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + geom, + neighborX, + neighborY, + neighborZ, + neighborWSB, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QThinWallsPartTwo27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDevCompZeroPress27(LBMSimulationParameter *parameterDevice, QforBoundaryConditions *boundaryCondition) { dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); dim3 threads(parameterDevice->numberofthreads, 1, 
1 ); - QVelDeviceCompZeroPress27<<< grid, threads >>> ( - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QVelDeviceCompZeroPress27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVelDevIncompHighNu27(unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVelDeviceIncompHighNu27<<< gridQ, threads >>> ( - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDeviceIncompHighNu27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVelDevCompHighNu27( unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 
threads(numberOfThreads, 1, 1 ); - - QVelDeviceCompHighNu27<<< gridQ, threads >>> ( - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVelDeviceComp27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QVeloDevEQ27(unsigned int numberOfThreads, - real* VeloX, - real* VeloY, - real* VeloZ, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QVeloDeviceEQ27<<< gridQ, threads >>> (VeloX, - VeloY, - VeloZ, - DD, - k_Q, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVeloDeviceEQ27 execution failed"); + QVelDeviceCompZeroPress27<<< grid, threads >>> ( + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QVelDeviceCompZeroPress27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDevIncompHighNu27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long 
numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceIncompHighNu27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDeviceIncompHighNu27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVelDevCompHighNu27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVelDeviceComp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QVeloDevEQ27( + unsigned int numberOfThreads, + real* VeloX, + real* VeloY, + real* VeloZ, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QVeloDeviceEQ27<<< grid.grid, grid.threads >>> ( + VeloX, + VeloY, + VeloZ, + DD, + k_Q, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVeloDeviceEQ27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QVeloStreetDevEQ27( - uint numberOfThreads, - real* veloXfraction, - 
real* veloYfraction, - int* naschVelo, - real* DD, - int* naschIndex, - int numberOfStreetNodes, - real velocityRatio, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfStreetNodes / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - QVeloStreetDeviceEQ27 << < gridQ, threads >> > ( - veloXfraction, - veloYfraction, - naschVelo, - DD, - naschIndex, - numberOfStreetNodes, - velocityRatio, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QVeloStreetDeviceEQ27 execution failed"); + uint numberOfThreads, + real* veloXfraction, + real* veloYfraction, + int* naschVelo, + real* DD, + int* naschIndex, + int numberOfStreetNodes, + real velocityRatio, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfStreetNodes); + + QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > ( + veloXfraction, + veloYfraction, + naschVelo, + DD, + naschIndex, + numberOfStreetNodes, + velocityRatio, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QVeloStreetDeviceEQ27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipDevice27<<< grid, threads >>> ( - 
parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QSlipDevice27 execution failed"); + QSlipDevice27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QSlipDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipDeviceComp27TurbViscosity<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->turbViscosity, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); + QSlipDeviceComp27TurbViscosity<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + 
parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->turbViscosity, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); } ////////////////////////////////////////////////////////////////////////// void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->turbViscosity, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); + QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->turbViscosity, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); } ////////////////////////////////////////////////////////////////////////// void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( 
parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipDeviceComp27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QSlipDeviceComp27 execution failed"); + QSlipDeviceComp27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QSlipDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QSlipDeviceComp27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); 
- getLastCudaError("BBSlipDeviceComp27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QSlipGeomDevComp27(unsigned int numberOfThreads, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QSlipGeomDeviceComp27<<< gridQ, threads >>> (DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - NormalX, - NormalY, - NormalZ, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QSlipGeomDeviceComp27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QSlipNormDevComp27(unsigned int numberOfThreads, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QSlipNormDeviceComp27<<< gridQ, threads >>> (DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - NormalX, - NormalY, - NormalZ, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QSlipGeomDeviceComp27 execution failed"); + BBSlipDeviceComp27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + 
boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("BBSlipDeviceComp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QSlipGeomDevComp27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes); + + QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + NormalX, + NormalY, + NormalZ, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QSlipGeomDeviceComp27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QSlipNormDevComp27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> ( + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + NormalX, + NormalY, + NormalZ, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QSlipNormDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QStressDevComp27(Parameter *para, QforBoundaryConditions* boundaryCondition, const int level) { - dim3 grid = 
vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); - - QStressDeviceComp27<<< grid, threads >>> ( - para->getParD(level)->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - para->getParD(level)->omega, - para->getParD(level)->turbViscosity, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityY, - boundaryCondition->normalX, - boundaryCondition->normalY, - boundaryCondition->normalZ, - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - boundaryCondition->Vx1, - boundaryCondition->Vy1, - boundaryCondition->Vz1, - para->getParD(level)->wallModel.samplingOffset, - para->getParD(level)->wallModel.z0, - para->getHasWallModelMonitor(), - para->getParD(level)->wallModel.u_star, - para->getParD(level)->wallModel.Fx, - para->getParD(level)->wallModel.Fy, - para->getParD(level)->wallModel.Fz, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("QSlipDeviceComp27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); + + QStressDeviceComp27<<< grid, threads >>> ( + para->getParD(level)->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + para->getParD(level)->omega, + para->getParD(level)->turbViscosity, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityY, + boundaryCondition->normalX, + boundaryCondition->normalY, + boundaryCondition->normalZ, + boundaryCondition->Vx, + 
boundaryCondition->Vy, + boundaryCondition->Vz, + boundaryCondition->Vx1, + boundaryCondition->Vy1, + boundaryCondition->Vz1, + para->getParD(level)->wallModel.samplingOffset, + para->getParD(level)->wallModel.z0, + para->getHasWallModelMonitor(), + para->getParD(level)->wallModel.u_star, + para->getParD(level)->wallModel.Fx, + para->getParD(level)->wallModel.Fy, + para->getParD(level)->wallModel.Fz, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("QStressDeviceComp27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void BBStressDev27(Parameter *para, QforBoundaryConditions* boundaryCondition, const int level) { - dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); - - BBStressDevice27<<< grid, threads >>> ( - para->getParD(level)->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityY, - boundaryCondition->normalX, - boundaryCondition->normalY, - boundaryCondition->normalZ, - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - boundaryCondition->Vx1, - boundaryCondition->Vy1, - boundaryCondition->Vz1, - para->getParD(level)->wallModel.samplingOffset, - para->getParD(level)->wallModel.z0, - para->getHasWallModelMonitor(), - para->getParD(level)->wallModel.u_star, - para->getParD(level)->wallModel.Fx, - para->getParD(level)->wallModel.Fy, - para->getParD(level)->wallModel.Fz, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - 
para->getParD(level)->isEvenTimestep); - getLastCudaError("BBStressDevice27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); + + BBStressDevice27<<< grid, threads >>> ( + para->getParD(level)->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityY, + boundaryCondition->normalX, + boundaryCondition->normalY, + boundaryCondition->normalZ, + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + boundaryCondition->Vx1, + boundaryCondition->Vy1, + boundaryCondition->Vz1, + para->getParD(level)->wallModel.samplingOffset, + para->getParD(level)->wallModel.z0, + para->getHasWallModelMonitor(), + para->getParD(level)->wallModel.u_star, + para->getParD(level)->wallModel.Fx, + para->getParD(level)->wallModel.Fy, + para->getParD(level)->wallModel.Fz, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("BBStressDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void BBStressPressureDev27(Parameter *para, QforBoundaryConditions* boundaryCondition, const int level) { - dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); - - BBStressPressureDevice27<<< grid, threads >>> ( - para->getParD(level)->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - 
para->getParD(level)->velocityY, - boundaryCondition->normalX, - boundaryCondition->normalY, - boundaryCondition->normalZ, - boundaryCondition->Vx, - boundaryCondition->Vy, - boundaryCondition->Vz, - boundaryCondition->Vx1, - boundaryCondition->Vy1, - boundaryCondition->Vz1, - para->getParD(level)->wallModel.samplingOffset, - para->getParD(level)->wallModel.z0, - para->getHasWallModelMonitor(), - para->getParD(level)->wallModel.u_star, - para->getParD(level)->wallModel.Fx, - para->getParD(level)->wallModel.Fy, - para->getParD(level)->wallModel.Fz, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("BBStressDevice27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); + + BBStressPressureDevice27<<< grid, threads >>> ( + para->getParD(level)->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityY, + boundaryCondition->normalX, + boundaryCondition->normalY, + boundaryCondition->normalZ, + boundaryCondition->Vx, + boundaryCondition->Vy, + boundaryCondition->Vz, + boundaryCondition->Vx1, + boundaryCondition->Vy1, + boundaryCondition->Vz1, + para->getParD(level)->wallModel.samplingOffset, + para->getParD(level)->wallModel.z0, + para->getHasWallModelMonitor(), + para->getParD(level)->wallModel.u_star, + para->getParD(level)->wallModel.Fx, + para->getParD(level)->wallModel.Fy, + para->getParD(level)->wallModel.Fz, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + 
getLastCudaError("BBStressPressureDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QPressDevice27<<< grid, threads >>> ( - boundaryCondition->RhoBC, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QPressDevice27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDevAntiBB27( unsigned int numberOfThreads, - real* rhoBC, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceAntiBB27<<< gridQ, threads >>>( rhoBC, - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); + QPressDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + 
boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QPressDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDevAntiBB27( + unsigned int numberOfThreads, + real* rhoBC, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>( + rhoBC, + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); getLastCudaError("QPressDeviceAntiBB27 execution failed"); } ////////////////////////////////////////////////////////////////////////// -void QPressDevFixBackflow27( unsigned int numberOfThreads, - real* rhoBC, - real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceFixBackflow27<<< gridQ, threads >>> ( rhoBC, - DD, - k_Q, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QPressDeviceFixBackflow27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDevDirDepBot27( unsigned int 
numberOfThreads, - real* rhoBC, - real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceDirDepBot27<<< gridQ, threads >>> ( rhoBC, - DD, - k_Q, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QPressDeviceDirDepBot27 execution failed"); +void QPressDevFixBackflow27( + unsigned int numberOfThreads, + real* rhoBC, + real* DD, + int* k_Q, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceFixBackflow27<<< grid.grid, grid.threads >>> ( + rhoBC, + DD, + k_Q, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QPressDeviceFixBackflow27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDevDirDepBot27( + unsigned int numberOfThreads, + real* rhoBC, + real* DD, + int* k_Q, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceDirDepBot27<<< grid.grid, grid.threads >>> ( + rhoBC, + DD, + k_Q, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + 
getLastCudaError("QPressDeviceDirDepBot27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + + QPressNoRhoDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00); + getLastCudaError("QPressNoRhoDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) +{ + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QPressNoRhoDevice27<<< grid, threads >>> ( - boundaryCondition->RhoBC, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QPressNoRhoDevice27 execution failed"); + QPressZeroRhoOutflowDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + 
boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep, + vf::lbm::dir::DIR_P00, + parameterDevice->outflowPressureCorrectionFactor); + getLastCudaError("QPressZeroRhoOutflowDevice27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QInflowScaleByPressDevice27<<< grid, threads >>> ( - boundaryCondition->RhoBC, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QInflowScaleByPressDevice27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDevOld27( unsigned int numberOfThreads, - real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 
1, 1 ); - - QPressDeviceOld27<<< gridQ, threads >>> ( rhoBC, - DD, - k_Q, - k_N, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QPressDeviceOld27 execution failed"); + QInflowScaleByPressDevice27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QInflowScaleByPressDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDevOld27( + unsigned int numberOfThreads, + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceOld27<<< grid.grid, grid.threads >>> ( + rhoBC, + DD, + k_Q, + k_N, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QPressDeviceOld27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QPressDeviceIncompNEQ27<<< grid, threads >>> ( - boundaryCondition->RhoBC, - 
parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QPressDeviceIncompNEQ27 execution failed"); + QPressDeviceIncompNEQ27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QPressDeviceIncompNEQ27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QPressDeviceNEQ27<<< grid, threads >>> ( + QPressDeviceNEQ27<<< grid, threads >>> ( boundaryCondition->RhoBC, parameterDevice->distributions.f[0], boundaryCondition->k, @@ -3907,3241 +3105,2743 @@ void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi parameterDevice->neighborZ, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep); - getLastCudaError("QPressDevNEQ27 execution failed"); + getLastCudaError("QPressDevNEQ27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* 
boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - QPressDeviceEQZ27<<< grid, threads >>> ( - boundaryCondition->RhoBC, - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->kN, - parameterDevice->kDistTestRE.f[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->omega, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("QPressDeviceEQZ27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDevZero27(unsigned int numberOfThreads, - real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceZero27<<< gridQ, threads >>> (DD, - k_Q, - numberOfBCnodes, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QPressDeviceOld27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDevFake27( unsigned int numberOfThreads, - real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / 
numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDeviceFake27<<< gridQ, threads >>> (rhoBC, - DD, - k_Q, - k_N, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("QPressDeviceFake27 execution failed"); + QPressDeviceEQZ27<<< grid, threads >>> ( + boundaryCondition->RhoBC, + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->kN, + parameterDevice->kDistTestRE.f[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->omega, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QPressDeviceEQZ27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDevZero27( + unsigned int numberOfThreads, + real* DD, + int* k_Q, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDeviceZero27<<< grid.grid, grid.threads >>> ( + DD, + k_Q, + numberOfBCnodes, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QPressDeviceOld27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDevFake27( + unsigned int numberOfThreads, + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + + QPressDeviceFake27<<< grid.grid, grid.threads >>> ( + rhoBC, + DD, + k_Q, + k_N, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QPressDeviceFake27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) { - dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); - dim3 threads(parameterDevice->numberofthreads, 1, 1 ); + dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + dim3 threads(parameterDevice->numberofthreads, 1, 1 ); - BBDevice27<<< grid, threads >>> ( - parameterDevice->distributions.f[0], - boundaryCondition->k, - boundaryCondition->q27[0], - boundaryCondition->numberOfBCnodes, - parameterDevice->neighborX, - parameterDevice->neighborY, - parameterDevice->neighborZ, - parameterDevice->numberOfNodes, - parameterDevice->isEvenTimestep); - getLastCudaError("BBDevice27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void QPressDev27_IntBB( unsigned int numberOfThreads, - real* rho, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - QPressDevice27_IntBB<<< gridQ, threads >>> (rho, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - 
getLastCudaError("QPressDevice27_IntBB execution failed"); + BBDevice27<<< grid, threads >>> ( + parameterDevice->distributions.f[0], + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->numberOfBCnodes, + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("BBDevice27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPressDev27_IntBB( + unsigned int numberOfThreads, + real* rho, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + QPressDevice27_IntBB<<< grid.grid, grid.threads >>> ( + rho, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("QPressDevice27_IntBB execution failed"); } // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 ////////////////////////////////////////////////////////////////////////// -void PressSchlaffer27(unsigned int numberOfThreads, - real* rhoBC, - real* DD, - real* vx0, - real* vy0, - real* vz0, - real* deltaVz0, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - PressSchlaff27<<< gridQ, threads >>>( rhoBC, - DD, - vx0, - vy0, - vz0, - deltaVz0, - k_Q, - k_N, - numberOfBCnodes, - om1, - 
neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("PressSchlaff27 execution failed"); +void PressSchlaffer27( + unsigned int numberOfThreads, + real* rhoBC, + real* DD, + real* vx0, + real* vy0, + real* vz0, + real* deltaVz0, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + PressSchlaff27<<< grid.grid, grid.threads >>>( + rhoBC, + DD, + vx0, + vy0, + vz0, + deltaVz0, + k_Q, + k_N, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("PressSchlaff27 execution failed"); } // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29 ////////////////////////////////////////////////////////////////////////// -void VelSchlaffer27( unsigned int numberOfThreads, - int t, - real* DD, - real* vz0, - real* deltaVz0, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - VelSchlaff27<<< gridQ, threads >>>( t, - DD, - vz0, - deltaVz0, - k_Q, - k_N, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("VelSchlaff27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void PropVelo( unsigned int numberOfThreads, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* rho, - real* ux, - real* uy, - real* uz, - 
int* k_Q, - unsigned int size_Prop, - unsigned int size_Mat, - unsigned int* bcMatD, - real* DD, - bool EvenOrOdd) -{ - int Grid = (size_Prop / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - PropellerBC<<< grid, threads >>>(neighborX, - neighborY, - neighborZ, - rho, - ux, - uy, - uz, - k_Q, - size_Prop, - size_Mat, - bcMatD, - DD, - EvenOrOdd); - getLastCudaError("PropellerBC execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF27<<< gridINT_CF, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF); - getLastCudaError("scaleCF27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFEff27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* 
neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFEff27<<< gridINT_CF, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCFEff27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFLast27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFLast27<<< gridINT_CF, threads >>> (DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - 
nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCFLast27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFpress27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFpress27<<< gridINT_CF, threads >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCFpress27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_Fix_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = 
(Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_Fix_27<<< gridINT_CF, threads >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_Fix_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_Fix_comp_27<<< gridINT_CF, threads >>>( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_0817_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned 
int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF, - CUstream_st *stream) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_0817_comp_27<<< gridINT_CF, threads, 0, stream >>>( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_0817_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_comp_D3Q27F3_2018(real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_comp_D3Q27F3_2018 <<< gridINT_CF, threads >>>(DC, - DF, - G6, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - 
kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_comp_D3Q27F3(real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF, - CUstream_st *stream) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_comp_D3Q27F3 <<< gridINT_CF, threads, 0, stream >>>( DC, - DF, - G6, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_comp_D3Q27F3 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_staggered_time_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, 
- OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_staggered_time_comp_27<<< gridINT_CF, threads >>>( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_Fix_27 execution failed"); +void VelSchlaffer27( + unsigned int numberOfThreads, + int t, + real* DD, + real* vz0, + real* deltaVz0, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + VelSchlaff27<<< grid.grid, grid.threads >>>( + t, + DD, + vz0, + deltaVz0, + k_Q, + k_N, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("VelSchlaff27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, + QforPrecursorBoundaryConditions* boundaryCondition, + real timeRatio, + real velocityRatio) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>( + boundaryCondition->k, + boundaryCondition->numberOfBCnodes, + boundaryCondition->numberOfPrecursorNodes, + boundaryCondition->sizeQ, + parameterDevice->omega, + parameterDevice->distributions.f[0], + boundaryCondition->q27[0], + parameterDevice->neighborX, + parameterDevice->neighborY, + 
parameterDevice->neighborZ, + boundaryCondition->planeNeighbor0PP, + boundaryCondition->planeNeighbor0PM, + boundaryCondition->planeNeighbor0MP, + boundaryCondition->planeNeighbor0MM, + boundaryCondition->weights0PP, + boundaryCondition->weights0PM, + boundaryCondition->weights0MP, + boundaryCondition->weights0MM, + boundaryCondition->last, + boundaryCondition->current, + boundaryCondition->velocityX, + boundaryCondition->velocityY, + boundaryCondition->velocityZ, + timeRatio, + velocityRatio, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, + QforPrecursorBoundaryConditions* boundaryCondition, + real timeRatio, + real velocityRatio) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>( + boundaryCondition->k, + boundaryCondition->numberOfBCnodes, + boundaryCondition->numberOfPrecursorNodes, + parameterDevice->omega, + parameterDevice->distributions.f[0], + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + boundaryCondition->planeNeighbor0PP, + boundaryCondition->planeNeighbor0PM, + boundaryCondition->planeNeighbor0MP, + boundaryCondition->planeNeighbor0MM, + boundaryCondition->weights0PP, + boundaryCondition->weights0PM, + boundaryCondition->weights0MP, + boundaryCondition->weights0MM, + boundaryCondition->last, + boundaryCondition->current, + boundaryCondition->velocityX, + boundaryCondition->velocityY, + boundaryCondition->velocityZ, + timeRatio, + velocityRatio, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("PrecursorDeviceEQ27 execution failed"); + +} +//////////////////////////////////////////////////////////////////////////
+void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, + QforPrecursorBoundaryConditions* boundaryCondition, + real timeRatio, + real velocityRatio) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>( + boundaryCondition->k, + boundaryCondition->numberOfBCnodes, + boundaryCondition->numberOfPrecursorNodes, + parameterDevice->distributions.f[0], + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + boundaryCondition->planeNeighbor0PP, + boundaryCondition->planeNeighbor0PM, + boundaryCondition->planeNeighbor0MP, + boundaryCondition->planeNeighbor0MM, + boundaryCondition->weights0PP, + boundaryCondition->weights0PM, + boundaryCondition->weights0MP, + boundaryCondition->weights0MM, + boundaryCondition->last, + boundaryCondition->current, + timeRatio, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("PrecursorDeviceDistributions execution failed"); + +} + +////////////////////////////////////////////////////////////////////////// +void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, + QforPrecursorBoundaryConditions* boundaryCondition, + real timeRatio, + real velocityRatio) +{ + + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); + + QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>( + boundaryCondition->k, + boundaryCondition->q27[0], + boundaryCondition->sizeQ, + boundaryCondition->numberOfBCnodes, + boundaryCondition->numberOfPrecursorNodes, + parameterDevice->distributions.f[0], + parameterDevice->neighborX, + parameterDevice->neighborY, + parameterDevice->neighborZ, + boundaryCondition->planeNeighbor0PP, + boundaryCondition->planeNeighbor0PM, + boundaryCondition->planeNeighbor0MP, + boundaryCondition->planeNeighbor0MM, + 
boundaryCondition->weights0PP, + boundaryCondition->weights0PM, + boundaryCondition->weights0MP, + boundaryCondition->weights0MM, + boundaryCondition->last, + boundaryCondition->current, + timeRatio, + parameterDevice->numberOfNodes, + parameterDevice->isEvenTimestep); + getLastCudaError("QPrecursorDeviceDistributions execution failed"); + +} +////////////////////////////////////////////////////////////////////////// +extern "C" void PropVelo( + unsigned int numberOfThreads, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* rho, + real* ux, + real* uy, + real* uz, + int* k_Q, + unsigned int size_Prop, + unsigned long long numberOfLBnodes, + unsigned int* bcMatD, + real* DD, + bool EvenOrOdd) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Prop); + + PropellerBC<<< grid.grid, grid.threads >>>( + neighborX, + neighborY, + neighborZ, + rho, + ux, + uy, + uz, + k_Q, + size_Prop, + numberOfLBnodes, + bcMatD, + DD, + EvenOrOdd); + getLastCudaError("PropellerBC execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, +
nxC, + nyC, + nxF, + nyF); + getLastCudaError("scaleCF27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCFEff27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFEff27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCFEff27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCFLast27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFLast27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + 
numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCFLast27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCFpress27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFpress27<<< grid.grid, grid.threads >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCFpress27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_Fix_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_Fix_27<<< 
grid.grid, grid.threads >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_Fix_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_Fix_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_Fix_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_0817_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int 
nyF, + unsigned int numberOfThreads, + OffCF offCF, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_0817_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_comp_D3Q27F3_2018( + real* DC, + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>( + DC, + DF, + G6, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_comp_D3Q27F3( + real* DC, + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned
long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + G6, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_comp_D3Q27F3 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_staggered_time_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_staggered_time_comp_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void 
ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream) { - dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellCF->kCF); - dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - - scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>( - parameterDeviceC->distributions.f[0], - parameterDeviceF->distributions.f[0], - parameterDeviceC->neighborX, - parameterDeviceC->neighborY, - parameterDeviceC->neighborZ, - parameterDeviceF->neighborX, - parameterDeviceF->neighborY, - parameterDeviceF->neighborZ, - parameterDeviceC->numberOfNodes, - parameterDeviceF->numberOfNodes, - parameterDeviceC->isEvenTimestep, - icellCF->ICellCFC, - icellCF->ICellCFF, - icellCF->kCF, - parameterDeviceC->omega, - parameterDeviceF->omega, - parameterDeviceC->vis, - parameterDeviceC->nx, - parameterDeviceC->ny, - parameterDeviceF->nx, - parameterDeviceF->ny, - offsetCF); - getLastCudaError("scaleCF_RhoSq_27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellCF->kCF); + dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); + + scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>( + parameterDeviceC->distributions.f[0], + parameterDeviceF->distributions.f[0], + parameterDeviceC->neighborX, + parameterDeviceC->neighborY, + parameterDeviceC->neighborZ, + parameterDeviceF->neighborX, + parameterDeviceF->neighborY, + parameterDeviceF->neighborZ, + parameterDeviceC->numberOfNodes, + parameterDeviceF->numberOfNodes, + parameterDeviceC->isEvenTimestep, + icellCF->ICellCFC, + icellCF->ICellCFF, + icellCF->kCF, + parameterDeviceC->omega, + parameterDeviceF->omega, + parameterDeviceC->vis, + parameterDeviceC->nx, + parameterDeviceC->ny, + parameterDeviceF->nx, + parameterDeviceF->ny, + offsetCF); + getLastCudaError("scaleCF_RhoSq_comp_27 execution failed"); } void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC,
LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream) { - dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellCF->kCF); - dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - - scaleCF_compressible<<<grid, threads, 0, stream>>>( - parameterDeviceC->distributions.f[0], - parameterDeviceF->distributions.f[0], - parameterDeviceC->neighborX, - parameterDeviceC->neighborY, - parameterDeviceC->neighborZ, - parameterDeviceF->neighborX, - parameterDeviceF->neighborY, - parameterDeviceF->neighborZ, - parameterDeviceC->numberOfNodes, - parameterDeviceF->numberOfNodes, - parameterDeviceC->isEvenTimestep, - icellCF->ICellCFC, - icellCF->ICellCFF, - icellCF->kCF, - parameterDeviceC->omega, - parameterDeviceF->omega, - offsetCF); - getLastCudaError("scaleCF_compressible execution failed"); -} - -////////////////////////////////////////////////////////////////////////// -void ScaleCF_RhoSq_3rdMom_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF, - CUstream_st *stream) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_RhoSq_3rdMom_comp_27<<< gridINT_CF, threads, 0, stream >>>( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, 
- omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_AA2016_comp_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF, - CUstream_st *stream) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_AA2016_comp_27<<< gridINT_CF, threads, 0, stream >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_AA2016_comp_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCF_NSPress_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / 
numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCF_NSPress_27<<< gridINT_CF, threads >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offCF); - getLastCudaError("scaleCF_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFThSMG7( real* DC, - real* DF, - real* DD7C, - real* DD7F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real nu, - real diffusivity_fine, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThSMG7<<< gridINT_CF, threads >>> (DC, - DF, - DD7C, - DD7F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - nu, - diffusivity_fine, - offCF); - getLastCudaError("scaleCFThSMG7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFThS7( real* DC, - real* DF, - real* DD7C, - real* DD7F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned 
int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real nu, - real diffusivity_fine, - unsigned int numberOfThreads) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThS7<<< gridINT_CF, threads >>> ( DC, - DF, - DD7C, - DD7F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - nu, - diffusivity_fine); - getLastCudaError("scaleCFThS7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleCFThS27( real* DC, - real* DF, - real* DD27C, - real* DD27F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posCSWB, - unsigned int* posFSWB, - unsigned int kCF, - real nu, - real diffusivity_fine, - unsigned int numberOfThreads, - OffCF offCF) -{ - int Grid = (kCF / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_CF(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleCFThS27<<< gridINT_CF, threads >>> ( DC, - DF, - DD27C, - DD27F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posCSWB, - posFSWB, - kCF, - nu, - diffusivity_fine, - offCF); - getLastCudaError("scaleCFThS27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC27( real* DC, - real* DF, - unsigned int* 
neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF); - getLastCudaError("scaleFC27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCEff27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCEff27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - 
size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFCEff27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCLast27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCLast27<<< gridINT_FC, threads >>> (DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("Kernel execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCpress27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / 
numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCpress27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFCpress27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_Fix_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_Fix_27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_Fix_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* 
neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_Fix_comp_27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_0817_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC, - CUstream_st *stream) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_0817_comp_27<<< gridINT_FC, threads, 0, stream >>> (DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - 
isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_0817_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_comp_D3Q27F3_2018( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_comp_D3Q27F3_2018 <<< gridINT_FC, threads >>> (DC, - DF, - G6, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_comp_D3Q27F3( real* DC, - real* DF, - real* G6, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int 
numberOfThreads, - OffFC offFC, - CUstream_st *stream) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_comp_D3Q27F3 <<< gridINT_FC, threads, 0, stream >>> (DC, - DF, - G6, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_0817_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_staggered_time_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_staggered_time_comp_27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_Fix_27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellCF->kCF); + dim3 
threads(parameterDeviceC->numberofthreads, 1, 1 ); + + scaleCF_compressible<<<grid, threads, 0, stream>>>( + parameterDeviceC->distributions.f[0], + parameterDeviceF->distributions.f[0], + parameterDeviceC->neighborX, + parameterDeviceC->neighborY, + parameterDeviceC->neighborZ, + parameterDeviceF->neighborX, + parameterDeviceF->neighborY, + parameterDeviceF->neighborZ, + parameterDeviceC->numberOfNodes, + parameterDeviceF->numberOfNodes, + parameterDeviceC->isEvenTimestep, + icellCF->ICellCFC, + icellCF->ICellCFF, + icellCF->kCF, + parameterDeviceC->omega, + parameterDeviceF->omega, + offsetCF); + getLastCudaError("scaleCF_compressible execution failed"); +} + +////////////////////////////////////////////////////////////////////////// +void ScaleCF_RhoSq_3rdMom_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_AA2016_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + 
unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_AA2016_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCF_NSPress_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCF_NSPress_27<<< grid.grid, grid.threads >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offCF); + getLastCudaError("scaleCF_NSPress_27 execution 
failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCFThSMG7( + real* DC, + real* DF, + real* DD7C, + real* DD7F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real nu, + real diffusivity_fine, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThSMG7<<< grid.grid, grid.threads >>> ( + DC, + DF, + DD7C, + DD7F, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + nu, + diffusivity_fine, + offCF); + getLastCudaError("scaleCFThSMG7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleCFThS7( + real* DC, + real* DF, + real* DD7C, + real* DD7F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real nu, + real diffusivity_fine, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThS7<<< grid.grid, grid.threads >>> ( + DC, + DF, + DD7C, + DD7F, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + nu, + diffusivity_fine); + getLastCudaError("scaleCFThS7 execution failed"); +} 
+////////////////////////////////////////////////////////////////////////// +void ScaleCFThS27( + real* DC, + real* DF, + real* DD27C, + real* DD27F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posCSWB, + unsigned int* posFSWB, + unsigned int kCF, + real nu, + real diffusivity_fine, + unsigned int numberOfThreads, + OffCF offCF) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); + + scaleCFThS27<<< grid.grid, grid.threads >>> ( + DC, + DF, + DD27C, + DD27F, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posCSWB, + posFSWB, + kCF, + nu, + diffusivity_fine, + offCF); + getLastCudaError("scaleCFThS27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF); + getLastCudaError("scaleFC27 execution 
failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCEff27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCEff27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFCEff27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCLast27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCLast27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + 
omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("Kernel execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCpress27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCpress27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFCpress27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_Fix_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_Fix_27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + 
neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_Fix_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_Fix_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_Fix_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_0817_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_0817_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_comp_D3Q27F3_2018( + real* DC, + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> ( + DC, + DF, + G6, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_comp_D3Q27F3( + real* DC, + real* DF, + real* G6, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned 
int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> ( + DC, + DF, + G6, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_comp_D3Q27F3 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_staggered_time_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> ( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_staggered_time_comp_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st 
*stream) { - dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellFC->kFC); - dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - - scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>( - parameterDeviceC->distributions.f[0], - parameterDeviceF->distributions.f[0], - parameterDeviceC->neighborX, - parameterDeviceC->neighborY, - parameterDeviceC->neighborZ, - parameterDeviceF->neighborX, - parameterDeviceF->neighborY, - parameterDeviceF->neighborZ, - parameterDeviceC->numberOfNodes, - parameterDeviceF->numberOfNodes, - parameterDeviceC->isEvenTimestep, - icellFC->ICellFCC, - icellFC->ICellFCF, - icellFC->kFC, - parameterDeviceC->omega, - parameterDeviceF->omega, - parameterDeviceC->vis, - parameterDeviceC->nx, - parameterDeviceC->ny, - parameterDeviceF->nx, - parameterDeviceF->ny, - offsetFC); - getLastCudaError("scaleFC_RhoSq_27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellFC->kFC); + dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); + + scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>( + parameterDeviceC->distributions.f[0], + parameterDeviceF->distributions.f[0], + parameterDeviceC->neighborX, + parameterDeviceC->neighborY, + parameterDeviceC->neighborZ, + parameterDeviceF->neighborX, + parameterDeviceF->neighborY, + parameterDeviceF->neighborZ, + parameterDeviceC->numberOfNodes, + parameterDeviceF->numberOfNodes, + parameterDeviceC->isEvenTimestep, + icellFC->ICellFCC, + icellFC->ICellFCF, + icellFC->kFC, + parameterDeviceC->omega, + parameterDeviceF->omega, + parameterDeviceC->vis, + parameterDeviceC->nx, + parameterDeviceC->ny, + parameterDeviceF->nx, + parameterDeviceF->ny, + offsetFC); + getLastCudaError("scaleFC_RhoSq_comp_27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, 
CUstream_st *stream) { - dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellFC->kFC); - dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); - - scaleFC_compressible<<<grid, threads, 0, stream>>>( - parameterDeviceC->distributions.f[0], - parameterDeviceF->distributions.f[0], - parameterDeviceC->neighborX, - parameterDeviceC->neighborY, - parameterDeviceC->neighborZ, - parameterDeviceF->neighborX, - parameterDeviceF->neighborY, - parameterDeviceF->neighborZ, - parameterDeviceC->numberOfNodes, - parameterDeviceF->numberOfNodes, - parameterDeviceC->isEvenTimestep, - icellFC->ICellFCC, - icellFC->ICellFCF, - icellFC->kFC, - parameterDeviceC->omega, - parameterDeviceF->omega, - offsetFC); - getLastCudaError("scaleFC_compressible execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC, - CUstream_st *stream) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_RhoSq_3rdMom_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - 
getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_AA2016_comp_27( real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC, - CUstream_st *stream) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_AA2016_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_AA2016_comp_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFC_NSPress_27(real* DC, - real* DF, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real omCoarse, - real omFine, - real nu, - unsigned int nxC, - unsigned int nyC, - unsigned int nxF, - unsigned int nyF, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - 
Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFC_NSPress_27<<< gridINT_FC, threads >>> ( DC, - DF, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - omCoarse, - omFine, - nu, - nxC, - nyC, - nxF, - nyF, - offFC); - getLastCudaError("scaleFC_Fix_27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCThSMG7(real* DC, - real* DF, - real* DD7C, - real* DD7F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real nu, - real diffusivity_coarse, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThSMG7<<< gridINT_FC, threads >>>( DC, - DF, - DD7C, - DD7F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - nu, - diffusivity_coarse, - offFC); - getLastCudaError("scaleFCThSMG7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCThS7( real* DC, - real* DF, - real* DD7C, - real* DD7F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - 
unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real nu, - real diffusivity_coarse, - unsigned int numberOfThreads) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThS7<<< gridINT_FC, threads >>>(DC, - DF, - DD7C, - DD7F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - nu, - diffusivity_coarse); - getLastCudaError("scaleFCThS7 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void ScaleFCThS27( real* DC, - real* DF, - real* DD27C, - real* DD27F, - unsigned int* neighborCX, - unsigned int* neighborCY, - unsigned int* neighborCZ, - unsigned int* neighborFX, - unsigned int* neighborFY, - unsigned int* neighborFZ, - unsigned int size_MatC, - unsigned int size_MatF, - bool isEvenTimestep, - unsigned int* posC, - unsigned int* posFSWB, - unsigned int kFC, - real nu, - real diffusivity_coarse, - unsigned int numberOfThreads, - OffFC offFC) -{ - int Grid = (kFC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridINT_FC(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - scaleFCThS27<<< gridINT_FC, threads >>>( DC, - DF, - DD27C, - DD27F, - neighborCX, - neighborCY, - neighborCZ, - neighborFX, - neighborFY, - neighborFZ, - size_MatC, - size_MatF, - isEvenTimestep, - posC, - posFSWB, - kFC, - nu, - diffusivity_coarse, - offFC); - getLastCudaError("scaleFCThS27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void DragLiftPostD27(real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - double *DragX, - double *DragY, - double 
*DragZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - DragLiftPost27<<< grid, threads >>>(DD, - k_Q, - QQ, - numberOfBCnodes, - DragX, - DragY, - DragZ, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("DragLift27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void DragLiftPreD27( real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - double *DragX, - double *DragY, - double *DragZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - DragLiftPre27<<< grid, threads >>>( DD, - k_Q, - QQ, - numberOfBCnodes, - DragX, - DragY, - DragZ, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("DragLift27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcCPtop27(real* DD, - int* cpIndex, - int nonCp, - double *cpPress, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (nonCp / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, 
Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcCP27<<< grid, threads >>>(DD, - cpIndex, - nonCp, - cpPress, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("CalcCP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void CalcCPbottom27( real* DD, - int* cpIndex, - int nonCp, - double *cpPress, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (nonCp / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcCP27<<< grid, threads >>>(DD, - cpIndex, - nonCp, - cpPress, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("CalcCP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void GetSendFsPreDev27(real* DD, - real* bufferFs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads, - cudaStream_t stream) -{ - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - getSendFsPre27<<< grid, threads, 0, stream >>>(DD, - bufferFs, - sendIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("getSendFsPre27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void GetSendFsPostDev27(real* DD, - real* bufferFs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* 
neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads, - cudaStream_t stream) -{ - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - getSendFsPost27<<< grid, threads, 0, stream >>>(DD, - bufferFs, - sendIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("getSendFsPost27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void SetRecvFsPreDev27(real* DD, - real* bufferFs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads, - cudaStream_t stream) -{ - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - setRecvFsPre27<<< grid, threads, 0, stream >>>(DD, - bufferFs, - recvIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("setRecvFsPre27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void SetRecvFsPostDev27(real* DD, - real* bufferFs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads, - cudaStream_t stream) -{ - int Grid = (buffmax / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 
); - - setRecvFsPost27<<< grid, threads, 0, stream >>>(DD, - bufferFs, - recvIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("setRecvFsPost27 execution failed"); + dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads, icellFC->kFC); + dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); + + scaleFC_compressible<<<grid, threads, 0, stream>>>( + parameterDeviceC->distributions.f[0], + parameterDeviceF->distributions.f[0], + parameterDeviceC->neighborX, + parameterDeviceC->neighborY, + parameterDeviceC->neighborZ, + parameterDeviceF->neighborX, + parameterDeviceF->neighborY, + parameterDeviceF->neighborZ, + parameterDeviceC->numberOfNodes, + parameterDeviceF->numberOfNodes, + parameterDeviceC->isEvenTimestep, + icellFC->ICellFCC, + icellFC->ICellFCF, + icellFC->kFC, + parameterDeviceC->omega, + parameterDeviceF->omega, + offsetFC); + getLastCudaError("scaleFC_compressible execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_RhoSq_3rdMom_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + 
nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_AA2016_comp_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC, + CUstream_st *stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>( + DC, + DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_AA2016_comp_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFC_NSPress_27( + real* DC, + real* DF, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real omCoarse, + real omFine, + real nu, + unsigned int nxC, + unsigned int nyC, + unsigned int nxF, + unsigned int nyF, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFC_NSPress_27<<< grid.grid, grid.threads >>> ( + DC, + 
DF, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + omCoarse, + omFine, + nu, + nxC, + nyC, + nxF, + nyF, + offFC); + getLastCudaError("scaleFC_NSPress_27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCThSMG7( + real* DC, + real* DF, + real* DD7C, + real* DD7F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real nu, + real diffusivity_coarse, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCThSMG7<<< grid.grid, grid.threads >>>( + DC, + DF, + DD7C, + DD7F, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + nu, + diffusivity_coarse, + offFC); + getLastCudaError("scaleFCThSMG7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCThS7( + real* DC, + real* DF, + real* DD7C, + real* DD7F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real nu, + real diffusivity_coarse, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCThS7<<< grid.grid, grid.threads >>>( + DC, + DF, + DD7C, + DD7F, + neighborCX, 
+ neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + nu, + diffusivity_coarse); + getLastCudaError("scaleFCThS7 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void ScaleFCThS27( + real* DC, + real* DF, + real* DD27C, + real* DD27F, + unsigned int* neighborCX, + unsigned int* neighborCY, + unsigned int* neighborCZ, + unsigned int* neighborFX, + unsigned int* neighborFY, + unsigned int* neighborFZ, + unsigned long long numberOfLBnodesC, + unsigned long long numberOfLBnodesF, + bool isEvenTimestep, + unsigned int* posC, + unsigned int* posFSWB, + unsigned int kFC, + real nu, + real diffusivity_coarse, + unsigned int numberOfThreads, + OffFC offFC) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); + + scaleFCThS27<<< grid.grid, grid.threads >>>( + DC, + DF, + DD27C, + DD27F, + neighborCX, + neighborCY, + neighborCZ, + neighborFX, + neighborFY, + neighborFZ, + numberOfLBnodesC, + numberOfLBnodesF, + isEvenTimestep, + posC, + posFSWB, + kFC, + nu, + diffusivity_coarse, + offFC); + getLastCudaError("scaleFCThS27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void DragLiftPostD27( + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + double *DragX, + double *DragY, + double *DragZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + DragLiftPost27<<< grid.grid, grid.threads >>>( + DD, + k_Q, + QQ, + numberOfBCnodes, + DragX, + DragY, + DragZ, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("DragLiftPost27 execution failed"); +} 
+////////////////////////////////////////////////////////////////////////// +void DragLiftPreD27( + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + double *DragX, + double *DragY, + double *DragZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + DragLiftPre27<<< grid.grid, grid.threads >>>( + DD, + k_Q, + QQ, + numberOfBCnodes, + DragX, + DragY, + DragZ, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("DragLiftPre27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcCPtop27( + real* DD, + int* cpIndex, + int nonCp, + double *cpPress, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp); + + CalcCP27<<< grid.grid, grid.threads >>>( + DD, + cpIndex, + nonCp, + cpPress, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("CalcCP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void CalcCPbottom27( + real* DD, + int* cpIndex, + int nonCp, + double *cpPress, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp); + + CalcCP27<<< grid.grid, grid.threads >>>( + DD, + cpIndex, + nonCp, + cpPress, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("CalcCP27 execution failed"); +} 
+////////////////////////////////////////////////////////////////////////// +void GetSendFsPreDev27( + real* DD, + real* bufferFs, + int* sendIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads, + cudaStream_t stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>( + DD, + bufferFs, + sendIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("getSendFsPre27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void GetSendFsPostDev27( + real* DD, + real* bufferFs, + int* sendIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads, + cudaStream_t stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>( + DD, + bufferFs, + sendIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("getSendFsPost27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void SetRecvFsPreDev27( + real* DD, + real* bufferFs, + int* recvIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads, + cudaStream_t stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>( + DD, + bufferFs, + recvIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + 
getLastCudaError("setRecvFsPre27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void SetRecvFsPostDev27( + real* DD, + real* bufferFs, + int* recvIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads, + cudaStream_t stream) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>( + DD, + bufferFs, + recvIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("setRecvFsPost27 execution failed"); } ////////////////////////////////////////////////////////////////////////// void getSendGsDevF3( - real* G6, - real* bufferGs, - int* sendIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (buffmax / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - getSendGsF3 <<< grid, threads >>> ( - G6, - bufferGs, - sendIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("getSendGsF3 execution failed"); + real* G6, + real* bufferGs, + int* sendIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + getSendGsF3 <<< grid.grid, grid.threads >>> ( + G6, + bufferGs, + sendIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + 
getLastCudaError("getSendGsF3 execution failed"); } ////////////////////////////////////////////////////////////////////////// void setRecvGsDevF3( - real* G6, - real* bufferGs, - int* recvIndex, - int buffmax, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - unsigned int numberOfThreads) -{ - int Grid = (buffmax / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid > 512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - setRecvGsF3 <<< grid, threads >>> ( - G6, - bufferGs, - recvIndex, - buffmax, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("setRecvGsF3 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void WallFuncDev27(unsigned int numberOfThreads, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (numberOfBCnodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - WallFunction27<<< gridQ, threads >>> ( - vx, - vy, - vz, - DD, - k_Q, - QQ, - numberOfBCnodes, - om1, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("WallFunction27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void SetOutputWallVelocitySP27(unsigned int numberOfThreads, - real* vxD, - real* vyD, - real* vzD, - real* vxWall, - real* vyWall, - real* vzWall, - int numberOfWallNodes, - int* kWallNodes, - real* rhoD, - real* pressD, - unsigned 
int* geoD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - real* DD, - bool isEvenTimestep) -{ - int Grid = (numberOfWallNodes / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - LBSetOutputWallVelocitySP27<<< gridQ, threads >>> ( vxD, - vyD, - vzD, - vxWall, - vyWall, - vzWall, - numberOfWallNodes, - kWallNodes, - rhoD, - pressD, - geoD, - neighborX, - neighborY, - neighborZ, - size_Mat, - DD, - isEvenTimestep); - getLastCudaError("LBSetOutputWallVelocitySP27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void GetVelotoForce27(unsigned int numberOfThreads, - real* DD, - int* bcIndex, - int nonAtBC, - real* Vx, - real* Vy, - real* Vz, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) -{ - int Grid = (nonAtBC / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - GetVeloforForcing27<<< gridQ, threads >>> (DD, - bcIndex, - nonAtBC, - Vx, - Vy, - Vz, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - getLastCudaError("GetVeloforForcing27 execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void InitParticlesDevice(real* coordX, - real* coordY, - real* coordZ, - real* coordParticleXlocal, - real* coordParticleYlocal, - real* coordParticleZlocal, - real* coordParticleXglobal, - real* coordParticleYglobal, - real* coordParticleZglobal, - real* veloParticleX, - real* veloParticleY, - real* veloParticleZ, - real* randArray, - unsigned int* particleID, - unsigned int* cellBaseID, - 
unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - int level, - unsigned int numberOfParticles, - unsigned int size_Mat, - unsigned int numberOfThreads) -{ - int Grid = (numberOfParticles / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - InitParticles<<< gridQ, threads >>> (coordX, - coordY, - coordZ, - coordParticleXlocal, - coordParticleYlocal, - coordParticleZlocal, - coordParticleXglobal, - coordParticleYglobal, - coordParticleZglobal, - veloParticleX, - veloParticleY, - veloParticleZ, - randArray, - particleID, - cellBaseID, - bcMatD, - neighborX, - neighborY, - neighborZ, - neighborWSB, - level, - numberOfParticles, - size_Mat); - getLastCudaError("InitParticles execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void MoveParticlesDevice(real* coordX, - real* coordY, - real* coordZ, - real* coordParticleXlocal, - real* coordParticleYlocal, - real* coordParticleZlocal, - real* coordParticleXglobal, - real* coordParticleYglobal, - real* coordParticleZglobal, - real* veloParticleX, - real* veloParticleY, - real* veloParticleZ, - real* DD, - real omega, - unsigned int* particleID, - unsigned int* cellBaseID, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int* neighborWSB, - int level, - unsigned int timestep, - unsigned int numberOfTimesteps, - unsigned int numberOfParticles, - unsigned int size_Mat, - unsigned int numberOfThreads, - bool isEvenTimestep) -{ - int Grid = (numberOfParticles / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 
1, 1 ); - - MoveParticles<<< gridQ, threads >>> (coordX, - coordY, - coordZ, - coordParticleXlocal, - coordParticleYlocal, - coordParticleZlocal, - coordParticleXglobal, - coordParticleYglobal, - coordParticleZglobal, - veloParticleX, - veloParticleY, - veloParticleZ, - DD, - omega, - particleID, - cellBaseID, - bcMatD, - neighborX, - neighborY, - neighborZ, - neighborWSB, - level, - timestep, - numberOfTimesteps, - numberOfParticles, - size_Mat, - isEvenTimestep); - getLastCudaError("MoveParticles execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void initRandomDevice(curandState* state, - unsigned int size_Mat, - unsigned int numberOfThreads) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - initRandom<<< gridQ, threads >>> (state); - getLastCudaError("initRandom execution failed"); -} -////////////////////////////////////////////////////////////////////////// -void generateRandomValuesDevice( curandState* state, - unsigned int size_Mat, - real* randArray, - unsigned int numberOfThreads) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - generateRandomValues<<< gridQ, threads >>> (state,randArray); - getLastCudaError("generateRandomValues execution failed"); + real* G6, + real* bufferGs, + int* recvIndex, + int buffmax, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax); + + setRecvGsF3 <<< grid.grid, grid.threads >>> ( + G6, + 
bufferGs, + recvIndex, + buffmax, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("setRecvGsF3 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void WallFuncDev27( + unsigned int numberOfThreads, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); + + WallFunction27<<< grid.grid, grid.threads >>> ( + vx, + vy, + vz, + DD, + k_Q, + QQ, + numberOfBCnodes, + om1, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("WallFunction27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void SetOutputWallVelocitySP27( + unsigned int numberOfThreads, + real* vxD, + real* vyD, + real* vzD, + real* vxWall, + real* vyWall, + real* vzWall, + int numberOfWallNodes, + int* kWallNodes, + real* rhoD, + real* pressD, + unsigned int* geoD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + real* DD, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes); + + LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> ( + vxD, + vyD, + vzD, + vxWall, + vyWall, + vzWall, + numberOfWallNodes, + kWallNodes, + rhoD, + pressD, + geoD, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + DD, + isEvenTimestep); + getLastCudaError("LBSetOutputWallVelocitySP27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void GetVelotoForce27( + unsigned int numberOfThreads, + real* DD, + int* bcIndex, + int nonAtBC, + real* Vx, + real* Vy, + real* Vz, + 
unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC); + + GetVeloforForcing27<<< grid.grid, grid.threads >>> ( + DD, + bcIndex, + nonAtBC, + Vx, + Vy, + Vz, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("GetVeloforForcing27 execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void InitParticlesDevice( + real* coordX, + real* coordY, + real* coordZ, + real* coordParticleXlocal, + real* coordParticleYlocal, + real* coordParticleZlocal, + real* coordParticleXglobal, + real* coordParticleYglobal, + real* coordParticleZglobal, + real* veloParticleX, + real* veloParticleY, + real* veloParticleZ, + real* randArray, + unsigned int* particleID, + unsigned int* cellBaseID, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + int level, + unsigned int numberOfParticles, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); + + InitParticles<<< grid.grid, grid.threads >>> ( + coordX, + coordY, + coordZ, + coordParticleXlocal, + coordParticleYlocal, + coordParticleZlocal, + coordParticleXglobal, + coordParticleYglobal, + coordParticleZglobal, + veloParticleX, + veloParticleY, + veloParticleZ, + randArray, + particleID, + cellBaseID, + bcMatD, + neighborX, + neighborY, + neighborZ, + neighborWSB, + level, + numberOfParticles, + numberOfLBnodes); + getLastCudaError("InitParticles execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void MoveParticlesDevice( + real* coordX, + real* coordY, + real* coordZ, + real* coordParticleXlocal, + real* coordParticleYlocal, + real* coordParticleZlocal, + 
real* coordParticleXglobal, + real* coordParticleYglobal, + real* coordParticleZglobal, + real* veloParticleX, + real* veloParticleY, + real* veloParticleZ, + real* DD, + real omega, + unsigned int* particleID, + unsigned int* cellBaseID, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned int* neighborWSB, + int level, + unsigned int timestep, + unsigned int numberOfTimesteps, + unsigned int numberOfParticles, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads, + bool isEvenTimestep) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); + + MoveParticles<<< grid.grid, grid.threads >>> ( + coordX, + coordY, + coordZ, + coordParticleXlocal, + coordParticleYlocal, + coordParticleZlocal, + coordParticleXglobal, + coordParticleYglobal, + coordParticleZglobal, + veloParticleX, + veloParticleY, + veloParticleZ, + DD, + omega, + particleID, + cellBaseID, + bcMatD, + neighborX, + neighborY, + neighborZ, + neighborWSB, + level, + timestep, + numberOfTimesteps, + numberOfParticles, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("MoveParticles execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void initRandomDevice( + curandState* state, + unsigned long long numberOfLBnodes, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + initRandom<<< grid.grid, grid.threads >>> (state); + getLastCudaError("initRandom execution failed"); +} +////////////////////////////////////////////////////////////////////////// +void generateRandomValuesDevice( + curandState* state, + unsigned long long numberOfLBnodes, + real* randArray, + unsigned int numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray); + 
getLastCudaError("generateRandomValues execution failed"); } ////////////////////////////////////////////////////////////////////////// void CalcTurbulenceIntensityDevice( - real* vxx, - real* vyy, - real* vzz, - real* vxy, - real* vxz, - real* vyz, - real* vx_mean, - real* vy_mean, - real* vz_mean, - real* DD, - uint* typeOfGridNode, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep, - uint numberOfThreads) -{ - int Grid = (size_Mat / numberOfThreads)+1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid/Grid1)+1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 gridQ(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1 ); - - CalcTurbulenceIntensity<<<gridQ, threads>>>( - vxx, - vyy, - vzz, - vxy, - vxz, - vyz, - vx_mean, - vy_mean, - vz_mean, - DD, - typeOfGridNode, - neighborX, - neighborY, - neighborZ, - size_Mat, - isEvenTimestep); - - getLastCudaError("CalcTurbulenceIntensity execution failed"); + real* vxx, + real* vyy, + real* vzz, + real* vxy, + real* vxz, + real* vyz, + real* vx_mean, + real* vy_mean, + real* vz_mean, + real* DD, + uint* typeOfGridNode, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + uint numberOfThreads) +{ + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); + CalcTurbulenceIntensity<<<grid.grid, grid.threads>>>( + vxx, + vyy, + vzz, + vxy, + vxz, + vyz, + vx_mean, + vy_mean, + vz_mean, + DD, + typeOfGridNode, + neighborX, + neighborY, + neighborZ, + numberOfLBnodes, + isEvenTimestep); + getLastCudaError("CalcTurbulenceIntensity execution failed"); } - - - - - - - - - - - - - diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu index 314687c4b29a32962b386d7c083f72b754388e5b..79dedee58afb7b11c4c3ede9911f54df65cf859f 100644 --- 
a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu @@ -1,92 +1,117 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ \__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// -/* Device code */ +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file NoSlipBCs27.cu +//! \ingroup GPU +//! \author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include <lbm/constants/NumericConstants.h> -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void QDevice3rdMomentsComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &distributions[DIR_P00 *numberOfLBnodes]; - D.f[DIR_M00 ] = &distributions[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &distributions[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &distributions[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00P ] = &distributions[DIR_00P *numberOfLBnodes]; - D.f[DIR_00M ] = &distributions[DIR_00M *numberOfLBnodes]; - D.f[DIR_PP0 ] = &distributions[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_MM0 ] = &distributions[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &distributions[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &distributions[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_P0P ] = &distributions[DIR_P0P *numberOfLBnodes]; - D.f[DIR_M0M ] = 
&distributions[DIR_M0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &distributions[DIR_P0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &distributions[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0PP ] = &distributions[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0MM ] = &distributions[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0PM ] = &distributions[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0MP ] = &distributions[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes]; - D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes]; - D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes]; - D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes]; - D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes]; - D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes]; - D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes]; - D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes]; + D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0MP 
* numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &distributions[DIR_P00 *numberOfLBnodes]; - D.f[DIR_P00 ] = &distributions[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &distributions[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &distributions[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00M ] = &distributions[DIR_00P *numberOfLBnodes]; - D.f[DIR_00P ] = &distributions[DIR_00M *numberOfLBnodes]; - D.f[DIR_MM0 ] = &distributions[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_PP0 ] = &distributions[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &distributions[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &distributions[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_M0M ] = &distributions[DIR_P0P *numberOfLBnodes]; - D.f[DIR_P0P ] = &distributions[DIR_M0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &distributions[DIR_P0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &distributions[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0MM ] = &distributions[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0PP ] = &distributions[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0MP ] = &distributions[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0PM ] = &distributions[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes]; - D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes]; - D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes]; - D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes]; - D.f[DIR_PPM ] = &distributions[DIR_MMP 
*numberOfLBnodes]; - D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes]; - D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes]; - D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes]; + D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -107,24 +132,24 @@ __global__ void 
QDevice3rdMomentsComp27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &subgridDistances[DIR_P00 * numberOfBCnodes]; - q_dirW = &subgridDistances[DIR_M00 * numberOfBCnodes]; - q_dirN = &subgridDistances[DIR_0P0 * numberOfBCnodes]; - q_dirS = &subgridDistances[DIR_0M0 * numberOfBCnodes]; - q_dirT = &subgridDistances[DIR_00P * numberOfBCnodes]; - q_dirB = &subgridDistances[DIR_00M * numberOfBCnodes]; - q_dirNE = &subgridDistances[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &subgridDistances[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &subgridDistances[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &subgridDistances[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &subgridDistances[DIR_P0P * numberOfBCnodes]; - q_dirBW = &subgridDistances[DIR_M0M * numberOfBCnodes]; - q_dirBE = &subgridDistances[DIR_P0M * numberOfBCnodes]; - q_dirTW = &subgridDistances[DIR_M0P * numberOfBCnodes]; - q_dirTN = &subgridDistances[DIR_0PP * numberOfBCnodes]; - q_dirBS = &subgridDistances[DIR_0MM * numberOfBCnodes]; - q_dirBN = &subgridDistances[DIR_0PM * numberOfBCnodes]; - q_dirTS = &subgridDistances[DIR_0MP * numberOfBCnodes]; + q_dirE = &subgridDistances[DIR_P00 * numberOfBCnodes]; + q_dirW = &subgridDistances[DIR_M00 * numberOfBCnodes]; + q_dirN = &subgridDistances[DIR_0P0 * numberOfBCnodes]; + q_dirS = &subgridDistances[DIR_0M0 * numberOfBCnodes]; + q_dirT = &subgridDistances[DIR_00P * numberOfBCnodes]; + q_dirB = &subgridDistances[DIR_00M * numberOfBCnodes]; + q_dirNE = &subgridDistances[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &subgridDistances[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &subgridDistances[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &subgridDistances[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &subgridDistances[DIR_P0P * numberOfBCnodes]; + q_dirBW = &subgridDistances[DIR_M0M * numberOfBCnodes]; + q_dirBE = &subgridDistances[DIR_P0M * numberOfBCnodes]; + q_dirTW = &subgridDistances[DIR_M0P * 
numberOfBCnodes]; + q_dirTN = &subgridDistances[DIR_0PP * numberOfBCnodes]; + q_dirBS = &subgridDistances[DIR_0MM * numberOfBCnodes]; + q_dirBN = &subgridDistances[DIR_0PM * numberOfBCnodes]; + q_dirTS = &subgridDistances[DIR_0MP * numberOfBCnodes]; q_dirTNE = &subgridDistances[DIR_PPP * numberOfBCnodes]; q_dirTSW = &subgridDistances[DIR_MMP * numberOfBCnodes]; q_dirTSE = &subgridDistances[DIR_PMP * numberOfBCnodes]; @@ -167,32 +192,32 @@ __global__ void QDevice3rdMomentsComp27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + 
f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q, m3; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -217,63 +242,63 @@ __global__ void QDevice3rdMomentsComp27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &distributions[DIR_P00 *numberOfLBnodes]; - D.f[DIR_M00 ] = &distributions[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &distributions[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &distributions[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00P ] = &distributions[DIR_00P *numberOfLBnodes]; - D.f[DIR_00M ] = &distributions[DIR_00M *numberOfLBnodes]; - D.f[DIR_PP0 ] = &distributions[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_MM0 ] = &distributions[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &distributions[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &distributions[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_P0P ] = &distributions[DIR_P0P *numberOfLBnodes]; - D.f[DIR_M0M ] = &distributions[DIR_M0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &distributions[DIR_P0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &distributions[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0PP ] = &distributions[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0MM ] = &distributions[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0PM ] = &distributions[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0MP ] = &distributions[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes]; - D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes]; - D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes]; - D.f[DIR_MPP ] = 
&distributions[DIR_MPP *numberOfLBnodes]; - D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes]; - D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes]; - D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes]; - D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes]; + D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &distributions[DIR_P00 *numberOfLBnodes]; - D.f[DIR_P00 ] = 
&distributions[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &distributions[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &distributions[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00M ] = &distributions[DIR_00P *numberOfLBnodes]; - D.f[DIR_00P ] = &distributions[DIR_00M *numberOfLBnodes]; - D.f[DIR_MM0 ] = &distributions[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_PP0 ] = &distributions[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &distributions[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &distributions[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_M0M ] = &distributions[DIR_P0P *numberOfLBnodes]; - D.f[DIR_P0P ] = &distributions[DIR_M0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &distributions[DIR_P0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &distributions[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0MM ] = &distributions[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0PP ] = &distributions[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0MP ] = &distributions[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0PM ] = &distributions[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes]; - D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes]; - D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes]; - D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes]; - D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes]; - D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes]; - D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes]; - D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes]; + D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &distributions[DIR_MM0 
* numberOfLBnodes]; + D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -559,77 +584,78 @@ __global__ void QDevice3rdMomentsComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QDeviceIncompHighNu27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) +__global__ void QDeviceIncompHighNu27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; 
if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *numberOfLBnodes]; - D.f[DIR_M00 ] = &DD[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00P ] = &DD[DIR_00P *numberOfLBnodes]; - D.f[DIR_00M ] = &DD[DIR_00M *numberOfLBnodes]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_P0P ] = &DD[DIR_P0P *numberOfLBnodes]; - D.f[DIR_M0M ] = &DD[DIR_M0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &DD[DIR_P0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &DD[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0PP ] = &DD[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0MM ] = &DD[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0PM ] = &DD[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0MP ] = &DD[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes]; - D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes]; - D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes]; - D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes]; - D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes]; - D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes]; - D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes]; - D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + 
D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *numberOfLBnodes]; - D.f[DIR_P00 ] = &DD[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00M ] = &DD[DIR_00P *numberOfLBnodes]; - D.f[DIR_00P ] = &DD[DIR_00M *numberOfLBnodes]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_M0M ] = &DD[DIR_P0P *numberOfLBnodes]; - D.f[DIR_P0P ] = &DD[DIR_M0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &DD[DIR_P0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &DD[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0MM ] = &DD[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0PP ] = &DD[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0MP ] = &DD[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0PM ] = &DD[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes]; - D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes]; - D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes]; - D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes]; - D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes]; - D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes]; - D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes]; - D.f[DIR_MPM ] = &DD[DIR_PMP 
*numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -650,24 +676,24 @@ __global__ void QDeviceIncompHighNu27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - 
q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -710,32 +736,32 @@ __global__ void QDeviceIncompHighNu27(real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw 
]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -760,63 +786,63 @@ __global__ void QDeviceIncompHighNu27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *numberOfLBnodes]; - D.f[DIR_M00 ] = &DD[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00P ] = &DD[DIR_00P *numberOfLBnodes]; - D.f[DIR_00M ] = &DD[DIR_00M *numberOfLBnodes]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_MM0 ] = 
&DD[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_P0P ] = &DD[DIR_P0P *numberOfLBnodes]; - D.f[DIR_M0M ] = &DD[DIR_M0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &DD[DIR_P0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &DD[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0PP ] = &DD[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0MM ] = &DD[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0PM ] = &DD[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0MP ] = &DD[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes]; - D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes]; - D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes]; - D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes]; - D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes]; - D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes]; - D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes]; - D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + 
D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *numberOfLBnodes]; - D.f[DIR_P00 ] = &DD[DIR_M00 *numberOfLBnodes]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *numberOfLBnodes]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *numberOfLBnodes]; - D.f[DIR_00M ] = &DD[DIR_00P *numberOfLBnodes]; - D.f[DIR_00P ] = &DD[DIR_00M *numberOfLBnodes]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *numberOfLBnodes]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *numberOfLBnodes]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *numberOfLBnodes]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *numberOfLBnodes]; - D.f[DIR_M0M ] = &DD[DIR_P0P *numberOfLBnodes]; - D.f[DIR_P0P ] = &DD[DIR_M0M *numberOfLBnodes]; - D.f[DIR_M0P ] = &DD[DIR_P0M *numberOfLBnodes]; - D.f[DIR_P0M ] = &DD[DIR_M0P *numberOfLBnodes]; - D.f[DIR_0MM ] = &DD[DIR_0PP *numberOfLBnodes]; - D.f[DIR_0PP ] = &DD[DIR_0MM *numberOfLBnodes]; - D.f[DIR_0MP ] = &DD[DIR_0PM *numberOfLBnodes]; - D.f[DIR_0PM ] = &DD[DIR_0MP *numberOfLBnodes]; - D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes]; - D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes]; - D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes]; - D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes]; - D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes]; - D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes]; - D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes]; - D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes]; - D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * 
numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -1055,77 +1081,77 @@ __global__ void QDeviceIncompHighNu27(real* DD, ////////////////////////////////////////////////////////////////////////////// __global__ void QDeviceCompHighNu27( - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - 
D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + 
D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = 
&DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1146,24 +1172,24 @@ __global__ void QDeviceCompHighNu27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 
* numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1206,58 +1232,58 @@ __global__ void QDeviceCompHighNu27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; - //f_W = (D.f[DIR_P00 ])[ke ]; - //f_E = (D.f[DIR_M00 ])[kw ]; - //f_S = (D.f[DIR_0P0 ])[kn ]; - //f_N = (D.f[DIR_0M0 ])[ks ]; - //f_B = (D.f[DIR_00P ])[kt ]; - //f_T = 
(D.f[DIR_00M ])[kb ]; - //f_SW = (D.f[DIR_PP0 ])[kne ]; - //f_NE = (D.f[DIR_MM0 ])[ksw ]; - //f_NW = (D.f[DIR_PM0 ])[kse ]; - //f_SE = (D.f[DIR_MP0 ])[knw ]; - //f_BW = (D.f[DIR_P0P ])[kte ]; - //f_TE = (D.f[DIR_M0M ])[kbw ]; - //f_TW = (D.f[DIR_P0M ])[kbe ]; - //f_BE = (D.f[DIR_M0P ])[ktw ]; - //f_BS = (D.f[DIR_0PP ])[ktn ]; - //f_TN = (D.f[DIR_0MM ])[kbs ]; - //f_TS = (D.f[DIR_0PM ])[kbn ]; - //f_BN = (D.f[DIR_0MP ])[kts ]; - //f_BSW = (D.f[DIR_PPP ])[ktne ]; - //f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //f_BNW = (D.f[DIR_PMP ])[ktse ]; - //f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //f_TSW = (D.f[DIR_PPM ])[kbne ]; - //f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //f_TNW = (D.f[DIR_PMM ])[kbse ]; - //f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; + //f_W = (D.f[DIR_P00])[ke ]; + //f_E = (D.f[DIR_M00])[kw ]; + //f_S = (D.f[DIR_0P0])[kn ]; + //f_N = (D.f[DIR_0M0])[ks ]; + //f_B = (D.f[DIR_00P])[kt ]; + //f_T = (D.f[DIR_00M])[kb ]; + //f_SW = (D.f[DIR_PP0])[kne ]; + //f_NE = (D.f[DIR_MM0])[ksw ]; + //f_NW = (D.f[DIR_PM0])[kse ]; + //f_SE = (D.f[DIR_MP0])[knw ]; + //f_BW = (D.f[DIR_P0P])[kte ]; + //f_TE = (D.f[DIR_M0M])[kbw ]; + //f_TW = (D.f[DIR_P0M])[kbe ]; + //f_BE = (D.f[DIR_M0P])[ktw ]; + //f_BS = 
(D.f[DIR_0PP])[ktn ]; + //f_TN = (D.f[DIR_0MM])[kbs ]; + //f_TS = (D.f[DIR_0PM])[kbn ]; + //f_BN = (D.f[DIR_0MP])[kts ]; + //f_BSW = (D.f[DIR_PPP])[ktne ]; + //f_BNE = (D.f[DIR_MMP])[ktsw ]; + //f_BNW = (D.f[DIR_PMP])[ktse ]; + //f_BSE = (D.f[DIR_MPP])[ktnw ]; + //f_TSW = (D.f[DIR_PPM])[kbne ]; + //f_TNE = (D.f[DIR_MMM])[kbsw ]; + //f_TNW = (D.f[DIR_PMM])[kbse ]; + //f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -1282,63 +1308,63 @@ __global__ void QDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = 
&DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -1629,16 +1655,16 @@ __global__ void QDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void 
QDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The no-slip boundary condition is executed in the following steps @@ -1646,16 +1672,9 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1673,7 +1692,7 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! 
- unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1705,32 +1724,32 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = 
(dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -1761,7 +1780,7 @@ __global__ void QDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Update distributions with subgrid distance (q) between zero and one real feq, q, velocityLB; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -1769,7 +1788,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -1777,7 +1796,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -1785,7 +1804,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -1793,7 +1812,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if 
(q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -1801,7 +1820,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -1809,7 +1828,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -1817,7 +1836,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -1825,7 +1844,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -1833,7 +1852,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -1841,7 +1860,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -1849,7 +1868,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -1857,7 +1876,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P0P])[kte] = 
getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -1865,7 +1884,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -1873,7 +1892,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -1881,7 +1900,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -1889,7 +1908,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -1897,7 +1916,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -1905,7 +1924,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -1913,7 +1932,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega); } - q = (subgridD.q[DIR_MMM])[k]; + q 
= (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -1921,7 +1940,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -1929,7 +1948,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -1937,7 +1956,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -1945,7 +1964,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -1953,7 +1972,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -1961,7 +1980,7 @@ __global__ void QDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2011,16 +2030,17 @@ __global__ void QDeviceComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QDevice27(real* distributions, - int* subgridDistanceIndices, - real* 
subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) +__global__ void QDevice27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The no-slip boundary condition is executed in the following steps @@ -2028,19 +2048,12 @@ __global__ void QDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// @@ -2059,7 +2072,7 @@ __global__ void QDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! 
- unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -2091,32 +2104,32 @@ __global__ void QDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; 
+ real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -2148,7 +2161,7 @@ __global__ void QDevice27(real* distributions, //! - Update distributions with subgrid distance (q) between zero and one //! real feq, q, velocityLB; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -2156,7 +2169,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -2164,7 +2177,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -2172,7 +2185,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -2180,7 +2193,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega); } - q = (subgridD.q[DIR_00P])[k]; + q = 
(subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -2188,7 +2201,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -2196,7 +2209,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -2204,7 +2217,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -2212,7 +2225,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -2220,7 +2233,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -2228,7 +2241,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -2236,7 +2249,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega); } - q = (subgridD.q[DIR_M0M])[k]; + q = 
(subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -2244,7 +2257,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -2252,7 +2265,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -2260,7 +2273,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -2268,7 +2281,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -2276,7 +2289,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -2284,7 +2297,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -2292,7 +2305,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega); } - q = (subgridD.q[DIR_PPP])[k]; + q = 
(subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -2300,7 +2313,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -2308,7 +2321,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -2316,7 +2329,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -2324,7 +2337,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -2332,7 +2345,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -2340,7 +2353,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -2348,7 +2361,7 @@ __global__ void QDevice27(real* distributions, (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, 
f_BSE, f_TNW, feq, omega); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2398,15 +2411,16 @@ __global__ void QDevice27(real* distributions, ////////////////////////////////////////////////////////////////////////////// -__global__ void BBDevice27(real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) +__global__ void BBDevice27( + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The no-slip boundary condition is executed in the following steps @@ -2414,18 +2428,11 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// // run for all indices in size of boundary condition (numberOfBCnodes) - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -2443,7 +2450,7 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; unsigned int kn = indexOfBCnode; @@ -2474,32 +2481,32 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real 
f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - change the pointer to write the results in the correct array @@ -2509,32 +2516,32 @@ __global__ void BBDevice27(real* distributions, //////////////////////////////////////////////////////////////////////////////// //! 
- rewrite distributions if there is a sub-grid distance (q) in same direction real q; - q = (subgridD.q[DIR_P00 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00 ])[kw ]=f_E ; - q = (subgridD.q[DIR_M00 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00 ])[ke ]=f_W ; - q = (subgridD.q[DIR_0P0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0 ])[ks ]=f_N ; - q = (subgridD.q[DIR_0M0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0 ])[kn ]=f_S ; - q = (subgridD.q[DIR_00P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M ])[kb ]=f_T ; - q = (subgridD.q[DIR_00M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P ])[kt ]=f_B ; - q = (subgridD.q[DIR_PP0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0 ])[ksw ]=f_NE ; - q = (subgridD.q[DIR_MM0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0 ])[kne ]=f_SW ; - q = (subgridD.q[DIR_PM0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0 ])[knw ]=f_SE ; - q = (subgridD.q[DIR_MP0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0 ])[kse ]=f_NW ; - q = (subgridD.q[DIR_P0P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M ])[kbw ]=f_TE ; - q = (subgridD.q[DIR_M0M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P ])[kte ]=f_BW ; - q = (subgridD.q[DIR_P0M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P ])[ktw ]=f_BE ; - q = (subgridD.q[DIR_M0P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M ])[kbe ]=f_TW ; - q = (subgridD.q[DIR_0PP ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM ])[kbs ]=f_TN ; - q = (subgridD.q[DIR_0MM ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP ])[ktn ]=f_BS ; - q = (subgridD.q[DIR_0PM ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP ])[kts ]=f_BN ; - q = (subgridD.q[DIR_0MP ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM ])[kbn ]=f_TS ; - q = (subgridD.q[DIR_PPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE; - q = (subgridD.q[DIR_MMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW; - q = (subgridD.q[DIR_PPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE; - q = (subgridD.q[DIR_MMP])[k]; if (q>=c0o1 && q<=c1o1) 
(dist.f[DIR_PPM])[kbne]=f_TSW; - q = (subgridD.q[DIR_PMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE; - q = (subgridD.q[DIR_MPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW; - q = (subgridD.q[DIR_PMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE; - q = (subgridD.q[DIR_MPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E ; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W ; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N ; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S ; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T ; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B ; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE ; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW ; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0])[knw ]=f_SE ; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW ; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE ; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW ; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE ; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW ; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN ; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS ; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN ; + q = 
(subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM])[kbn ]=f_TS ; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu index 3a3ab784e6a7901c41d402629172c3c6154ffde9..22d9df4a3b4ae706dcf9b76d93940122015248f1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu @@ -29,7 +29,7 @@ __global__ void InitParticles( real* coordX, unsigned int* neighborWSB, int level, unsigned int numberOfParticles, - unsigned int size_Mat) + unsigned long long numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -72,12 +72,12 @@ __global__ void InitParticles( real* coordX, //////////////////////////////////////////////////////////////////////////////// //find random node of the fluid domain - unsigned int cbID = (unsigned int)(randArray[k]*size_Mat); - for(int i = 0; i < size_Mat;i++) + unsigned int cbID = (unsigned int)(randArray[k]*numberOfLBnodes); + for(int i = 0; i < 
numberOfLBnodes;i++) { //if (coordX[cbID] < 15 && coordX[cbID] > 5 && coordY[cbID] < 15 && coordY[cbID] > 5 && coordZ[cbID] < 15 && coordZ[cbID] > 5) break; if (coordX[cbID] < 5 && coordX[cbID] > 2) break; - cbID = (unsigned int)(randArray[k]*(size_Mat - i)); + cbID = (unsigned int)(randArray[k]*(numberOfLBnodes - i)); } real coordinateX; @@ -183,7 +183,7 @@ __global__ void MoveParticles( real* coordX, unsigned int timestep, unsigned int numberOfTimesteps, unsigned int numberOfParticles, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -248,63 +248,63 @@ __global__ void MoveParticles( real* coordX, { if (isEvenTimestep==true) { - feC = &DD[DIR_P00 *size_Mat]; - fwC = &DD[DIR_M00 *size_Mat]; - fnC = &DD[DIR_0P0 *size_Mat]; - fsC = &DD[DIR_0M0 *size_Mat]; - ftC = &DD[DIR_00P *size_Mat]; - fbC = &DD[DIR_00M *size_Mat]; - fneC = &DD[DIR_PP0 *size_Mat]; - fswC = &DD[DIR_MM0 *size_Mat]; - fseC = &DD[DIR_PM0 *size_Mat]; - fnwC = &DD[DIR_MP0 *size_Mat]; - fteC = &DD[DIR_P0P *size_Mat]; - fbwC = &DD[DIR_M0M *size_Mat]; - fbeC = &DD[DIR_P0M *size_Mat]; - ftwC = &DD[DIR_M0P *size_Mat]; - ftnC = &DD[DIR_0PP *size_Mat]; - fbsC = &DD[DIR_0MM *size_Mat]; - fbnC = &DD[DIR_0PM *size_Mat]; - ftsC = &DD[DIR_0MP *size_Mat]; - fzeroC = &DD[DIR_000*size_Mat]; - ftneC = &DD[DIR_PPP *size_Mat]; - ftswC = &DD[DIR_MMP *size_Mat]; - ftseC = &DD[DIR_PMP *size_Mat]; - ftnwC = &DD[DIR_MPP *size_Mat]; - fbneC = &DD[DIR_PPM *size_Mat]; - fbswC = &DD[DIR_MMM *size_Mat]; - fbseC = &DD[DIR_PMM *size_Mat]; - fbnwC = &DD[DIR_MPM *size_Mat]; + feC = &DD[DIR_P00 * numberOfLBnodes]; + fwC = &DD[DIR_M00 * numberOfLBnodes]; + fnC = &DD[DIR_0P0 * numberOfLBnodes]; + fsC = &DD[DIR_0M0 * numberOfLBnodes]; + ftC = &DD[DIR_00P * numberOfLBnodes]; + fbC = &DD[DIR_00M * numberOfLBnodes]; + fneC = &DD[DIR_PP0 * numberOfLBnodes]; + fswC = &DD[DIR_MM0 * numberOfLBnodes]; + fseC = &DD[DIR_PM0 * 
numberOfLBnodes]; + fnwC = &DD[DIR_MP0 * numberOfLBnodes]; + fteC = &DD[DIR_P0P * numberOfLBnodes]; + fbwC = &DD[DIR_M0M * numberOfLBnodes]; + fbeC = &DD[DIR_P0M * numberOfLBnodes]; + ftwC = &DD[DIR_M0P * numberOfLBnodes]; + ftnC = &DD[DIR_0PP * numberOfLBnodes]; + fbsC = &DD[DIR_0MM * numberOfLBnodes]; + fbnC = &DD[DIR_0PM * numberOfLBnodes]; + ftsC = &DD[DIR_0MP * numberOfLBnodes]; + fzeroC = &DD[DIR_000 * numberOfLBnodes]; + ftneC = &DD[DIR_PPP * numberOfLBnodes]; + ftswC = &DD[DIR_MMP * numberOfLBnodes]; + ftseC = &DD[DIR_PMP * numberOfLBnodes]; + ftnwC = &DD[DIR_MPP * numberOfLBnodes]; + fbneC = &DD[DIR_PPM * numberOfLBnodes]; + fbswC = &DD[DIR_MMM * numberOfLBnodes]; + fbseC = &DD[DIR_PMM * numberOfLBnodes]; + fbnwC = &DD[DIR_MPM * numberOfLBnodes]; } else { - fwC = &DD[DIR_P00 *size_Mat]; - feC = &DD[DIR_M00 *size_Mat]; - fsC = &DD[DIR_0P0 *size_Mat]; - fnC = &DD[DIR_0M0 *size_Mat]; - fbC = &DD[DIR_00P *size_Mat]; - ftC = &DD[DIR_00M *size_Mat]; - fswC = &DD[DIR_PP0 *size_Mat]; - fneC = &DD[DIR_MM0 *size_Mat]; - fnwC = &DD[DIR_PM0 *size_Mat]; - fseC = &DD[DIR_MP0 *size_Mat]; - fbwC = &DD[DIR_P0P *size_Mat]; - fteC = &DD[DIR_M0M *size_Mat]; - ftwC = &DD[DIR_P0M *size_Mat]; - fbeC = &DD[DIR_M0P *size_Mat]; - fbsC = &DD[DIR_0PP *size_Mat]; - ftnC = &DD[DIR_0MM *size_Mat]; - ftsC = &DD[DIR_0PM *size_Mat]; - fbnC = &DD[DIR_0MP *size_Mat]; - fzeroC = &DD[DIR_000*size_Mat]; - fbswC = &DD[DIR_PPP *size_Mat]; - fbneC = &DD[DIR_MMP *size_Mat]; - fbnwC = &DD[DIR_PMP *size_Mat]; - fbseC = &DD[DIR_MPP *size_Mat]; - ftswC = &DD[DIR_PPM *size_Mat]; - ftneC = &DD[DIR_MMM *size_Mat]; - ftnwC = &DD[DIR_PMM *size_Mat]; - ftseC = &DD[DIR_MPM *size_Mat]; + fwC = &DD[DIR_P00 * numberOfLBnodes]; + feC = &DD[DIR_M00 * numberOfLBnodes]; + fsC = &DD[DIR_0P0 * numberOfLBnodes]; + fnC = &DD[DIR_0M0 * numberOfLBnodes]; + fbC = &DD[DIR_00P * numberOfLBnodes]; + ftC = &DD[DIR_00M * numberOfLBnodes]; + fswC = &DD[DIR_PP0 * numberOfLBnodes]; + fneC = &DD[DIR_MM0 * numberOfLBnodes]; + fnwC = 
&DD[DIR_PM0 * numberOfLBnodes]; + fseC = &DD[DIR_MP0 * numberOfLBnodes]; + fbwC = &DD[DIR_P0P * numberOfLBnodes]; + fteC = &DD[DIR_M0M * numberOfLBnodes]; + ftwC = &DD[DIR_P0M * numberOfLBnodes]; + fbeC = &DD[DIR_M0P * numberOfLBnodes]; + fbsC = &DD[DIR_0PP * numberOfLBnodes]; + ftnC = &DD[DIR_0MM * numberOfLBnodes]; + ftsC = &DD[DIR_0PM * numberOfLBnodes]; + fbnC = &DD[DIR_0MP * numberOfLBnodes]; + fzeroC = &DD[DIR_000 * numberOfLBnodes]; + fbswC = &DD[DIR_PPP * numberOfLBnodes]; + fbneC = &DD[DIR_MMP * numberOfLBnodes]; + fbnwC = &DD[DIR_PMP * numberOfLBnodes]; + fbseC = &DD[DIR_MPP * numberOfLBnodes]; + ftswC = &DD[DIR_PPM * numberOfLBnodes]; + ftneC = &DD[DIR_MMM * numberOfLBnodes]; + ftnwC = &DD[DIR_PMM * numberOfLBnodes]; + ftseC = &DD[DIR_MPM * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// @@ -1055,7 +1055,7 @@ __global__ void MoveParticlesWithoutBCs( real* coordX, unsigned int timestep, unsigned int numberOfTimesteps, unsigned int numberOfParticles, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -1114,63 +1114,63 @@ __global__ void MoveParticlesWithoutBCs( real* coordX, { if (isEvenTimestep==true) { - feC = &DD[DIR_P00 *size_Mat]; - fwC = &DD[DIR_M00 *size_Mat]; - fnC = &DD[DIR_0P0 *size_Mat]; - fsC = &DD[DIR_0M0 *size_Mat]; - ftC = &DD[DIR_00P *size_Mat]; - fbC = &DD[DIR_00M *size_Mat]; - fneC = &DD[DIR_PP0 *size_Mat]; - fswC = &DD[DIR_MM0 *size_Mat]; - fseC = &DD[DIR_PM0 *size_Mat]; - fnwC = &DD[DIR_MP0 *size_Mat]; - fteC = &DD[DIR_P0P *size_Mat]; - fbwC = &DD[DIR_M0M *size_Mat]; - fbeC = &DD[DIR_P0M *size_Mat]; - ftwC = &DD[DIR_M0P *size_Mat]; - ftnC = &DD[DIR_0PP *size_Mat]; - fbsC = &DD[DIR_0MM *size_Mat]; - fbnC = &DD[DIR_0PM *size_Mat]; - ftsC = &DD[DIR_0MP *size_Mat]; - fzeroC = &DD[DIR_000*size_Mat]; - ftneC = &DD[DIR_PPP *size_Mat]; - ftswC = &DD[DIR_MMP *size_Mat]; - ftseC = 
&DD[DIR_PMP *size_Mat]; - ftnwC = &DD[DIR_MPP *size_Mat]; - fbneC = &DD[DIR_PPM *size_Mat]; - fbswC = &DD[DIR_MMM *size_Mat]; - fbseC = &DD[DIR_PMM *size_Mat]; - fbnwC = &DD[DIR_MPM *size_Mat]; + feC = &DD[DIR_P00 * numberOfLBnodes]; + fwC = &DD[DIR_M00 * numberOfLBnodes]; + fnC = &DD[DIR_0P0 * numberOfLBnodes]; + fsC = &DD[DIR_0M0 * numberOfLBnodes]; + ftC = &DD[DIR_00P * numberOfLBnodes]; + fbC = &DD[DIR_00M * numberOfLBnodes]; + fneC = &DD[DIR_PP0 * numberOfLBnodes]; + fswC = &DD[DIR_MM0 * numberOfLBnodes]; + fseC = &DD[DIR_PM0 * numberOfLBnodes]; + fnwC = &DD[DIR_MP0 * numberOfLBnodes]; + fteC = &DD[DIR_P0P * numberOfLBnodes]; + fbwC = &DD[DIR_M0M * numberOfLBnodes]; + fbeC = &DD[DIR_P0M * numberOfLBnodes]; + ftwC = &DD[DIR_M0P * numberOfLBnodes]; + ftnC = &DD[DIR_0PP * numberOfLBnodes]; + fbsC = &DD[DIR_0MM * numberOfLBnodes]; + fbnC = &DD[DIR_0PM * numberOfLBnodes]; + ftsC = &DD[DIR_0MP * numberOfLBnodes]; + fzeroC = &DD[DIR_000 * numberOfLBnodes]; + ftneC = &DD[DIR_PPP * numberOfLBnodes]; + ftswC = &DD[DIR_MMP * numberOfLBnodes]; + ftseC = &DD[DIR_PMP * numberOfLBnodes]; + ftnwC = &DD[DIR_MPP * numberOfLBnodes]; + fbneC = &DD[DIR_PPM * numberOfLBnodes]; + fbswC = &DD[DIR_MMM * numberOfLBnodes]; + fbseC = &DD[DIR_PMM * numberOfLBnodes]; + fbnwC = &DD[DIR_MPM * numberOfLBnodes]; } else { - fwC = &DD[DIR_P00 *size_Mat]; - feC = &DD[DIR_M00 *size_Mat]; - fsC = &DD[DIR_0P0 *size_Mat]; - fnC = &DD[DIR_0M0 *size_Mat]; - fbC = &DD[DIR_00P *size_Mat]; - ftC = &DD[DIR_00M *size_Mat]; - fswC = &DD[DIR_PP0 *size_Mat]; - fneC = &DD[DIR_MM0 *size_Mat]; - fnwC = &DD[DIR_PM0 *size_Mat]; - fseC = &DD[DIR_MP0 *size_Mat]; - fbwC = &DD[DIR_P0P *size_Mat]; - fteC = &DD[DIR_M0M *size_Mat]; - ftwC = &DD[DIR_P0M *size_Mat]; - fbeC = &DD[DIR_M0P *size_Mat]; - fbsC = &DD[DIR_0PP *size_Mat]; - ftnC = &DD[DIR_0MM *size_Mat]; - ftsC = &DD[DIR_0PM *size_Mat]; - fbnC = &DD[DIR_0MP *size_Mat]; - fzeroC = &DD[DIR_000*size_Mat]; - fbswC = &DD[DIR_PPP *size_Mat]; - fbneC = &DD[DIR_MMP 
*size_Mat]; - fbnwC = &DD[DIR_PMP *size_Mat]; - fbseC = &DD[DIR_MPP *size_Mat]; - ftswC = &DD[DIR_PPM *size_Mat]; - ftneC = &DD[DIR_MMM *size_Mat]; - ftnwC = &DD[DIR_PMM *size_Mat]; - ftseC = &DD[DIR_MPM *size_Mat]; + fwC = &DD[DIR_P00 * numberOfLBnodes]; + feC = &DD[DIR_M00 * numberOfLBnodes]; + fsC = &DD[DIR_0P0 * numberOfLBnodes]; + fnC = &DD[DIR_0M0 * numberOfLBnodes]; + fbC = &DD[DIR_00P * numberOfLBnodes]; + ftC = &DD[DIR_00M * numberOfLBnodes]; + fswC = &DD[DIR_PP0 * numberOfLBnodes]; + fneC = &DD[DIR_MM0 * numberOfLBnodes]; + fnwC = &DD[DIR_PM0 * numberOfLBnodes]; + fseC = &DD[DIR_MP0 * numberOfLBnodes]; + fbwC = &DD[DIR_P0P * numberOfLBnodes]; + fteC = &DD[DIR_M0M * numberOfLBnodes]; + ftwC = &DD[DIR_P0M * numberOfLBnodes]; + fbeC = &DD[DIR_M0P * numberOfLBnodes]; + fbsC = &DD[DIR_0PP * numberOfLBnodes]; + ftnC = &DD[DIR_0MM * numberOfLBnodes]; + ftsC = &DD[DIR_0PM * numberOfLBnodes]; + fbnC = &DD[DIR_0MP * numberOfLBnodes]; + fzeroC = &DD[DIR_000 * numberOfLBnodes]; + fbswC = &DD[DIR_PPP * numberOfLBnodes]; + fbneC = &DD[DIR_MMP * numberOfLBnodes]; + fbnwC = &DD[DIR_PMP * numberOfLBnodes]; + fbseC = &DD[DIR_MPP * numberOfLBnodes]; + ftswC = &DD[DIR_PPM * numberOfLBnodes]; + ftneC = &DD[DIR_MMM * numberOfLBnodes]; + ftnwC = &DD[DIR_PMM * numberOfLBnodes]; + ftseC = &DD[DIR_MPM * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// @@ -1928,7 +1928,7 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, real* NormalX, real* NormalY, real* NormalZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { @@ -1937,63 +1937,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, //Distributions27 D; //if (isEvenTimestep==true) //{ - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = 
&DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * 
size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; //} //else //{ - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * 
size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; //} ////////////////////////////////////////////////////////////////////////////////// //const unsigned x = threadIdx.x; // Globaler x-Index @@ -2015,24 +2015,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // // *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, // // *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, // // *q_dirBSE, *q_dirBNW; - // // q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - // q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - // // q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - // q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - // // q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - // q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - // // q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - // // q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - // // q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - // // q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - // // q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - // // q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - // // q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - // // q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - // // q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - // // q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - // // q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - // // q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + // // q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + // q_dirW = &QQ[DIR_M00 * 
numberOfBCnodes]; + // // q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + // q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + // // q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + // q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + // // q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + // // q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + // // q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + // // q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + // // q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + // // q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + // // q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + // // q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + // // q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + // // q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + // // q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + // // q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; // // q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; // // q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; // // q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -2047,24 +2047,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // // *nx_dirBE, *nx_dirTW, *nx_dirTN, *nx_dirBS, *nx_dirBN, *nx_dirTS, // // *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW, // // *nx_dirBSE, *nx_dirBNW; - // // nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; - // // nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; - // // nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; - // // nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; - // // nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; - // // nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; - // // nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; - // // nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; - // // nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; - // // nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; - // // nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; - // // nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; - // // nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; - // // nx_dirTW = &NormalX[DIR_M0P * numberOfBCnodes]; - // 
// nx_dirTN = &NormalX[DIR_0PP * numberOfBCnodes]; - // // nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; - // // nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; - // // nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; + // // nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; + // // nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; + // // nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; + // // nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; + // // nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; + // // nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; + // // nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; + // // nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; + // // nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; + // // nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; + // // nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; + // // nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; + // // nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; + // // nx_dirTW = &NormalX[DIR_M0P * numberOfBCnodes]; + // // nx_dirTN = &NormalX[DIR_0PP * numberOfBCnodes]; + // // nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; + // // nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; + // // nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; // // nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes]; // // nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes]; // // nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes]; @@ -2079,24 +2079,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // // *ny_dirBE, *ny_dirTW, *ny_dirTN, *ny_dirBS, *ny_dirBN, *ny_dirTS, // // *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW, // // *ny_dirBSE, *ny_dirBNW; - // // ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; - // // ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; - // // ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; - // // ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; - // // ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; - // // ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; - // // ny_dirNE = 
&NormalY[DIR_PP0 * numberOfBCnodes]; - // // ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; - // // ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; - // // ny_dirNW = &NormalY[DIR_MP0 * numberOfBCnodes]; - // // ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; - // // ny_dirBW = &NormalY[DIR_M0M * numberOfBCnodes]; - // // ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; - // // ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; - // // ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; - // // ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; - // // ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; - // // ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; + // // ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; + // // ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; + // // ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; + // // ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; + // // ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; + // // ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; + // // ny_dirNE = &NormalY[DIR_PP0 * numberOfBCnodes]; + // // ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; + // // ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; + // // ny_dirNW = &NormalY[DIR_MP0 * numberOfBCnodes]; + // // ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; + // // ny_dirBW = &NormalY[DIR_M0M * numberOfBCnodes]; + // // ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; + // // ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; + // // ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; + // // ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; + // // ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; + // // ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; // // ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes]; // // ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes]; // // ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes]; @@ -2111,24 +2111,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // // *nz_dirBE, *nz_dirTW, *nz_dirTN, *nz_dirBS, *nz_dirBN, *nz_dirTS, // // *nz_dirTNE, 
*nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW, // // *nz_dirBSE, *nz_dirBNW; - // // nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; - // // nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; - // // nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; - // // nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; - // // nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; - // // nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; - // // nz_dirNE = &NormalZ[DIR_PP0 * numberOfBCnodes]; - // // nz_dirSW = &NormalZ[DIR_MM0 * numberOfBCnodes]; - // // nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; - // // nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; - // // nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; - // // nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; - // // nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; - // // nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; - // // nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; - // // nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; - // // nz_dirBN = &NormalZ[DIR_0PM * numberOfBCnodes]; - // // nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; + // // nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; + // // nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; + // // nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; + // // nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; + // // nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; + // // nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; + // // nz_dirNE = &NormalZ[DIR_PP0 * numberOfBCnodes]; + // // nz_dirSW = &NormalZ[DIR_MM0 * numberOfBCnodes]; + // // nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; + // // nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; + // // nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; + // // nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; + // // nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; + // // nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; + // // nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; + // // nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; + // // nz_dirBN 
= &NormalZ[DIR_0PM * numberOfBCnodes]; + // // nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; // // nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes]; // // nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes]; // // nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes]; @@ -2190,32 +2190,32 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // unsigned int ktne = KQK; // unsigned int kbsw = neighborZ[ksw]; // //////////////////////////////////////////////////////////////////////////////// - // real f_W = (D.f[DIR_P00 ])[ke ]; - // real f_E = (D.f[DIR_M00 ])[kw ]; - // real f_S = (D.f[DIR_0P0 ])[kn ]; - // real f_N = (D.f[DIR_0M0 ])[ks ]; - // real f_B = (D.f[DIR_00P ])[kt ]; - // real f_T = (D.f[DIR_00M ])[kb ]; - // real f_SW = (D.f[DIR_PP0 ])[kne ]; - // real f_NE = (D.f[DIR_MM0 ])[ksw ]; - // real f_NW = (D.f[DIR_PM0 ])[kse ]; - // real f_SE = (D.f[DIR_MP0 ])[knw ]; - // real f_BW = (D.f[DIR_P0P ])[kte ]; - // real f_TE = (D.f[DIR_M0M ])[kbw ]; - // real f_TW = (D.f[DIR_P0M ])[kbe ]; - // real f_BE = (D.f[DIR_M0P ])[ktw ]; - // real f_BS = (D.f[DIR_0PP ])[ktn ]; - // real f_TN = (D.f[DIR_0MM ])[kbs ]; - // real f_TS = (D.f[DIR_0PM ])[kbn ]; - // real f_BN = (D.f[DIR_0MP ])[kts ]; - // real f_BSW = (D.f[DIR_PPP ])[ktne ]; - // real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - // real f_BNW = (D.f[DIR_PMP ])[ktse ]; - // real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - // real f_TSW = (D.f[DIR_PPM ])[kbne ]; - // real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - // real f_TNW = (D.f[DIR_PMM ])[kbse ]; - // real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + // real f_W = (D.f[DIR_P00])[ke ]; + // real f_E = (D.f[DIR_M00])[kw ]; + // real f_S = (D.f[DIR_0P0])[kn ]; + // real f_N = (D.f[DIR_0M0])[ks ]; + // real f_B = (D.f[DIR_00P])[kt ]; + // real f_T = (D.f[DIR_00M])[kb ]; + // real f_SW = (D.f[DIR_PP0])[kne ]; + // real f_NE = (D.f[DIR_MM0])[ksw ]; + // real f_NW = (D.f[DIR_PM0])[kse ]; + // real f_SE = (D.f[DIR_MP0])[knw ]; + // real f_BW = (D.f[DIR_P0P])[kte ]; + // real f_TE = (D.f[DIR_M0M])[kbw ]; + // 
real f_TW = (D.f[DIR_P0M])[kbe ]; + // real f_BE = (D.f[DIR_M0P])[ktw ]; + // real f_BS = (D.f[DIR_0PP])[ktn ]; + // real f_TN = (D.f[DIR_0MM])[kbs ]; + // real f_TS = (D.f[DIR_0PM])[kbn ]; + // real f_BN = (D.f[DIR_0MP])[kts ]; + // real f_BSW = (D.f[DIR_PPP])[ktne ]; + // real f_BNE = (D.f[DIR_MMP])[ktsw ]; + // real f_BNW = (D.f[DIR_PMP])[ktse ]; + // real f_BSE = (D.f[DIR_MPP])[ktnw ]; + // real f_TSW = (D.f[DIR_PPM])[kbne ]; + // real f_TNE = (D.f[DIR_MMM])[kbsw ]; + // real f_TNW = (D.f[DIR_PMM])[kbse ]; + // real f_TSE = (D.f[DIR_MPM])[kbnw ]; // //////////////////////////////////////////////////////////////////////////////// // // real feq, q; // real vx1, vx2, vx3, drho; @@ -2241,63 +2241,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX, // ////////////////////////////////////////////////////////////////////////// // if (isEvenTimestep==false) // { - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM 
*size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; // } // else // { - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP 
*size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; // } //} } diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu new file mode 100644 index 0000000000000000000000000000000000000000..177eb41587896dd7993b06f98a1506abfc4f3f5f 
--- /dev/null +++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu @@ -0,0 +1,1157 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PrecursorBCs27.cu +//! \ingroup GPU +//! 
//! \author Henry Korb, Henrik Asmuth
//======================================================================================
#include "LBM/LB.h"
#include <lbm/constants/NumericConstants.h>
#include <lbm/constants/D3Q27.h>
#include <lbm/MacroscopicQuantities.h>

#include "LBM/GPUHelperFunctions/KernelUtilities.h"

using namespace vf::lbm::constant;
using namespace vf::lbm::dir;
using namespace vf::gpu;

namespace
{

//! \brief Four-point stencil used to map precursor data onto one boundary node.
//! \details If \p interpolate is false only \p k0PP is valid and the precursor value at that index
//! is used as is; the remaining members are then left zero-initialised and must not be used.
struct PrecursorStencil
{
    uint k0PP, k0PM, k0MP, k0MM; //!< indices into the precursor arrays
    real d0PP, d0PM, d0MP, d0MM; //!< interpolation weights belonging to the indices above
    real invWeightSum;           //!< 1 / (d0PP + d0PM + d0MP + d0MM)
    bool interpolate;            //!< true if all four points contribute
};

//! \brief Load the precursor stencil of boundary node \p nodeIndex.
//! \details The 0PM/0MP/0MM neighbor and weight arrays are only read when the 0PP weight is below 1e6.
//! NOTE(review): a 0PP weight >= 1e6 is presumably a sentinel set on the host for "take the 0PP
//! value directly" - confirm against the code that fills the weight arrays.
__device__ inline PrecursorStencil loadPrecursorStencil(
    unsigned nodeIndex,
    const uint* neighbors0PP, const uint* neighbors0PM, const uint* neighbors0MP, const uint* neighbors0MM,
    const real* weights0PP, const real* weights0PM, const real* weights0MP, const real* weights0MM)
{
    PrecursorStencil stencil{};
    stencil.k0PP        = neighbors0PP[nodeIndex];
    stencil.d0PP        = weights0PP[nodeIndex];
    stencil.interpolate = stencil.d0PP < 1e6;

    if (stencil.interpolate) {
        stencil.k0PM = neighbors0PM[nodeIndex];
        stencil.k0MP = neighbors0MP[nodeIndex];
        stencil.k0MM = neighbors0MM[nodeIndex];

        stencil.d0PM = weights0PM[nodeIndex];
        stencil.d0MP = weights0MP[nodeIndex];
        stencil.d0MM = weights0MM[nodeIndex];

        stencil.invWeightSum = c1o1 / (stencil.d0PP + stencil.d0PM + stencil.d0MP + stencil.d0MM);
    }
    return stencil;
}

//! \brief Evaluate one precursor field at the location described by \p stencil.
//! \return the weighted average of the four stencil points, or field[k0PP] if no interpolation is requested.
__device__ inline real samplePrecursorField(const PrecursorStencil& stencil, const real* field)
{
    if (!stencil.interpolate)
        return field[stencil.k0PP];

    return (field[stencil.k0PP] * stencil.d0PP + field[stencil.k0PM] * stencil.d0PM +
            field[stencil.k0MP] * stencil.d0MP + field[stencil.k0MM] * stencil.d0MM) * stencil.invWeightSum;
}

//! \brief Velocity component imposed at a boundary node.
//! \details Samples \p last and \p next with \p stencil, blends them linearly with \p timeRatio
//! (0 -> last, 1 -> next), adds the constant \p offset and divides the sum by \p velocityRatio.
__device__ inline real precursorVelocity(
    const PrecursorStencil& stencil, const real* last, const real* next, real offset, real timeRatio, real velocityRatio)
{
    const real lastInterpolated = samplePrecursorField(stencil, last);
    const real nextInterpolated = samplePrecursorField(stencil, next);
    return (offset + (c1o1 - timeRatio) * lastInterpolated + timeRatio * nextInterpolated) / velocityRatio;
}

//! \brief Precursor distribution interpolated in space (stencil) and time (timeRatio: 0 -> last, 1 -> next).
__device__ inline real precursorDistribution(
    const PrecursorStencil& stencil, const real* last, const real* next, real timeRatio)
{
    const real lastInterpolated = samplePrecursorField(stencil, last);
    const real nextInterpolated = samplePrecursorField(stencil, next);
    return lastInterpolated * (c1o1 - timeRatio) + nextInterpolated * timeRatio;
}

//! \brief Update one distribution of a boundary node if its subgrid distance lies in [0, 1].
//! \param target      distribution to overwrite (the one stored for the direction opposite to the link)
//! \param q           subgrid distance of the link
//! \param f           distribution along the link, \param fInverse distribution in the opposite direction
//! \param velocityLB  current lattice velocity projected onto the link
//! \param velocityBC  prescribed boundary velocity projected onto the link
//! \param weight      lattice weight of the link direction
__device__ inline void applyPrecursorVelocityBC(
    real& target, real q, real f, real fInverse, real velocityLB, real velocityBC, real weight,
    real drho, real cu_sq, real omega)
{
    if (q >= c0o1 && q <= c1o1) // only update distribution for q between zero and one
    {
        const real feq = getEquilibriumForBC(drho, velocityLB, cu_sq, weight);
        target = getInterpolatedDistributionForVeloWithPressureBC(q, f, fInverse, feq, omega, drho, velocityBC, weight);
    }
}

//! \brief Equilibrium expression used by PrecursorDeviceEQ27 for a direction with lattice weight
//! \p weight and projected velocity \p cu; \p cusq is 3/2 * |u|^2.
__device__ inline real precursorEquilibrium(real weight, real drho, real cu, real cusq)
{
    return weight * (drho + (drho + c1o1) * (c3o1 * cu + c9o2 * cu * cu - cusq));
}

} // namespace

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//! \brief Velocity boundary condition whose velocity is taken from precursor data.
//! \details For every boundary node the precursor velocity is interpolated in space (four-point stencil)
//! and time (between vLast and vCurrent), and then applied link-wise with the same update as
//! QVelDeviceCompZeroPress.
//! \param subgridDistanceIndices  node index in the distribution arrays for each boundary node
//! \param numberOfBCnodes         number of boundary nodes (threads beyond this return immediately)
//! \param numberOfPrecursorNodes  stride between the x, y and z blocks inside vLast / vCurrent
//! \param sizeQ                   not used by this kernel
//! \param omega                   passed on to getInterpolatedDistributionForVeloWithPressureBC
//! \param distributions           distribution array, accessed via getPointersToDistributions
//! \param subgridDistances        subgrid distances q, accessed via getPointersToSubgridDistances
//! \param neighborX,neighborY,neighborZ   neighbor index lists of the sparse node numbering
//! \param neighbors0PP..neighbors0MM      precursor stencil indices per boundary node
//! \param weights0PP..weights0MM          precursor stencil weights per boundary node
//! \param vLast,vCurrent          precursor velocities, stored as [vx | vy | vz]
//! \param velocityX,velocityY,velocityZ   constant offset added to the interpolated velocity
//! \param timeRatio               temporal blending factor (0 -> vLast, 1 -> vCurrent)
//! \param velocityRatio           divisor applied to the resulting velocity
//! \param numberOfLBnodes         size of one distribution direction
//! \param isEvenTimestep          selects the esoteric twist pointer layout
__global__ void QPrecursorDeviceCompZeroPress(
    int* subgridDistanceIndices,
    int numberOfBCnodes,
    int numberOfPrecursorNodes,
    int sizeQ,
    real omega,
    real* distributions,
    real* subgridDistances,
    uint* neighborX,
    uint* neighborY,
    uint* neighborZ,
    uint* neighbors0PP,
    uint* neighbors0PM,
    uint* neighbors0MP,
    uint* neighbors0MM,
    real* weights0PP,
    real* weights0PM,
    real* weights0MP,
    real* weights0MM,
    real* vLast,
    real* vCurrent,
    real velocityX,
    real velocityY,
    real velocityZ,
    real timeRatio,
    real velocityRatio,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep)
{
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
    const unsigned nodeIndex = getNodeIndex();

    if(nodeIndex>=numberOfBCnodes) return;

    ////////////////////////////////////////////////////////////////////////////////
    //! - Interpolate the precursor velocity in space and time
    //!
    const PrecursorStencil stencil = loadPrecursorStencil(
        nodeIndex, neighbors0PP, neighbors0PM, neighbors0MP, neighbors0MM,
        weights0PP, weights0PM, weights0MP, weights0MM);

    const real VeloX = precursorVelocity(stencil, vLast, vCurrent,
                                         velocityX, timeRatio, velocityRatio);
    const real VeloY = precursorVelocity(stencil, &vLast[numberOfPrecursorNodes], &vCurrent[numberOfPrecursorNodes],
                                         velocityY, timeRatio, velocityRatio);
    const real VeloZ = precursorVelocity(stencil, &vLast[2*numberOfPrecursorNodes], &vCurrent[2*numberOfPrecursorNodes],
                                         velocityZ, timeRatio, velocityRatio);
    // From here on just a copy of QVelDeviceCompZeroPress
    ////////////////////////////////////////////////////////////////////////////////

    //////////////////////////////////////////////////////////////////////////
    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
    //!
    Distributions27 dist;
    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);

    const unsigned int KQK  = subgridDistanceIndices[nodeIndex];
    const unsigned int k000 = KQK;
    const unsigned int kP00 = KQK;
    const unsigned int kM00 = neighborX[KQK];
    const unsigned int k0P0 = KQK;
    const unsigned int k0M0 = neighborY[KQK];
    const unsigned int k00P = KQK;
    const unsigned int k00M = neighborZ[KQK];
    const unsigned int kMM0 = neighborY[kM00];
    const unsigned int kPP0 = KQK;
    const unsigned int kPM0 = k0M0;
    const unsigned int kMP0 = kM00;
    const unsigned int kM0M = neighborZ[kM00];
    const unsigned int kP0P = KQK;
    const unsigned int kP0M = k00M;
    const unsigned int kM0P = kM00;
    const unsigned int k0PP = KQK;
    const unsigned int k0MM = neighborZ[k0M0];
    const unsigned int k0PM = k00M;
    const unsigned int k0MP = k0M0;
    const unsigned int kPMP = k0M0;
    const unsigned int kMPM = kM0M;
    const unsigned int kMPP = kM00;
    const unsigned int kPMM = k0MM;
    const unsigned int kMMP = kMM0;
    const unsigned int kPPM = k00M;
    const unsigned int kPPP = KQK;
    const unsigned int kMMM = neighborZ[kMM0];

    ////////////////////////////////////////////////////////////////////////////////
    //! - Set local distributions (all reads happen before any distribution is overwritten)
    //!
    const real f_M00 = (dist.f[DIR_P00])[kP00];
    const real f_P00 = (dist.f[DIR_M00])[kM00];
    const real f_0M0 = (dist.f[DIR_0P0])[k0P0];
    const real f_0P0 = (dist.f[DIR_0M0])[k0M0];
    const real f_00M = (dist.f[DIR_00P])[k00P];
    const real f_00P = (dist.f[DIR_00M])[k00M];
    const real f_MM0 = (dist.f[DIR_PP0])[kPP0];
    const real f_PP0 = (dist.f[DIR_MM0])[kMM0];
    const real f_MP0 = (dist.f[DIR_PM0])[kPM0];
    const real f_PM0 = (dist.f[DIR_MP0])[kMP0];
    const real f_M0M = (dist.f[DIR_P0P])[kP0P];
    const real f_P0P = (dist.f[DIR_M0M])[kM0M];
    const real f_M0P = (dist.f[DIR_P0M])[kP0M];
    const real f_P0M = (dist.f[DIR_M0P])[kM0P];
    const real f_0MM = (dist.f[DIR_0PP])[k0PP];
    const real f_0PP = (dist.f[DIR_0MM])[k0MM];
    const real f_0MP = (dist.f[DIR_0PM])[k0PM];
    const real f_0PM = (dist.f[DIR_0MP])[k0MP];
    const real f_MMM = (dist.f[DIR_PPP])[kPPP];
    const real f_PPM = (dist.f[DIR_MMP])[kMMP];
    const real f_MPM = (dist.f[DIR_PMP])[kPMP];
    const real f_PMM = (dist.f[DIR_MPP])[kMPP];
    const real f_MMP = (dist.f[DIR_PPM])[kPPM];
    const real f_PPP = (dist.f[DIR_MMM])[kMMM];
    const real f_MPP = (dist.f[DIR_PMM])[kPMM];
    const real f_PMP = (dist.f[DIR_MPM])[kMPM];

    SubgridDistances27 subgridD;
    getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);

    ////////////////////////////////////////////////////////////////////////////////
    //! - Calculate macroscopic quantities
    //!
    const real drho = f_PMP + f_MPP + f_PPP + f_MMP + f_PMM + f_MPM + f_PPM + f_MMM +
                      f_0PM + f_0PP + f_0MP + f_0MM + f_P0M + f_M0P + f_P0P + f_M0M + f_PM0 + f_MP0 + f_PP0 + f_MM0 +
                      f_00P + f_00M + f_0P0 + f_0M0 + f_P00 + f_M00 + ((dist.f[DIR_000])[k000]);

    const real vx1 = (((f_PMP - f_MPM) - (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
                      ((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0)) +
                      (f_P00 - f_M00)) / (c1o1 + drho);

    const real vx2 = ((-(f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
                      ((f_0PM - f_0MP) + (f_0PP - f_0MM)) + (-(f_PM0 - f_MP0) + (f_PP0 - f_MM0)) +
                      (f_0P0 - f_0M0)) / (c1o1 + drho);

    const real vx3 = (((f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) + (f_MMP - f_PPM)) +
                      (-(f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_P0P - f_M0M) - (f_P0M - f_M0P)) +
                      (f_00P - f_00M)) / (c1o1 + drho);

    const real cu_sq = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);

    ////////////////////////////////////////////////////////////////////////////////
    //! - Update distributions with subgrid distance (q) between zero and one.
    //! For a link in direction D the distribution stored for the opposite direction is overwritten;
    //! the helper receives (f_D, f_opposite) together with the velocities projected onto D.
    //!
    // axis-aligned links, weight 2/27
    applyPrecursorVelocityBC((dist.f[DIR_M00])[kM00], (subgridD.q[DIR_P00])[nodeIndex], f_P00, f_M00,  vx1, VeloX, c2o27, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_P00])[kP00], (subgridD.q[DIR_M00])[nodeIndex], f_M00, f_P00, -vx1, -VeloX, c2o27, drho, cu_sq, omega);
    // fixed: the target was indexed with the direction constant DIR_0M0 instead of the node index k0M0
    applyPrecursorVelocityBC((dist.f[DIR_0M0])[k0M0], (subgridD.q[DIR_0P0])[nodeIndex], f_0P0, f_0M0,  vx2, VeloY, c2o27, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_0P0])[k0P0], (subgridD.q[DIR_0M0])[nodeIndex], f_0M0, f_0P0, -vx2, -VeloY, c2o27, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_00M])[k00M], (subgridD.q[DIR_00P])[nodeIndex], f_00P, f_00M,  vx3, VeloZ, c2o27, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_00P])[k00P], (subgridD.q[DIR_00M])[nodeIndex], f_00M, f_00P, -vx3, -VeloZ, c2o27, drho, cu_sq, omega);

    // planar diagonal links, weight 1/54
    applyPrecursorVelocityBC((dist.f[DIR_MM0])[kMM0], (subgridD.q[DIR_PP0])[nodeIndex], f_PP0, f_MM0,  vx1 + vx2,  VeloX + VeloY, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PP0])[kPP0], (subgridD.q[DIR_MM0])[nodeIndex], f_MM0, f_PP0, -vx1 - vx2, -VeloX - VeloY, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_MP0])[kMP0], (subgridD.q[DIR_PM0])[nodeIndex], f_PM0, f_MP0,  vx1 - vx2,  VeloX - VeloY, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PM0])[kPM0], (subgridD.q[DIR_MP0])[nodeIndex], f_MP0, f_PM0, -vx1 + vx2, -VeloX + VeloY, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_M0M])[kM0M], (subgridD.q[DIR_P0P])[nodeIndex], f_P0P, f_M0M,  vx1 + vx3,  VeloX + VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_P0P])[kP0P], (subgridD.q[DIR_M0M])[nodeIndex], f_M0M, f_P0P, -vx1 - vx3, -VeloX - VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_M0P])[kM0P], (subgridD.q[DIR_P0M])[nodeIndex], f_P0M, f_M0P,  vx1 - vx3,  VeloX - VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_P0M])[kP0M], (subgridD.q[DIR_M0P])[nodeIndex], f_M0P, f_P0M, -vx1 + vx3, -VeloX + VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_0MM])[k0MM], (subgridD.q[DIR_0PP])[nodeIndex], f_0PP, f_0MM,  vx2 + vx3,  VeloY + VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_0PP])[k0PP], (subgridD.q[DIR_0MM])[nodeIndex], f_0MM, f_0PP, -vx2 - vx3, -VeloY - VeloZ, c1o54, drho, cu_sq, omega);
    // fixed: both 0PM/0MP links passed f_0PP where the opposite-direction distribution f_0MP belongs
    applyPrecursorVelocityBC((dist.f[DIR_0MP])[k0MP], (subgridD.q[DIR_0PM])[nodeIndex], f_0PM, f_0MP,  vx2 - vx3,  VeloY - VeloZ, c1o54, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_0PM])[k0PM], (subgridD.q[DIR_0MP])[nodeIndex], f_0MP, f_0PM, -vx2 + vx3, -VeloY + VeloZ, c1o54, drho, cu_sq, omega);

    // space diagonal links, weight 1/216
    applyPrecursorVelocityBC((dist.f[DIR_MMM])[kMMM], (subgridD.q[DIR_PPP])[nodeIndex], f_PPP, f_MMM,  vx1 + vx2 + vx3,  VeloX + VeloY + VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PPP])[kPPP], (subgridD.q[DIR_MMM])[nodeIndex], f_MMM, f_PPP, -vx1 - vx2 - vx3, -VeloX - VeloY - VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_MMP])[kMMP], (subgridD.q[DIR_PPM])[nodeIndex], f_PPM, f_MMP,  vx1 + vx2 - vx3,  VeloX + VeloY - VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PPM])[kPPM], (subgridD.q[DIR_MMP])[nodeIndex], f_MMP, f_PPM, -vx1 - vx2 + vx3, -VeloX - VeloY + VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_MPM])[kMPM], (subgridD.q[DIR_PMP])[nodeIndex], f_PMP, f_MPM,  vx1 - vx2 + vx3,  VeloX - VeloY + VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PMP])[kPMP], (subgridD.q[DIR_MPM])[nodeIndex], f_MPM, f_PMP, -vx1 + vx2 - vx3, -VeloX + VeloY - VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_MPP])[kMPP], (subgridD.q[DIR_PMM])[nodeIndex], f_PMM, f_MPP,  vx1 - vx2 - vx3,  VeloX - VeloY - VeloZ, c1o216, drho, cu_sq, omega);
    applyPrecursorVelocityBC((dist.f[DIR_PMM])[kPMM], (subgridD.q[DIR_MPP])[nodeIndex], f_MPP, f_PMM, -vx1 + vx2 + vx3, -VeloX + VeloY + VeloZ, c1o216, drho, cu_sq, omega);
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//! \brief Overwrite all 27 distributions of each boundary node with the equilibrium of the precursor velocity.
//! \details The precursor velocity is interpolated in space and time exactly as in
//! QPrecursorDeviceCompZeroPress; the equilibrium is evaluated with drho = 0. The previous
//! distributions of the node do not enter the result.
//! \param omega  not used by this kernel
//! \param isEvenTimestep  the distribution pointers are set up with the negated flag
//! All other parameters have the same meaning as in QPrecursorDeviceCompZeroPress.
__global__ void PrecursorDeviceEQ27(
    int *subgridDistanceIndices,
    int numberOfBCnodes,
    int numberOfPrecursorNodes,
    real omega,
    real* distributions,
    uint* neighborX,
    uint* neighborY,
    uint* neighborZ,
    uint* neighbors0PP,
    uint* neighbors0PM,
    uint* neighbors0MP,
    uint* neighbors0MM,
    real* weights0PP,
    real* weights0PM,
    real* weights0MP,
    real* weights0MM,
    real* vLast,
    real* vCurrent,
    real velocityX,
    real velocityY,
    real velocityZ,
    real timeRatio,
    real velocityRatio,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep)
{
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
    const unsigned nodeIndex = getNodeIndex();

    if(nodeIndex>=numberOfBCnodes) return;

    ////////////////////////////////////////////////////////////////////////////////
    //! - Interpolate the precursor velocity in space and time
    //!
    const PrecursorStencil stencil = loadPrecursorStencil(
        nodeIndex, neighbors0PP, neighbors0PM, neighbors0MP, neighbors0MM,
        weights0PP, weights0PM, weights0MP, weights0MM);

    const real VeloX = precursorVelocity(stencil, vLast, vCurrent,
                                         velocityX, timeRatio, velocityRatio);
    const real VeloY = precursorVelocity(stencil, &vLast[numberOfPrecursorNodes], &vCurrent[numberOfPrecursorNodes],
                                         velocityY, timeRatio, velocityRatio);
    const real VeloZ = precursorVelocity(stencil, &vLast[2*numberOfPrecursorNodes], &vCurrent[2*numberOfPrecursorNodes],
                                         velocityZ, timeRatio, velocityRatio);

    //////////////////////////////////////////////////////////////////////////
    //! - Set up distribution pointers: style of reading and writing the distributions from/to stored arrays dependent
    //! on timestep is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
    //!
    Distributions27 dist;
    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);

    const unsigned int KQK  = subgridDistanceIndices[nodeIndex];
    const unsigned int k000 = KQK;
    const unsigned int kP00 = KQK;
    const unsigned int kM00 = neighborX[KQK];
    const unsigned int k0P0 = KQK;
    const unsigned int k0M0 = neighborY[KQK];
    const unsigned int k00P = KQK;
    const unsigned int k00M = neighborZ[KQK];
    const unsigned int kMM0 = neighborY[kM00];
    const unsigned int kPP0 = KQK;
    const unsigned int kPM0 = k0M0;
    const unsigned int kMP0 = kM00;
    const unsigned int kM0M = neighborZ[kM00];
    const unsigned int kP0P = KQK;
    const unsigned int kP0M = k00M;
    const unsigned int k0PP = KQK;
    const unsigned int k0MM = neighborZ[k0M0];
    const unsigned int kM0P = kM00;
    const unsigned int k0PM = k00M;
    const unsigned int k0MP = k0M0;
    const unsigned int kPMP = k0M0;
    const unsigned int kMPM = kM0M;
    const unsigned int kMPP = kM00;
    const unsigned int kPMM = k0MM;
    const unsigned int kMMP = kMM0;
    const unsigned int kPPM = k00M;
    const unsigned int kPPP = KQK;
    const unsigned int kMMM = neighborZ[kMM0];

    ////////////////////////////////////////////////////////////////////////////////
    //! - Set macroscopic quantities
    //!
    const real drho = c0o1;
    const real vx1  = VeloX;
    const real vx2  = VeloY;
    const real vx3  = VeloZ;
    const real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);

    ////////////////////////////////////////////////////////////////////////////////
    //! - Evaluate the equilibrium for every direction.
    //! The distributions previously stored at the node are not loaded: every value is replaced below.
    //!
    const real f_000 = c8o27 * (drho - (drho + c1o1) * cusq);
    const real f_P00 = precursorEquilibrium(c2o27,  drho,  vx1, cusq);
    const real f_M00 = precursorEquilibrium(c2o27,  drho, -vx1, cusq);
    const real f_0P0 = precursorEquilibrium(c2o27,  drho,  vx2, cusq);
    const real f_0M0 = precursorEquilibrium(c2o27,  drho, -vx2, cusq);
    const real f_00P = precursorEquilibrium(c2o27,  drho,  vx3, cusq);
    const real f_00M = precursorEquilibrium(c2o27,  drho, -vx3, cusq);
    const real f_PP0 = precursorEquilibrium(c1o54,  drho,  vx1 + vx2, cusq);
    const real f_MM0 = precursorEquilibrium(c1o54,  drho, -vx1 - vx2, cusq);
    const real f_PM0 = precursorEquilibrium(c1o54,  drho,  vx1 - vx2, cusq);
    const real f_MP0 = precursorEquilibrium(c1o54,  drho, -vx1 + vx2, cusq);
    const real f_P0P = precursorEquilibrium(c1o54,  drho,  vx1 + vx3, cusq);
    const real f_M0M = precursorEquilibrium(c1o54,  drho, -vx1 - vx3, cusq);
    const real f_P0M = precursorEquilibrium(c1o54,  drho,  vx1 - vx3, cusq);
    const real f_M0P = precursorEquilibrium(c1o54,  drho, -vx1 + vx3, cusq);
    const real f_0PP = precursorEquilibrium(c1o54,  drho,  vx2 + vx3, cusq);
    const real f_0MM = precursorEquilibrium(c1o54,  drho, -vx2 - vx3, cusq);
    const real f_0PM = precursorEquilibrium(c1o54,  drho,  vx2 - vx3, cusq);
    const real f_0MP = precursorEquilibrium(c1o54,  drho, -vx2 + vx3, cusq);
    const real f_PPP = precursorEquilibrium(c1o216, drho,  vx1 + vx2 + vx3, cusq);
    const real f_MMM = precursorEquilibrium(c1o216, drho, -vx1 - vx2 - vx3, cusq);
    const real f_PPM = precursorEquilibrium(c1o216, drho,  vx1 + vx2 - vx3, cusq);
    const real f_MMP = precursorEquilibrium(c1o216, drho, -vx1 - vx2 + vx3, cusq);
    const real f_PMP = precursorEquilibrium(c1o216, drho,  vx1 - vx2 + vx3, cusq);
    const real f_MPM = precursorEquilibrium(c1o216, drho, -vx1 + vx2 - vx3, cusq);
    const real f_PMM = precursorEquilibrium(c1o216, drho,  vx1 - vx2 - vx3, cusq);
    const real f_MPP = precursorEquilibrium(c1o216, drho, -vx1 + vx2 + vx3, cusq);

    ////////////////////////////////////////////////////////////////////////////////
    //! - Write the new distributions to the bc nodes (each slot receives the value of the opposite direction)
    //!
    (dist.f[DIR_P00])[kP00] = f_M00;
    (dist.f[DIR_PP0])[kPP0] = f_MM0;
    (dist.f[DIR_P0M])[kP0M] = f_M0P;
    (dist.f[DIR_PM0])[kPM0] = f_MP0;
    (dist.f[DIR_PMP])[kPMP] = f_MPM;
    (dist.f[DIR_P0P])[kP0P] = f_M0M;
    (dist.f[DIR_PPM])[kPPM] = f_MMP;
    (dist.f[DIR_PPP])[kPPP] = f_MMM;
    (dist.f[DIR_PMM])[kPMM] = f_MPP;

    (dist.f[DIR_M00])[kM00] = f_P00;
    (dist.f[DIR_MM0])[kMM0] = f_PP0;
    (dist.f[DIR_M0M])[kM0M] = f_P0P;
    (dist.f[DIR_MP0])[kMP0] = f_PM0;
    (dist.f[DIR_M0P])[kM0P] = f_P0M;
    (dist.f[DIR_MMM])[kMMM] = f_PPP;
    (dist.f[DIR_MMP])[kMMP] = f_PPM;
    (dist.f[DIR_MPP])[kMPP] = f_PMM;
    (dist.f[DIR_MPM])[kMPM] = f_PMP;

    (dist.f[DIR_0P0])[k0P0] = f_0M0;
    (dist.f[DIR_0M0])[k0M0] = f_0P0;
    (dist.f[DIR_00P])[k00P] = f_00M;
    (dist.f[DIR_00M])[k00M] = f_00P;
    (dist.f[DIR_0PP])[k0PP] = f_0MM;
    (dist.f[DIR_0MM])[k0MM] = f_0PP;
    (dist.f[DIR_0PM])[k0PM] = f_0MP;
    (dist.f[DIR_0MP])[k0MP] = f_0PM;
    (dist.f[DIR_000])[k000] = f_000;
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//! \brief Overwrite nine distributions of each boundary node with interpolated precursor distributions.
//! \details fsLast and fsNext each hold nine blocks of numberOfPrecursorNodes values. Block i is
//! interpolated in space (four-point stencil) and time (timeRatio) and written to, in this order,
//! DIR_P00, DIR_PP0, DIR_PM0, DIR_P0P, DIR_P0M, DIR_PPP, DIR_PMP, DIR_PPM, DIR_PMM.
//! \param subgridDistanceIndices  node index in the distribution arrays for each boundary node
//! \param numberOfBCnodes         number of boundary nodes (threads beyond this return immediately)
//! \param numberOfPrecursorNodes  stride between the nine blocks inside fsLast / fsNext
//! \param distributions           distribution array, accessed via getPointersToDistributions
//! \param neighborX               not used by this kernel
//! \param neighborY,neighborZ     neighbor index lists of the sparse node numbering
//! \param neighbors0PP..neighbors0MM   precursor stencil indices per boundary node
//! \param weights0PP..weights0MM       precursor stencil weights per boundary node
//! \param fsLast,fsNext           precursor distributions at the two bracketing times
//! \param timeRatio               temporal blending factor (0 -> fsLast, 1 -> fsNext)
//! \param numberOfLBnodes         size of one distribution direction
//! \param isEvenTimestep          the distribution pointers are set up with the negated flag
__global__ void PrecursorDeviceDistributions(
    int *subgridDistanceIndices,
    int numberOfBCnodes,
    int numberOfPrecursorNodes,
    real* distributions,
    uint* neighborX,
    uint* neighborY,
    uint* neighborZ,
    uint* neighbors0PP,
    uint* neighbors0PM,
    uint* neighbors0MP,
    uint* neighbors0MM,
    real* weights0PP,
    real* weights0PM,
    real* weights0MP,
    real* weights0MM,
    real* fsLast,
    real* fsNext,
    real timeRatio,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep)
{
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
    const unsigned nodeIndex = getNodeIndex();

    if(nodeIndex>=numberOfBCnodes) return;

    ////////////////////////////////////////////////////////////////////////////////
    //! - Interpolate the nine precursor distributions in space and time
    //!
    const PrecursorStencil stencil = loadPrecursorStencil(
        nodeIndex, neighbors0PP, neighbors0PM, neighbors0MP, neighbors0MM,
        weights0PP, weights0PM, weights0MP, weights0MM);

    const real f0 = precursorDistribution(stencil, fsLast,                             fsNext,                             timeRatio);
    const real f1 = precursorDistribution(stencil, &fsLast[  numberOfPrecursorNodes], &fsNext[  numberOfPrecursorNodes], timeRatio);
    const real f2 = precursorDistribution(stencil, &fsLast[2*numberOfPrecursorNodes], &fsNext[2*numberOfPrecursorNodes], timeRatio);
    const real f3 = precursorDistribution(stencil, &fsLast[3*numberOfPrecursorNodes], &fsNext[3*numberOfPrecursorNodes], timeRatio);
    const real f4 = precursorDistribution(stencil, &fsLast[4*numberOfPrecursorNodes], &fsNext[4*numberOfPrecursorNodes], timeRatio);
    const real f5 = precursorDistribution(stencil, &fsLast[5*numberOfPrecursorNodes], &fsNext[5*numberOfPrecursorNodes], timeRatio);
    const real f6 = precursorDistribution(stencil, &fsLast[6*numberOfPrecursorNodes], &fsNext[6*numberOfPrecursorNodes], timeRatio);
    const real f7 = precursorDistribution(stencil, &fsLast[7*numberOfPrecursorNodes], &fsNext[7*numberOfPrecursorNodes], timeRatio);
    const real f8 = precursorDistribution(stencil, &fsLast[8*numberOfPrecursorNodes], &fsNext[8*numberOfPrecursorNodes], timeRatio);

    //////////////////////////////////////////////////////////////////////////
    //! - Set up distribution pointers: style of reading and writing the distributions from/to stored arrays dependent
    //! on timestep is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
    //!
    Distributions27 dist;
    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);

    // Only the node indices of the nine written directions are needed.
    const unsigned int KQK  = subgridDistanceIndices[nodeIndex];
    const unsigned int k0M0 = neighborY[KQK];
    const unsigned int k00M = neighborZ[KQK];
    const unsigned int k0MM = neighborZ[k0M0];

    const unsigned int kP00 = KQK;
    const unsigned int kPP0 = KQK;
    const unsigned int kPM0 = k0M0;
    const unsigned int kP0P = KQK;
    const unsigned int kP0M = k00M;
    const unsigned int kPPP = KQK;
    const unsigned int kPMP = k0M0;
    const unsigned int kPPM = k00M;
    const unsigned int kPMM = k0MM;

    ////////////////////////////////////////////////////////////////////////////////
    //! - Write the interpolated distributions to the bc nodes
    //!
    dist.f[DIR_P00][kP00] = f0;
    dist.f[DIR_PP0][kPP0] = f1;
    dist.f[DIR_PM0][kPM0] = f2;
    dist.f[DIR_P0P][kP0P] = f3;
    dist.f[DIR_P0M][kP0M] = f4;
    dist.f[DIR_PPP][kPPP] = f5;
    dist.f[DIR_PMP][kPMP] = f6;
    dist.f[DIR_PPM][kPPM] = f7;
    dist.f[DIR_PMM][kPMM] = f8;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// NOTE: Has not been tested after bug fix! +__global__ void QPrecursorDeviceDistributions( + int* subgridDistanceIndices, + real* subgridDistances, + int sizeQ, + int numberOfBCnodes, + int numberOfPrecursorNodes, + real* distributions, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighbors0PP, + uint* neighbors0PM, + uint* neighbors0MP, + uint* neighbors0MM, + real* weights0PP, + real* weights0PM, + real* weights0MP, + real* weights0MM, + real* fsLast, + real* fsNext, + real timeRatio, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + + if(nodeIndex>=numberOfBCnodes) return; + + uint kNeighbor0PP = neighbors0PP[nodeIndex]; + real d0PP = weights0PP[nodeIndex]; + + real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp; + real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp; + + real* f0Last = fsLast; + real* f1Last = &fsLast[ numberOfPrecursorNodes]; + real* f2Last = &fsLast[2*numberOfPrecursorNodes]; + real* f3Last = &fsLast[3*numberOfPrecursorNodes]; + real* f4Last = &fsLast[4*numberOfPrecursorNodes]; + real* f5Last = &fsLast[5*numberOfPrecursorNodes]; + real* f6Last = &fsLast[6*numberOfPrecursorNodes]; + real* f7Last = &fsLast[7*numberOfPrecursorNodes]; + real* f8Last = &fsLast[8*numberOfPrecursorNodes]; + + real* f0Next = fsNext; + real* f1Next = &fsNext[ numberOfPrecursorNodes]; + real* f2Next = &fsNext[2*numberOfPrecursorNodes]; + real* f3Next = &fsNext[3*numberOfPrecursorNodes]; + real* f4Next = 
&fsNext[4*numberOfPrecursorNodes]; + real* f5Next = &fsNext[5*numberOfPrecursorNodes]; + real* f6Next = &fsNext[6*numberOfPrecursorNodes]; + real* f7Next = &fsNext[7*numberOfPrecursorNodes]; + real* f8Next = &fsNext[8*numberOfPrecursorNodes]; + + + if(d0PP<1e6) + { + uint kNeighbor0PM = neighbors0PM[nodeIndex]; + uint kNeighbor0MP = neighbors0MP[nodeIndex]; + uint kNeighbor0MM = neighbors0MM[nodeIndex]; + + real d0PM = weights0PM[nodeIndex]; + real d0MP = weights0MP[nodeIndex]; + real d0MM = weights0MM[nodeIndex]; + + real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM); + + f0LastInterp = (f0Last[kNeighbor0PP]*d0PP + f0Last[kNeighbor0PM]*d0PM + f0Last[kNeighbor0MP]*d0MP + f0Last[kNeighbor0MM]*d0MM)*invWeightSum; + f0NextInterp = (f0Next[kNeighbor0PP]*d0PP + f0Next[kNeighbor0PM]*d0PM + f0Next[kNeighbor0MP]*d0MP + f0Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f1LastInterp = (f1Last[kNeighbor0PP]*d0PP + f1Last[kNeighbor0PM]*d0PM + f1Last[kNeighbor0MP]*d0MP + f1Last[kNeighbor0MM]*d0MM)*invWeightSum; + f1NextInterp = (f1Next[kNeighbor0PP]*d0PP + f1Next[kNeighbor0PM]*d0PM + f1Next[kNeighbor0MP]*d0MP + f1Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f2LastInterp = (f2Last[kNeighbor0PP]*d0PP + f2Last[kNeighbor0PM]*d0PM + f2Last[kNeighbor0MP]*d0MP + f2Last[kNeighbor0MM]*d0MM)*invWeightSum; + f2NextInterp = (f2Next[kNeighbor0PP]*d0PP + f2Next[kNeighbor0PM]*d0PM + f2Next[kNeighbor0MP]*d0MP + f2Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f3LastInterp = (f3Last[kNeighbor0PP]*d0PP + f3Last[kNeighbor0PM]*d0PM + f3Last[kNeighbor0MP]*d0MP + f3Last[kNeighbor0MM]*d0MM)*invWeightSum; + f3NextInterp = (f3Next[kNeighbor0PP]*d0PP + f3Next[kNeighbor0PM]*d0PM + f3Next[kNeighbor0MP]*d0MP + f3Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f4LastInterp = (f4Last[kNeighbor0PP]*d0PP + f4Last[kNeighbor0PM]*d0PM + f4Last[kNeighbor0MP]*d0MP + f4Last[kNeighbor0MM]*d0MM)*invWeightSum; + f4NextInterp = (f4Next[kNeighbor0PP]*d0PP + f4Next[kNeighbor0PM]*d0PM + f4Next[kNeighbor0MP]*d0MP + 
f4Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f5LastInterp = (f5Last[kNeighbor0PP]*d0PP + f5Last[kNeighbor0PM]*d0PM + f5Last[kNeighbor0MP]*d0MP + f5Last[kNeighbor0MM]*d0MM)*invWeightSum; + f5NextInterp = (f5Next[kNeighbor0PP]*d0PP + f5Next[kNeighbor0PM]*d0PM + f5Next[kNeighbor0MP]*d0MP + f5Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f6LastInterp = (f6Last[kNeighbor0PP]*d0PP + f6Last[kNeighbor0PM]*d0PM + f6Last[kNeighbor0MP]*d0MP + f6Last[kNeighbor0MM]*d0MM)*invWeightSum; + f6NextInterp = (f6Next[kNeighbor0PP]*d0PP + f6Next[kNeighbor0PM]*d0PM + f6Next[kNeighbor0MP]*d0MP + f6Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f7LastInterp = (f7Last[kNeighbor0PP]*d0PP + f7Last[kNeighbor0PM]*d0PM + f7Last[kNeighbor0MP]*d0MP + f7Last[kNeighbor0MM]*d0MM)*invWeightSum; + f7NextInterp = (f7Next[kNeighbor0PP]*d0PP + f7Next[kNeighbor0PM]*d0PM + f7Next[kNeighbor0MP]*d0MP + f7Next[kNeighbor0MM]*d0MM)*invWeightSum; + + f8LastInterp = (f8Last[kNeighbor0PP]*d0PP + f8Last[kNeighbor0PM]*d0PM + f8Last[kNeighbor0MP]*d0MP + f8Last[kNeighbor0MM]*d0MM)*invWeightSum; + f8NextInterp = (f8Next[kNeighbor0PP]*d0PP + f8Next[kNeighbor0PM]*d0PM + f8Next[kNeighbor0MP]*d0MP + f8Next[kNeighbor0MM]*d0MM)*invWeightSum; + + } else { + f0LastInterp = f0Last[kNeighbor0PP]; + f1LastInterp = f1Last[kNeighbor0PP]; + f2LastInterp = f2Last[kNeighbor0PP]; + f3LastInterp = f3Last[kNeighbor0PP]; + f4LastInterp = f4Last[kNeighbor0PP]; + f5LastInterp = f5Last[kNeighbor0PP]; + f6LastInterp = f6Last[kNeighbor0PP]; + f7LastInterp = f7Last[kNeighbor0PP]; + f8LastInterp = f8Last[kNeighbor0PP]; + + f0NextInterp = f0Next[kNeighbor0PP]; + f1NextInterp = f1Next[kNeighbor0PP]; + f2NextInterp = f2Next[kNeighbor0PP]; + f3NextInterp = f3Next[kNeighbor0PP]; + f4NextInterp = f4Next[kNeighbor0PP]; + f5NextInterp = f5Next[kNeighbor0PP]; + f6NextInterp = f6Next[kNeighbor0PP]; + f7NextInterp = f7Next[kNeighbor0PP]; + f8NextInterp = f8Next[kNeighbor0PP]; + } + ////////////////////////////////////////////////////////////////////////// + //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep + //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier + //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + + unsigned int KQK = subgridDistanceIndices[nodeIndex]; + // unsigned int k000= KQK; + unsigned int kP00 = KQK; + // unsigned int kM00 = neighborX[KQK]; + // unsigned int k0P0 = KQK; + unsigned int k0M0 = neighborY[KQK]; + // unsigned int k00P = KQK; + unsigned int k00M = neighborZ[KQK]; + // unsigned int kMM0 = neighborY[kM00]; + unsigned int kPP0 = KQK; + unsigned int kPM0 = k0M0; + // unsigned int kMP0 = kM00; + // unsigned int kM0M = neighborZ[kM00]; + unsigned int kP0P = KQK; + unsigned int kP0M = k00M; + // unsigned int kM0P = kM00; + unsigned int k0MM = neighborZ[k0M0]; + // unsigned int k0PM = k00M; + // unsigned int k0MP = k0M0; + unsigned int kPMP = k0M0; + // unsigned int kMPM = kM0M; + // unsigned int kMPP = kM00; + unsigned int kPMM = k0MM; + // unsigned int kMMP = kMM0; + unsigned int kPPM = k00M; + unsigned int kPPP = KQK; + // unsigned int kMMM = neighborZ[kMM0]; + SubgridDistances27 qs; + getPointersToSubgridDistances(qs, subgridDistances, sizeQ); + + real q; + q = qs.q[DIR_P00][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio; + q = qs.q[DIR_PP0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio; + q = qs.q[DIR_PM0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio; + q = qs.q[DIR_P0P][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio; + q = qs.q[DIR_P0M][nodeIndex]; if(q>= c0o1 && q 
<= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio; + q = qs.q[DIR_PPP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio; + q = qs.q[DIR_PMP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio; + q = qs.q[DIR_PPM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio; + q = qs.q[DIR_PMM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio; + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu index ccb2ce79c63515e59e4f9ae75016f44ced71a170..02cfd2bce3723162b645cef568c87ca3b1dd2720 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu @@ -1,29 +1,63 @@ -/* Device code */ -#include "LBM/LB.h" +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PressBCs27.cu +//! \ingroup GPU +//! \author Martin Schoenherr, Anna Wellmann +//====================================================================================== +#include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" -#include "KernelUtilities.h" +#include "lbm/MacroscopicQuantities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QInflowScaleByPressDevice27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QInflowScaleByPressDevice27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const 
unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -97,141 +131,141 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = 
&DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = 
&DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f1_E = (D.f[DIR_P00 ])[k1e ]; - real f1_W = (D.f[DIR_M00 ])[k1w ]; - real f1_N = (D.f[DIR_0P0 ])[k1n ]; - real f1_S = (D.f[DIR_0M0 ])[k1s ]; - real f1_T = (D.f[DIR_00P ])[k1t ]; - real f1_B = (D.f[DIR_00M ])[k1b ]; - real f1_NE = (D.f[DIR_PP0 ])[k1ne ]; - real f1_SW = (D.f[DIR_MM0 ])[k1sw ]; - real f1_SE = (D.f[DIR_PM0 ])[k1se ]; - real f1_NW = (D.f[DIR_MP0 ])[k1nw ]; - real f1_TE = (D.f[DIR_P0P ])[k1te ]; - real f1_BW = (D.f[DIR_M0M ])[k1bw ]; - 
real f1_BE = (D.f[DIR_P0M ])[k1be ]; - real f1_TW = (D.f[DIR_M0P ])[k1tw ]; - real f1_TN = (D.f[DIR_0PP ])[k1tn ]; - real f1_BS = (D.f[DIR_0MM ])[k1bs ]; - real f1_BN = (D.f[DIR_0PM ])[k1bn ]; - real f1_TS = (D.f[DIR_0MP ])[k1ts ]; + real f1_E = (D.f[DIR_P00])[k1e ]; + real f1_W = (D.f[DIR_M00])[k1w ]; + real f1_N = (D.f[DIR_0P0])[k1n ]; + real f1_S = (D.f[DIR_0M0])[k1s ]; + real f1_T = (D.f[DIR_00P])[k1t ]; + real f1_B = (D.f[DIR_00M])[k1b ]; + real f1_NE = (D.f[DIR_PP0])[k1ne ]; + real f1_SW = (D.f[DIR_MM0])[k1sw ]; + real f1_SE = (D.f[DIR_PM0])[k1se ]; + real f1_NW = (D.f[DIR_MP0])[k1nw ]; + real f1_TE = (D.f[DIR_P0P])[k1te ]; + real f1_BW = (D.f[DIR_M0M])[k1bw ]; + real f1_BE = (D.f[DIR_P0M])[k1be ]; + real f1_TW = (D.f[DIR_M0P])[k1tw ]; + real f1_TN = (D.f[DIR_0PP])[k1tn ]; + real f1_BS = (D.f[DIR_0MM])[k1bs ]; + real f1_BN = (D.f[DIR_0PM])[k1bn ]; + real f1_TS = (D.f[DIR_0MP])[k1ts ]; //real f1_ZERO = (D.f[DIR_000])[k1zero]; - real f1_TNE = (D.f[DIR_PPP ])[k1tne ]; - real f1_TSW = (D.f[DIR_MMP ])[k1tsw ]; - real f1_TSE = (D.f[DIR_PMP ])[k1tse ]; - real f1_TNW = (D.f[DIR_MPP ])[k1tnw ]; - real f1_BNE = (D.f[DIR_PPM ])[k1bne ]; - real f1_BSW = (D.f[DIR_MMM ])[k1bsw ]; - real f1_BSE = (D.f[DIR_PMM ])[k1bse ]; - real f1_BNW = (D.f[DIR_MPM ])[k1bnw ]; + real f1_TNE = (D.f[DIR_PPP])[k1tne ]; + real f1_TSW = (D.f[DIR_MMP])[k1tsw ]; + real f1_TSE = (D.f[DIR_PMP])[k1tse ]; + real f1_TNW = (D.f[DIR_MPP])[k1tnw ]; + real f1_BNE = (D.f[DIR_PPM])[k1bne ]; + real f1_BSW = (D.f[DIR_MMM])[k1bsw ]; + real f1_BSE = (D.f[DIR_PMM])[k1bse ]; + real f1_BNW = (D.f[DIR_MPM])[k1bnw ]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f_E = (D.f[DIR_P00 ])[ke ]; - real f_W = (D.f[DIR_M00 ])[kw ]; - real f_N = (D.f[DIR_0P0 ])[kn ]; - real f_S = (D.f[DIR_0M0 ])[ks ]; - real f_T = (D.f[DIR_00P ])[kt ]; - real f_B = (D.f[DIR_00M ])[kb ]; - real f_NE = (D.f[DIR_PP0 ])[kne ]; - real 
f_SW = (D.f[DIR_MM0 ])[ksw ]; - real f_SE = (D.f[DIR_PM0 ])[kse ]; - real f_NW = (D.f[DIR_MP0 ])[knw ]; - real f_TE = (D.f[DIR_P0P ])[kte ]; - real f_BW = (D.f[DIR_M0M ])[kbw ]; - real f_BE = (D.f[DIR_P0M ])[kbe ]; - real f_TW = (D.f[DIR_M0P ])[ktw ]; - real f_TN = (D.f[DIR_0PP ])[ktn ]; - real f_BS = (D.f[DIR_0MM ])[kbs ]; - real f_BN = (D.f[DIR_0PM ])[kbn ]; - real f_TS = (D.f[DIR_0MP ])[kts ]; + real f_E = (D.f[DIR_P00])[ke ]; + real f_W = (D.f[DIR_M00])[kw ]; + real f_N = (D.f[DIR_0P0])[kn ]; + real f_S = (D.f[DIR_0M0])[ks ]; + real f_T = (D.f[DIR_00P])[kt ]; + real f_B = (D.f[DIR_00M])[kb ]; + real f_NE = (D.f[DIR_PP0])[kne ]; + real f_SW = (D.f[DIR_MM0])[ksw ]; + real f_SE = (D.f[DIR_PM0])[kse ]; + real f_NW = (D.f[DIR_MP0])[knw ]; + real f_TE = (D.f[DIR_P0P])[kte ]; + real f_BW = (D.f[DIR_M0M])[kbw ]; + real f_BE = (D.f[DIR_P0M])[kbe ]; + real f_TW = (D.f[DIR_M0P])[ktw ]; + real f_TN = (D.f[DIR_0PP])[ktn ]; + real f_BS = (D.f[DIR_0MM])[kbs ]; + real f_BN = (D.f[DIR_0PM])[kbn ]; + real f_TS = (D.f[DIR_0MP])[kts ]; //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_TNE = (D.f[DIR_PPP ])[ktne ]; - real f_TSW = (D.f[DIR_MMP ])[ktsw ]; - real f_TSE = (D.f[DIR_PMP ])[ktse ]; - real f_TNW = (D.f[DIR_MPP ])[ktnw ]; - real f_BNE = (D.f[DIR_PPM ])[kbne ]; - real f_BSW = (D.f[DIR_MMM ])[kbsw ]; - real f_BSE = (D.f[DIR_PMM ])[kbse ]; - real f_BNW = (D.f[DIR_MPM ])[kbnw ]; + real f_TNE = (D.f[DIR_PPP])[ktne ]; + real f_TSW = (D.f[DIR_MMP])[ktsw ]; + real f_TSE = (D.f[DIR_PMP])[ktse ]; + real f_TNW = (D.f[DIR_MPP])[ktnw ]; + real f_BNE = (D.f[DIR_PPM])[kbne ]; + real f_BSW = (D.f[DIR_MMM])[kbsw ]; + real f_BSE = (D.f[DIR_PMM])[kbse ]; + real f_BNW = (D.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// // real vx1, vx2, vx3; real drho, drho1; ////////////////////////////////////////////////////////////////////////// - //Dichte + //Dichte drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + - f1_BN + f1_TS 
+ f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + - f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); + f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + + f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + - f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + - f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); ////////////////////////////////////////////////////////////////////////// - //Schallgeschwindigkeit - real cs = c1o1 / sqrtf(c3o1); + //Schallgeschwindigkeit + real cs = c1o1 / sqrtf(c3o1); ////////////////////////////////////////////////////////////////////////// - real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho; - //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one); - real diffRhoToAdd = rhoBC[k] - rhoInterpol; - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //no velocity - ////////////////////////////////////////// + real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho; + //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one); + real diffRhoToAdd = rhoBC[k] - rhoInterpol; + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //no velocity + ////////////////////////////////////////// f_E = f1_E * cs + (c1o1 - cs) * f_E ; f_W = f1_W * cs + (c1o1 - cs) * f_W ; f_N = f1_N * cs + (c1o1 - cs) * f_N ; @@ -258,16 +292,16 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, f_BSW = f1_BSW * cs + (c1o1 - cs) * f_BSW ; f_BSE = f1_BSE * cs + (c1o1 - cs) * f_BSE ; f_BNW = f1_BNW * cs + (c1o1 - cs) * f_BNW ; - 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //scale by press - ////////////////////////////////////////// - //f_E = (f_E + c2over27 ) * diffRho - c2over27 ; + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //scale by press + ////////////////////////////////////////// + //f_E = (f_E + c2over27 ) * diffRho - c2over27 ; // f_W = (f_W + c2over27 ) * diffRho - c2over27 ; // f_N = (f_N + c2over27 ) * diffRho - c2over27 ; // f_S = (f_S + c2over27 ) * diffRho - c2over27 ; // f_T = (f_T + c2over27 ) * diffRho - c2over27 ; // f_B = (f_B + c2over27 ) * diffRho - c2over27 ; - //f_NE = (f_NE + c1over54 ) * diffRho - c1over54 ; + //f_NE = (f_NE + c1over54 ) * diffRho - c1over54 ; // f_SW = (f_SW + c1over54 ) * diffRho - c1over54 ; // f_SE = (f_SE + c1over54 ) * diffRho - c1over54 ; // f_NW = (f_NW + c1over54 ) * diffRho - c1over54 ; @@ -287,16 +321,16 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, // f_BSW = (f_BSW + c1over216) * diffRho - c1over216; // f_BSE = (f_BSE + c1over216) * diffRho - c1over216; // f_BNW = (f_BNW + c1over216) * diffRho - c1over216; - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // add press - ////////////////////////////////////////// - f_E = (f_E + c2o27 * diffRhoToAdd); + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // add press + ////////////////////////////////////////// + f_E = (f_E + c2o27 * diffRhoToAdd); f_W = (f_W + c2o27 * diffRhoToAdd); f_N = (f_N + c2o27 * diffRhoToAdd); f_S = (f_S + c2o27 * diffRhoToAdd); f_T = (f_T + c2o27 * diffRhoToAdd); f_B = (f_B + c2o27 * diffRhoToAdd); - f_NE = (f_NE + c1o54 * diffRhoToAdd); + f_NE = (f_NE + c1o54 * diffRhoToAdd); f_SW = (f_SW + c1o54 * 
diffRhoToAdd); f_SE = (f_SE + c1o54 * diffRhoToAdd); f_NW = (f_NW + c1o54 * diffRhoToAdd); @@ -316,111 +350,111 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, f_BSW = (f_BSW + c1o216 * diffRhoToAdd); f_BSE = (f_BSE + c1o216 * diffRhoToAdd); f_BNW = (f_BNW + c1o216 * diffRhoToAdd); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = 
&DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// //__syncthreads(); - // -X - //(D.f[DIR_P00 ])[ke ] = f_E ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_PP0 ])[kne ] = f_NE ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_P0P ])[kte ] = f_TE ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - 
//(D.f[DIR_PPP ])[ktne ] = f_TNE ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - // X - (D.f[DIR_M00 ])[kw ] = f_W ; - (D.f[DIR_MM0 ])[ksw ] = f_SW ; - (D.f[DIR_MP0 ])[knw ] = f_NW ; - (D.f[DIR_M0M ])[kbw ] = f_BW ; - (D.f[DIR_M0P ])[ktw ] = f_TW ; - (D.f[DIR_MMP ])[ktsw ] = f_TSW ; - (D.f[DIR_MPP ])[ktnw ] = f_TNW ; - (D.f[DIR_MMM ])[kbsw ] = f_BSW ; - (D.f[DIR_MPM ])[kbnw ] = f_BNW ; - // Y - //(D.f[DIR_0M0 ])[ks ] = f_S ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_MM0 ])[ksw ] = f_SW ; - //(D.f[DIR_0MP ])[kts ] = f_TS ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_MMP ])[ktsw ] = f_TSW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - // Z - //(D.f[DIR_00M ])[kb ] = f_B ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_M0M ])[kbw ] = f_BW ; - //(D.f[DIR_0PM ])[kbn ] = f_BN ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - //(D.f[DIR_MPM ])[kbnw ] = f_BNW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; + // -X + //(D.f[DIR_P00])[ke ] = f_E ; + //(D.f[DIR_PM0])[kse ] = f_SE ; + //(D.f[DIR_PP0])[kne ] = f_NE ; + //(D.f[DIR_P0M])[kbe ] = f_BE ; + //(D.f[DIR_P0P])[kte ] = f_TE ; + //(D.f[DIR_PMP])[ktse ] = f_TSE ; + //(D.f[DIR_PPP])[ktne ] = f_TNE ; + //(D.f[DIR_PMM])[kbse ] = f_BSE ; + //(D.f[DIR_PPM])[kbne ] = f_BNE ; + // X + (D.f[DIR_M00])[kw ] = f_W ; + (D.f[DIR_MM0])[ksw ] = f_SW ; + (D.f[DIR_MP0])[knw ] = f_NW ; + (D.f[DIR_M0M])[kbw ] = f_BW ; + (D.f[DIR_M0P])[ktw ] = f_TW ; + (D.f[DIR_MMP])[ktsw ] = f_TSW ; + (D.f[DIR_MPP])[ktnw ] = f_TNW ; + (D.f[DIR_MMM])[kbsw ] = f_BSW ; + (D.f[DIR_MPM])[kbnw ] = f_BNW ; + // Y + //(D.f[DIR_0M0])[ks ] = f_S ; + //(D.f[DIR_PM0])[kse ] = f_SE ; + //(D.f[DIR_MM0])[ksw ] = f_SW ; + //(D.f[DIR_0MP])[kts ] = f_TS ; + //(D.f[DIR_0MM])[kbs ] = f_BS ; + //(D.f[DIR_PMP])[ktse ] = f_TSE ; + //(D.f[DIR_MMP])[ktsw ] = f_TSW ; + //(D.f[DIR_PMM])[kbse ] = f_BSE ; + 
//(D.f[DIR_MMM])[kbsw ] = f_BSW ; + // Z + //(D.f[DIR_00M])[kb ] = f_B ; + //(D.f[DIR_P0M])[kbe ] = f_BE ; + //(D.f[DIR_M0M])[kbw ] = f_BW ; + //(D.f[DIR_0PM])[kbn ] = f_BN ; + //(D.f[DIR_0MM])[kbs ] = f_BS ; + //(D.f[DIR_PPM])[kbne ] = f_BNE ; + //(D.f[DIR_MPM])[kbnw ] = f_BNW ; + //(D.f[DIR_PMM])[kbse ] = f_BSE ; + //(D.f[DIR_MMM])[kbsw ] = f_BSW ; ////////////////////////////////////////////////////////////////////////// } } @@ -465,22 +499,23 @@ __global__ void QInflowScaleByPressDevice27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceIncompNEQ27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceIncompNEQ27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -554,112 +589,112 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC, Distributions27 D; if (isEvenTimestep==true) //// ACHTUNG PREColl !!!!!!!!!!!!!! 
{ - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP 
* numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + 
D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_W = (D.f[DIR_P00 ])[k1e ]; - f1_E = (D.f[DIR_M00 ])[k1w ]; - f1_S = (D.f[DIR_0P0 ])[k1n ]; - f1_N = (D.f[DIR_0M0 ])[k1s ]; - f1_B = (D.f[DIR_00P ])[k1t ]; - f1_T = (D.f[DIR_00M ])[k1b ]; - f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - f1_NW = (D.f[DIR_PM0 ])[k1se ]; - f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - f1_BW = (D.f[DIR_P0P ])[k1te ]; - f1_TE = (D.f[DIR_M0M ])[k1bw ]; - f1_TW = (D.f[DIR_P0M ])[k1be ]; - f1_BE = (D.f[DIR_M0P ])[k1tw ]; - f1_BS = (D.f[DIR_0PP ])[k1tn ]; - f1_TN = (D.f[DIR_0MM ])[k1bs ]; - f1_TS = (D.f[DIR_0PM ])[k1bn ]; - f1_BN = (D.f[DIR_0MP ])[k1ts ]; + f1_W = (D.f[DIR_P00])[k1e ]; + f1_E = (D.f[DIR_M00])[k1w ]; + f1_S = (D.f[DIR_0P0])[k1n ]; + f1_N = (D.f[DIR_0M0])[k1s ]; + f1_B = 
(D.f[DIR_00P])[k1t ]; + f1_T = (D.f[DIR_00M])[k1b ]; + f1_SW = (D.f[DIR_PP0])[k1ne ]; + f1_NE = (D.f[DIR_MM0])[k1sw ]; + f1_NW = (D.f[DIR_PM0])[k1se ]; + f1_SE = (D.f[DIR_MP0])[k1nw ]; + f1_BW = (D.f[DIR_P0P])[k1te ]; + f1_TE = (D.f[DIR_M0M])[k1bw ]; + f1_TW = (D.f[DIR_P0M])[k1be ]; + f1_BE = (D.f[DIR_M0P])[k1tw ]; + f1_BS = (D.f[DIR_0PP])[k1tn ]; + f1_TN = (D.f[DIR_0MM])[k1bs ]; + f1_TS = (D.f[DIR_0PM])[k1bn ]; + f1_BN = (D.f[DIR_0MP])[k1ts ]; f1_ZERO = (D.f[DIR_000])[k1zero]; - f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - f1_BNE = (D.f[DIR_MMP ])[k1tsw ]; - f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - f1_TNE = (D.f[DIR_MMM ])[k1bsw ]; - f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + f1_BSW = (D.f[DIR_PPP])[k1tne ]; + f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + f1_BNW = (D.f[DIR_PMP])[k1tse ]; + f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + f1_TSW = (D.f[DIR_PPM])[k1bne ]; + f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + f1_TNW = (D.f[DIR_PMM])[k1bse ]; + f1_TSE = (D.f[DIR_MPM])[k1bnw ]; ////////////////////////////////////////////////////////////////////////// real drho1 = f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; real vx1 = ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + - ((f1_BE - f1_TW) + (f1_TE - f1_BW)) + ((f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_E - f1_W); + ((f1_BE - f1_TW) + (f1_TE - f1_BW)) + ((f1_SE - f1_NW) + (f1_NE - f1_SW)) + + (f1_E - f1_W); real vx2 = (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + - ((f1_BN - f1_TS) + (f1_TN - f1_BS)) + (-(f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_N - f1_S); + ((f1_BN - f1_TS) + (f1_TN - f1_BS)) + (-(f1_SE - f1_NW) + (f1_NE - f1_SW)) + + (f1_N - f1_S); real vx3 = ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) + - (-(f1_BN - f1_TS) + 
(f1_TN - f1_BS)) + ((f1_TE - f1_BW) - (f1_BE - f1_TW)) + - (f1_T - f1_B); + (-(f1_BN - f1_TS) + (f1_TN - f1_BS)) + ((f1_TE - f1_BW) - (f1_BE - f1_TW)) + + (f1_T - f1_B); real cusq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); @@ -690,15 +725,15 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC, f1_BNW -= c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq)); f1_BSE -= c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); f1_TNW -= c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); - - drho1 = rhoBC[k]; - //if(vx1 < zero){ - // vx1 *= 0.9; - //} - //if(vx2 < zero){ - // vx2 *= c1o10;//0.9; - //} + drho1 = rhoBC[k]; + + //if(vx1 < zero){ + // vx1 *= 0.9; + //} + //if(vx2 < zero){ + // vx2 *= c1o10;//0.9; + //} f1_ZERO += c8o27* (drho1-(drho1+c1o1)*cusq); f1_E += c2o27* (drho1+(drho1+c1o1)*(c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cusq)); @@ -728,39 +763,39 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC, f1_BSE += c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); f1_TNW += c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); - //drho1 = (drho1 + rhoBC[k])/2.f; - //drho1 = drho1 - rhoBC[k]; + //drho1 = (drho1 + rhoBC[k])/2.f; + //drho1 = drho1 - rhoBC[k]; ////////////////////////////////////////////////////////////////////////// __syncthreads(); - (D.f[DIR_P00 ])[ke ] = f1_W ; - (D.f[DIR_M00 ])[kw ] = f1_E ; - (D.f[DIR_0P0 ])[kn ] = f1_S ; - (D.f[DIR_0M0 ])[ks ] = f1_N ; - (D.f[DIR_00P ])[kt ] = f1_B ; - (D.f[DIR_00M ])[kb ] = f1_T ; - (D.f[DIR_PP0 ])[kne ] = f1_SW ; - (D.f[DIR_MM0 ])[ksw ] = f1_NE ; - (D.f[DIR_PM0 ])[kse ] = f1_NW ; - (D.f[DIR_MP0 ])[knw ] = f1_SE ; - (D.f[DIR_P0P ])[kte ] = f1_BW ; - (D.f[DIR_M0M ])[kbw ] = f1_TE ; - (D.f[DIR_P0M ])[kbe ] = f1_TW ; - (D.f[DIR_M0P ])[ktw ] = f1_BE ; - (D.f[DIR_0PP ])[ktn ] = f1_BS ; - (D.f[DIR_0MM ])[kbs ] = f1_TN ; - (D.f[DIR_0PM 
])[kbn ] = f1_TS ; - (D.f[DIR_0MP ])[kts ] = f1_BN ; - (D.f[DIR_000])[kzero] = f1_ZERO; - (D.f[DIR_PPP ])[ktne ] = f1_BSW ; - (D.f[DIR_MMP ])[ktsw ] = f1_BNE ; - (D.f[DIR_PMP ])[ktse ] = f1_BNW ; - (D.f[DIR_MPP ])[ktnw ] = f1_BSE ; - (D.f[DIR_PPM ])[kbne ] = f1_TSW ; - (D.f[DIR_MMM ])[kbsw ] = f1_TNE ; - (D.f[DIR_PMM ])[kbse ] = f1_TNW ; - (D.f[DIR_MPM ])[kbnw ] = f1_TSE ; + (D.f[DIR_P00])[ke ] = f1_W ; + (D.f[DIR_M00])[kw ] = f1_E ; + (D.f[DIR_0P0])[kn ] = f1_S ; + (D.f[DIR_0M0])[ks ] = f1_N ; + (D.f[DIR_00P])[kt ] = f1_B ; + (D.f[DIR_00M])[kb ] = f1_T ; + (D.f[DIR_PP0])[kne ] = f1_SW ; + (D.f[DIR_MM0])[ksw ] = f1_NE ; + (D.f[DIR_PM0])[kse ] = f1_NW ; + (D.f[DIR_MP0])[knw ] = f1_SE ; + (D.f[DIR_P0P])[kte ] = f1_BW ; + (D.f[DIR_M0M])[kbw ] = f1_TE ; + (D.f[DIR_P0M])[kbe ] = f1_TW ; + (D.f[DIR_M0P])[ktw ] = f1_BE ; + (D.f[DIR_0PP])[ktn ] = f1_BS ; + (D.f[DIR_0MM])[kbs ] = f1_TN ; + (D.f[DIR_0PM])[kbn ] = f1_TS ; + (D.f[DIR_0MP])[kts ] = f1_BN ; + (D.f[DIR_000])[kzero] = f1_ZERO; + (D.f[DIR_PPP])[ktne ] = f1_BSW ; + (D.f[DIR_MMP])[ktsw ] = f1_BNE ; + (D.f[DIR_PMP])[ktse ] = f1_BNW ; + (D.f[DIR_MPP])[ktnw ] = f1_BSE ; + (D.f[DIR_PPM])[kbne ] = f1_TSW ; + (D.f[DIR_MMM])[kbsw ] = f1_TNE ; + (D.f[DIR_PMM])[kbse ] = f1_TNW ; + (D.f[DIR_MPM])[kbnw ] = f1_TSE ; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -804,54 +839,49 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceNEQ27(real* rhoBC, - real* distribution, - int* bcNodeIndices, - int* bcNeighborIndices, - int numberOfBCnodes, - real omega1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) +__global__ void QPressDeviceNEQ27( + real* rhoBC, + 
real* distributions, + int* bcNodeIndices, + int* bcNeighborIndices, + int numberOfBCnodes, + real omega1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - ////////////////////////////////////////////////////////////////////////// - //! The pressure boundary condition is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + //////////////////////////////////////////////////////////////////////////////// + //! The pressure boundary condition is executed in the following steps + //! - const unsigned k = nx*(ny*z + y) + x; + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - ////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
Distributions27 dist; - getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep); + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); //////////////////////////////////////////////////////////////////////////////// //! - Set local pressure //! - real rhoBClocal = rhoBC[k]; + real rhoBClocal = rhoBC[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int KQK = bcNodeIndices[k]; + unsigned int KQK = bcNodeIndices[nodeIndex]; unsigned int kzero= KQK; unsigned int ke = KQK; unsigned int kw = neighborX[KQK]; @@ -882,7 +912,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) for neighboring node //! - unsigned int K1QK = bcNeighborIndices[k]; + unsigned int K1QK = bcNeighborIndices[nodeIndex]; unsigned int k1zero= K1QK; unsigned int k1e = K1QK; unsigned int k1w = neighborX[K1QK]; @@ -914,52 +944,52 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions for neighboring node //! 
- real f1_W = (dist.f[DIR_P00 ])[k1e ]; - real f1_E = (dist.f[DIR_M00 ])[k1w ]; - real f1_S = (dist.f[DIR_0P0 ])[k1n ]; - real f1_N = (dist.f[DIR_0M0 ])[k1s ]; - real f1_B = (dist.f[DIR_00P ])[k1t ]; - real f1_T = (dist.f[DIR_00M ])[k1b ]; - real f1_SW = (dist.f[DIR_PP0 ])[k1ne ]; - real f1_NE = (dist.f[DIR_MM0 ])[k1sw ]; - real f1_NW = (dist.f[DIR_PM0 ])[k1se ]; - real f1_SE = (dist.f[DIR_MP0 ])[k1nw ]; - real f1_BW = (dist.f[DIR_P0P ])[k1te ]; - real f1_TE = (dist.f[DIR_M0M ])[k1bw ]; - real f1_TW = (dist.f[DIR_P0M ])[k1be ]; - real f1_BE = (dist.f[DIR_M0P ])[k1tw ]; - real f1_BS = (dist.f[DIR_0PP ])[k1tn ]; - real f1_TN = (dist.f[DIR_0MM ])[k1bs ]; - real f1_TS = (dist.f[DIR_0PM ])[k1bn ]; - real f1_BN = (dist.f[DIR_0MP ])[k1ts ]; + real f1_W = (dist.f[DIR_P00])[k1e ]; + real f1_E = (dist.f[DIR_M00])[k1w ]; + real f1_S = (dist.f[DIR_0P0])[k1n ]; + real f1_N = (dist.f[DIR_0M0])[k1s ]; + real f1_B = (dist.f[DIR_00P])[k1t ]; + real f1_T = (dist.f[DIR_00M])[k1b ]; + real f1_SW = (dist.f[DIR_PP0])[k1ne ]; + real f1_NE = (dist.f[DIR_MM0])[k1sw ]; + real f1_NW = (dist.f[DIR_PM0])[k1se ]; + real f1_SE = (dist.f[DIR_MP0])[k1nw ]; + real f1_BW = (dist.f[DIR_P0P])[k1te ]; + real f1_TE = (dist.f[DIR_M0M])[k1bw ]; + real f1_TW = (dist.f[DIR_P0M])[k1be ]; + real f1_BE = (dist.f[DIR_M0P])[k1tw ]; + real f1_BS = (dist.f[DIR_0PP])[k1tn ]; + real f1_TN = (dist.f[DIR_0MM])[k1bs ]; + real f1_TS = (dist.f[DIR_0PM])[k1bn ]; + real f1_BN = (dist.f[DIR_0MP])[k1ts ]; real f1_ZERO = (dist.f[DIR_000])[k1zero]; - real f1_BSW = (dist.f[DIR_PPP ])[k1tne ]; - real f1_BNE = (dist.f[DIR_MMP ])[k1tsw ]; - real f1_BNW = (dist.f[DIR_PMP ])[k1tse ]; - real f1_BSE = (dist.f[DIR_MPP ])[k1tnw ]; - real f1_TSW = (dist.f[DIR_PPM ])[k1bne ]; - real f1_TNE = (dist.f[DIR_MMM ])[k1bsw ]; - real f1_TNW = (dist.f[DIR_PMM ])[k1bse ]; - real f1_TSE = (dist.f[DIR_MPM ])[k1bnw ]; + real f1_BSW = (dist.f[DIR_PPP])[k1tne ]; + real f1_BNE = (dist.f[DIR_MMP])[k1tsw ]; + real f1_BNW = (dist.f[DIR_PMP])[k1tse ]; + real 
f1_BSE = (dist.f[DIR_MPP])[k1tnw ]; + real f1_TSW = (dist.f[DIR_PPM])[k1bne ]; + real f1_TNE = (dist.f[DIR_MMM])[k1bsw ]; + real f1_TNW = (dist.f[DIR_PMM])[k1bse ]; + real f1_TSE = (dist.f[DIR_MPM])[k1bnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities (for neighboring node) //! real drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + - f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + - f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); + f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + + f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); real vx1 = (((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + ((f1_BE - f1_TW) + (f1_TE - f1_BW)) + ((f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_E - f1_W)) / (c1o1 + drho1); + (f1_E - f1_W)) / (c1o1 + drho1); real vx2 = ((-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + ((f1_BN - f1_TS) + (f1_TN - f1_BS)) + (-(f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_N - f1_S)) / (c1o1 + drho1); + (f1_N - f1_S)) / (c1o1 + drho1); real vx3 = (((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) + (-(f1_BN - f1_TS) + (f1_TN - f1_BS)) + ((f1_TE - f1_BW) - (f1_BE - f1_TW)) + - (f1_T - f1_B)) / (c1o1 + drho1); + (f1_T - f1_B)) / (c1o1 + drho1); real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); @@ -1037,33 +1067,33 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// //! write the new distributions to the bc nodes //! 
- (dist.f[DIR_P00 ])[ke ] = f1_W ; - (dist.f[DIR_M00 ])[kw ] = f1_E ; - (dist.f[DIR_0P0 ])[kn ] = f1_S ; - (dist.f[DIR_0M0 ])[ks ] = f1_N ; - (dist.f[DIR_00P ])[kt ] = f1_B ; - (dist.f[DIR_00M ])[kb ] = f1_T ; - (dist.f[DIR_PP0 ])[kne ] = f1_SW ; - (dist.f[DIR_MM0 ])[ksw ] = f1_NE ; - (dist.f[DIR_PM0 ])[kse ] = f1_NW ; - (dist.f[DIR_MP0 ])[knw ] = f1_SE ; - (dist.f[DIR_P0P ])[kte ] = f1_BW ; - (dist.f[DIR_M0M ])[kbw ] = f1_TE ; - (dist.f[DIR_P0M ])[kbe ] = f1_TW ; - (dist.f[DIR_M0P ])[ktw ] = f1_BE ; - (dist.f[DIR_0PP ])[ktn ] = f1_BS ; - (dist.f[DIR_0MM ])[kbs ] = f1_TN ; - (dist.f[DIR_0PM ])[kbn ] = f1_TS ; - (dist.f[DIR_0MP ])[kts ] = f1_BN ; + (dist.f[DIR_P00])[ke ] = f1_W ; + (dist.f[DIR_M00])[kw ] = f1_E ; + (dist.f[DIR_0P0])[kn ] = f1_S ; + (dist.f[DIR_0M0])[ks ] = f1_N ; + (dist.f[DIR_00P])[kt ] = f1_B ; + (dist.f[DIR_00M])[kb ] = f1_T ; + (dist.f[DIR_PP0])[kne ] = f1_SW ; + (dist.f[DIR_MM0])[ksw ] = f1_NE ; + (dist.f[DIR_PM0])[kse ] = f1_NW ; + (dist.f[DIR_MP0])[knw ] = f1_SE ; + (dist.f[DIR_P0P])[kte ] = f1_BW ; + (dist.f[DIR_M0M])[kbw ] = f1_TE ; + (dist.f[DIR_P0M])[kbe ] = f1_TW ; + (dist.f[DIR_M0P])[ktw ] = f1_BE ; + (dist.f[DIR_0PP])[ktn ] = f1_BS ; + (dist.f[DIR_0MM])[kbs ] = f1_TN ; + (dist.f[DIR_0PM])[kbn ] = f1_TS ; + (dist.f[DIR_0MP])[kts ] = f1_BN ; (dist.f[DIR_000])[kzero] = f1_ZERO; - (dist.f[DIR_PPP ])[ktne ] = f1_BSW ; - (dist.f[DIR_MMP ])[ktsw ] = f1_BNE ; - (dist.f[DIR_PMP ])[ktse ] = f1_BNW ; - (dist.f[DIR_MPP ])[ktnw ] = f1_BSE ; - (dist.f[DIR_PPM ])[kbne ] = f1_TSW ; - (dist.f[DIR_MMM ])[kbsw ] = f1_TNE ; - (dist.f[DIR_PMM ])[kbse ] = f1_TNW ; - (dist.f[DIR_MPM ])[kbnw ] = f1_TSE ; + (dist.f[DIR_PPP])[ktne ] = f1_BSW ; + (dist.f[DIR_MMP])[ktsw ] = f1_BNE ; + (dist.f[DIR_PMP])[ktse ] = f1_BNW ; + (dist.f[DIR_MPP])[ktnw ] = f1_BSE ; + (dist.f[DIR_PPM])[kbne ] = f1_TSW ; + (dist.f[DIR_MMM])[kbsw ] = f1_TNE ; + (dist.f[DIR_PMM])[kbse ] = f1_TNW ; + (dist.f[DIR_MPM])[kbnw ] = f1_TSE ; } } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1107,16 +1137,17 @@ __global__ void QPressDeviceNEQ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// -__global__ void LB_BC_Press_East27( int nx, - int ny, - int tz, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void LB_BC_Press_East27( + int nx, + int ny, + int tz, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //thread-index int ty = blockIdx.x; @@ -1124,9 +1155,9 @@ __global__ void LB_BC_Press_East27( int nx, int k, k1, nxny; // Zugriff auf arrays im device - int x = tx + STARTOFFX; // Globaler x-Index - int y = ty + STARTOFFY; // Globaler y-Index - int z = tz + STARTOFFZ; // Globaler z-Index + int x = tx + STARTOFFX; // Globaler x-Index + int y = ty + STARTOFFY; // Globaler y-Index + int z = tz + STARTOFFZ; // Globaler z-Index k = nx*(ny*z + y) + x; nxny = nx*ny; @@ -1137,63 +1168,63 @@ __global__ void LB_BC_Press_East27( int nx, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - 
D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 
*size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * 
numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -1312,69 +1343,69 @@ __global__ void LB_BC_Press_East27( int nx, real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_W = (D.f[DIR_P00 ])[k1e ]; - f1_E = (D.f[DIR_M00 ])[k1w ]; - f1_S = (D.f[DIR_0P0 ])[k1n ]; - f1_N = (D.f[DIR_0M0 ])[k1s ]; - f1_B = (D.f[DIR_00P ])[k1t ]; - f1_T = (D.f[DIR_00M ])[k1b ]; - f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - f1_NW = (D.f[DIR_PM0 ])[k1se ]; - f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - f1_BW = (D.f[DIR_P0P ])[k1te ]; - f1_TE = (D.f[DIR_M0M ])[k1bw ]; - f1_TW = (D.f[DIR_P0M ])[k1be ]; - f1_BE = (D.f[DIR_M0P ])[k1tw ]; - f1_BS = (D.f[DIR_0PP ])[k1tn ]; - f1_TN = (D.f[DIR_0MM ])[k1bs ]; - f1_TS = (D.f[DIR_0PM ])[k1bn ]; - f1_BN = (D.f[DIR_0MP ])[k1ts ]; + f1_W = (D.f[DIR_P00])[k1e ]; + f1_E = (D.f[DIR_M00])[k1w ]; + f1_S = (D.f[DIR_0P0])[k1n ]; + f1_N = (D.f[DIR_0M0])[k1s ]; + f1_B = (D.f[DIR_00P])[k1t ]; + f1_T = (D.f[DIR_00M])[k1b ]; + f1_SW = (D.f[DIR_PP0])[k1ne ]; + f1_NE = (D.f[DIR_MM0])[k1sw ]; + f1_NW = (D.f[DIR_PM0])[k1se ]; + f1_SE = (D.f[DIR_MP0])[k1nw ]; + f1_BW = (D.f[DIR_P0P])[k1te ]; + f1_TE = (D.f[DIR_M0M])[k1bw ]; + f1_TW = (D.f[DIR_P0M])[k1be ]; + f1_BE = (D.f[DIR_M0P])[k1tw ]; + f1_BS = (D.f[DIR_0PP])[k1tn ]; + f1_TN = (D.f[DIR_0MM])[k1bs ]; + f1_TS = (D.f[DIR_0PM])[k1bn ]; + f1_BN = (D.f[DIR_0MP])[k1ts ]; f1_ZERO = (D.f[DIR_000])[k1zero]; - f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - f1_BNE = (D.f[DIR_MMP 
])[k1tsw ]; - f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - f1_TNE = (D.f[DIR_MMM ])[k1bsw ]; - f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + f1_BSW = (D.f[DIR_PPP])[k1tne ]; + f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + f1_BNW = (D.f[DIR_PMP])[k1tse ]; + f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + f1_TSW = (D.f[DIR_PPM])[k1bne ]; + f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + f1_TNW = (D.f[DIR_PMM])[k1bse ]; + f1_TSE = (D.f[DIR_MPM])[k1bnw ]; real drho1 = f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; __syncthreads(); - (D.f[DIR_P00 ])[ke ] = f1_W -c2o27*drho1; - (D.f[DIR_M00 ])[kw ] = f1_E -c2o27*drho1; - (D.f[DIR_0P0 ])[kn ] = f1_S -c2o27*drho1; - (D.f[DIR_0M0 ])[ks ] = f1_N -c2o27*drho1; - (D.f[DIR_00P ])[kt ] = f1_B -c2o27*drho1; - (D.f[DIR_00M ])[kb ] = f1_T -c2o27*drho1; - (D.f[DIR_PP0 ])[kne ] = f1_SW -c1o54*drho1; - (D.f[DIR_MM0 ])[ksw ] = f1_NE -c1o54*drho1; - (D.f[DIR_PM0 ])[kse ] = f1_NW -c1o54*drho1; - (D.f[DIR_MP0 ])[knw ] = f1_SE -c1o54*drho1; - (D.f[DIR_P0P ])[kte ] = f1_BW -c1o54*drho1; - (D.f[DIR_M0M ])[kbw ] = f1_TE -c1o54*drho1; - (D.f[DIR_P0M ])[kbe ] = f1_TW -c1o54*drho1; - (D.f[DIR_M0P ])[ktw ] = f1_BE -c1o54*drho1; - (D.f[DIR_0PP ])[ktn ] = f1_BS -c1o54*drho1; - (D.f[DIR_0MM ])[kbs ] = f1_TN -c1o54*drho1; - (D.f[DIR_0PM ])[kbn ] = f1_TS -c1o54*drho1; - (D.f[DIR_0MP ])[kts ] = f1_BN -c1o54*drho1; + (D.f[DIR_P00])[ke ] = f1_W -c2o27*drho1; + (D.f[DIR_M00])[kw ] = f1_E -c2o27*drho1; + (D.f[DIR_0P0])[kn ] = f1_S -c2o27*drho1; + (D.f[DIR_0M0])[ks ] = f1_N -c2o27*drho1; + (D.f[DIR_00P])[kt ] = f1_B -c2o27*drho1; + (D.f[DIR_00M])[kb ] = f1_T -c2o27*drho1; + (D.f[DIR_PP0])[kne ] = f1_SW -c1o54*drho1; + (D.f[DIR_MM0])[ksw ] = f1_NE -c1o54*drho1; + (D.f[DIR_PM0])[kse ] = f1_NW -c1o54*drho1; + (D.f[DIR_MP0])[knw ] = f1_SE -c1o54*drho1; + (D.f[DIR_P0P])[kte ] = f1_BW 
-c1o54*drho1; + (D.f[DIR_M0M])[kbw ] = f1_TE -c1o54*drho1; + (D.f[DIR_P0M])[kbe ] = f1_TW -c1o54*drho1; + (D.f[DIR_M0P])[ktw ] = f1_BE -c1o54*drho1; + (D.f[DIR_0PP])[ktn ] = f1_BS -c1o54*drho1; + (D.f[DIR_0MM])[kbs ] = f1_TN -c1o54*drho1; + (D.f[DIR_0PM])[kbn ] = f1_TS -c1o54*drho1; + (D.f[DIR_0MP])[kts ] = f1_BN -c1o54*drho1; (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1; - (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1; - (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1; - (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1; - (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1; - (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1; - (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1; - (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1; - (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1; + (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1; + (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1; + (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1; + (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1; + (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1; + (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1; + (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1; + (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1; } __syncthreads(); -} +} ////////////////////////////////////////////////////////////////////////////// @@ -1416,83 +1447,84 @@ __global__ void LB_BC_Press_East27( int nx, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDevice27(real* rhoBC, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDevice27( + real* rhoBC, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = 
&DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; 
+ D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = 
&DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -1502,29 +1534,29 @@ __global__ void QPressDevice27(real* rhoBC, if(k<numberOfBCnodes) { - real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, - *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P 
* numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1567,46 +1599,46 @@ __global__ void QPressDevice27(real* rhoBC, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 
])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real q, vx1, vx2, vx3, drho; vx1 = ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + - (f_E - f_W); + (f_E - f_W); vx2 = (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + - (f_N - f_S); + (f_N - f_S); vx3 = ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + (-(f_BN - f_TS) + (f_TN - f_BS)) 
+ ((f_TE - f_BW) - (f_BE - f_TW)) + - (f_T - f_B); + (f_T - f_B); real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); ////////////////////////////////////////////////////////////////////////// @@ -1616,245 +1648,245 @@ __global__ void QPressDevice27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + 
D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = 
&DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); - //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1 )+c9over2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); + //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1 )+c9over2*( vx1 )*( vx1 )-cu_sq); } q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); - //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1 
)+c9over2*(-vx1 )*(-vx1 )-cu_sq); + (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); + //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cu_sq); } q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); - //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*( vx2 )+c9over2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); + //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*( vx2 )+c9over2*( vx2 )*( vx2 )-cu_sq); } q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); - //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); + //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cu_sq); } q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); - //(D.f[DIR_00P])[kt]=c2over27* (drho+three*( vx3)+c9over2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); + //(D.f[DIR_00P])[kt]=c2over27* (drho+three*( vx3)+c9over2*( vx3)*( vx3)-cu_sq); } q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); - //(D.f[DIR_00M])[kb]=c2over27* (drho+three*( -vx3)+c9over2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); + //(D.f[DIR_00M])[kb]=c2over27* (drho+three*( -vx3)+c9over2*( -vx3)*( -vx3)-cu_sq); } q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + 
//(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); } q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); } q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); } q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); } q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); } q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); 
+ (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); } q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); } q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); - //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); } q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cu_sq); + (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cu_sq); } q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); - //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); + //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); } q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - 
//(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cu_sq); + (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cu_sq); } q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); - //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); + //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); } q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); } q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); - //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); } q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + 
//(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); } q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); } q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); } q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); } q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); - //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); } q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) { - 
(D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); - //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); } } } @@ -1899,86 +1931,87 @@ __global__ void QPressDevice27(real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceAntiBB27( real* rhoBC, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceAntiBB27( + real* rhoBC, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - 
D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * 
numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -1988,37 +2021,37 @@ __global__ void QPressDeviceAntiBB27( real* rhoBC, if(k<numberOfBCnodes) { - real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, - *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 *numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 *numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 *numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 *numberOfBCnodes]; - q_dirT = &QQ[DIR_00P *numberOfBCnodes]; - q_dirB = &QQ[DIR_00M *numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 *numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 *numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 *numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 *numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P *numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M *numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M *numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P *numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP *numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM *numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM *numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP *numberOfBCnodes]; - q_dirTNE = &QQ[DIR_PPP *numberOfBCnodes]; - q_dirTSW = &QQ[DIR_MMP *numberOfBCnodes]; - q_dirTSE = &QQ[DIR_PMP *numberOfBCnodes]; - q_dirTNW = &QQ[DIR_MPP 
*numberOfBCnodes]; - q_dirBNE = &QQ[DIR_PPM *numberOfBCnodes]; - q_dirBSW = &QQ[DIR_MMM *numberOfBCnodes]; - q_dirBSE = &QQ[DIR_PMM *numberOfBCnodes]; - q_dirBNW = &QQ[DIR_MPM *numberOfBCnodes]; + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; + q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; + q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; + q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes]; + q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes]; + q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes]; + q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes]; + q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes]; //////////////////////////////////////////////////////////////////////////////// //index unsigned int KQK = k_Q[k]; @@ -2053,123 +2086,123 @@ __global__ void QPressDeviceAntiBB27( real* rhoBC, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 
])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; f_ZERO = (D.f[DIR_000])[kzero]; //////////////////////////////////////////////////////////////////////////////// //real vx1, vx2, vx3, drho; //vx1 = ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + - // (f_E - f_W); + // (f_E - f_W); //vx2 = (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + // ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + - // (f_N - f_S); + // (f_N - f_S); //vx3 = ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + // (-(f_BN 
- f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + - // (f_T - f_B); + // (f_T - f_B); //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); ////////////////////////////////////////////////////////////////////////// real drho = f_ZERO+f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+ - f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; + f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW; drho = drho - rhoBC[k]; - drho *= 0.01f; + drho *= 0.01f; //////////////////////////////////////////////////////////////////////////////// - real q; + real q; //deltaRho = (rhoBC[k] + one) / (deltaRho + one); //////////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = 
&DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_M00])[kw]=f_W-c2o27*drho; + (D.f[DIR_M00])[kw]=f_W-c2o27*drho; } q = q_dirW[k]; @@ -2181,19 +2214,19 @@ 
__global__ void QPressDeviceAntiBB27( real* rhoBC, q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0M0])[ks]=f_S-c2o27*drho; + (D.f[DIR_0M0])[ks]=f_S-c2o27*drho; } q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_0P0])[kn]=f_N-c2o27*drho; + (D.f[DIR_0P0])[kn]=f_N-c2o27*drho; } q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_00M])[kb]=f_B-c2o27*drho; + (D.f[DIR_00M])[kb]=f_B-c2o27*drho; } q = q_dirB[k]; @@ -2229,13 +2262,13 @@ __global__ void QPressDeviceAntiBB27( real* rhoBC, q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho; + (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho; } q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) { - (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho; + (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho; } q = q_dirBE[k]; @@ -2364,21 +2397,22 @@ __global__ void QPressDeviceAntiBB27( real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceFixBackflow27( real* rhoBC, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceFixBackflow27( + real* rhoBC, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -2426,63 +2460,63 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC, Distributions27 D; if 
(isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// (D.f[DIR_M00])[kw] = c2o27 * deltaRho; @@ -2555,21 +2589,22 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC, ////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceDirDepBot27( real* rhoBC, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceDirDepBot27( + real* rhoBC, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler 
y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -2617,86 +2652,86 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC, Distributions27 D; if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + 
D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = 
&DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E,f_W,f_N,f_S,f_T,f_NE,f_SW,f_SE,f_NW,f_TE,f_TW,f_TN,f_TS,f_ZERO,f_TNE,f_TSW,f_TSE,f_TNW;//, //f_B,f_BW,f_BE,f_BS,f_BN,f_BSW,f_BNE,f_BNW,f_BSE; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - 
f_TN = (D.f[DIR_0PP ])[ktn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_TS = (D.f[DIR_0MP])[kts ]; f_ZERO = (D.f[DIR_000])[kzero]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //f_B = (four*rho- four*f_SW- eight*f_TSW-four*f_W- eight*f_TW- four*f_NW- eight*f_TNW-four*f_S- eight*f_TS-four*f_ZERO+ f_T-four*f_N- eight*f_TN- four*f_SE- eight*f_TSE-four*f_E- eight*f_TE- four*f_NE- eight*f_TNE)/nine; @@ -2793,496 +2828,474 @@ __global__ void QPressDeviceDirDepBot27( real* rhoBC, - - +__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real cs) +{ + return f1[dir] * cs + (c1o1 - cs) * f[dir]; +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressNoRhoDevice27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressNoRhoDevice27( + real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* 
neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + int direction) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + ////////////////////////////////////////////////////////////////////////// - const unsigned k = nx*(ny*z + y) + x; + if(nodeIndex >= numberOfBCnodes) return; + + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[nodeIndex]; + // unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + //index1 + unsigned int K1QK = k_N[nodeIndex]; + //unsigned int k1zero= K1QK; + unsigned int k1e = K1QK; + unsigned int k1w = neighborX[K1QK]; + unsigned int k1n = K1QK; + unsigned int k1s = neighborY[K1QK]; + unsigned int k1t = K1QK; + unsigned int k1b = neighborZ[K1QK]; + unsigned int k1sw = neighborY[k1w]; + 
unsigned int k1ne = K1QK; + unsigned int k1se = k1s; + unsigned int k1nw = k1w; + unsigned int k1bw = neighborZ[k1w]; + unsigned int k1te = K1QK; + unsigned int k1be = k1b; + unsigned int k1tw = k1w; + unsigned int k1bs = neighborZ[k1s]; + unsigned int k1tn = K1QK; + unsigned int k1bn = k1b; + unsigned int k1ts = k1s; + unsigned int k1tse = k1s; + unsigned int k1bnw = k1bw; + unsigned int k1tnw = k1w; + unsigned int k1bse = k1bs; + unsigned int k1tsw = k1sw; + unsigned int k1bne = k1b; + unsigned int k1tne = K1QK; + unsigned int k1bsw = neighborZ[k1sw]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f[27], f1[27]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f1[DIR_P00] = (dist.f[DIR_P00])[k1e ]; + f1[DIR_M00] = (dist.f[DIR_M00])[k1w ]; + f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n ]; + f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s ]; + f1[DIR_00P] = (dist.f[DIR_00P])[k1t ]; + f1[DIR_00M] = (dist.f[DIR_00M])[k1b ]; + f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne ]; + f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw ]; + f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se ]; + f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw ]; + f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te ]; + f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw ]; + f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be ]; + f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw ]; + f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn ]; + f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs ]; + f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn ]; + f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts ]; + // f1[DIR_000] = (dist.f[DIR_000])[k1zero]; + f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ]; + f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ]; + f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ]; + f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ]; + f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ]; + f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ]; + f1[DIR_PMM] = 
(dist.f[DIR_PMM])[k1bse ]; + f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_P00] = (dist.f[DIR_P00])[ke ]; + f[DIR_M00] = (dist.f[DIR_M00])[kw ]; + f[DIR_0P0] = (dist.f[DIR_0P0])[kn ]; + f[DIR_0M0] = (dist.f[DIR_0M0])[ks ]; + f[DIR_00P] = (dist.f[DIR_00P])[kt ]; + f[DIR_00M] = (dist.f[DIR_00M])[kb ]; + f[DIR_PP0] = (dist.f[DIR_PP0])[kne ]; + f[DIR_MM0] = (dist.f[DIR_MM0])[ksw ]; + f[DIR_PM0] = (dist.f[DIR_PM0])[kse ]; + f[DIR_MP0] = (dist.f[DIR_MP0])[knw ]; + f[DIR_P0P] = (dist.f[DIR_P0P])[kte ]; + f[DIR_M0M] = (dist.f[DIR_M0M])[kbw ]; + f[DIR_P0M] = (dist.f[DIR_P0M])[kbe ]; + f[DIR_M0P] = (dist.f[DIR_M0P])[ktw ]; + f[DIR_0PP] = (dist.f[DIR_0PP])[ktn ]; + f[DIR_0MM] = (dist.f[DIR_0MM])[kbs ]; + f[DIR_0PM] = (dist.f[DIR_0PM])[kbn ]; + f[DIR_0MP] = (dist.f[DIR_0MP])[kts ]; + // f[DIR_000] = (dist.f[DIR_000])[kzero]; + f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ]; + f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ]; + f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ]; + f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ]; + f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ]; + f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ]; + f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ]; + f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// - if(k<numberOfBCnodes) + + real cs = c1o1 / sqrtf(c3o1); + + ////////////////////////////////////////////////////////////////////////// + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + switch(direction) { - //////////////////////////////////////////////////////////////////////////////// - //index - unsigned int KQK = k_Q[k]; - //unsigned int kzero= KQK; - unsigned int ke = KQK; - unsigned int kw = neighborX[KQK]; - unsigned int kn = KQK; - unsigned int ks = neighborY[KQK]; - unsigned int kt = KQK; - unsigned int kb = neighborZ[KQK]; - unsigned int ksw = neighborY[kw]; - 
unsigned int kne = KQK; - unsigned int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = KQK; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = KQK; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = KQK; - unsigned int kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////// - //index1 - unsigned int K1QK = k_N[k]; - //unsigned int k1zero= K1QK; - unsigned int k1e = K1QK; - unsigned int k1w = neighborX[K1QK]; - unsigned int k1n = K1QK; - unsigned int k1s = neighborY[K1QK]; - unsigned int k1t = K1QK; - unsigned int k1b = neighborZ[K1QK]; - unsigned int k1sw = neighborY[k1w]; - unsigned int k1ne = K1QK; - unsigned int k1se = k1s; - unsigned int k1nw = k1w; - unsigned int k1bw = neighborZ[k1w]; - unsigned int k1te = K1QK; - unsigned int k1be = k1b; - unsigned int k1tw = k1w; - unsigned int k1bs = neighborZ[k1s]; - unsigned int k1tn = K1QK; - unsigned int k1bn = k1b; - unsigned int k1ts = k1s; - unsigned int k1tse = k1s; - unsigned int k1bnw = k1bw; - unsigned int k1tnw = k1w; - unsigned int k1bse = k1bs; - unsigned int k1tsw = k1sw; - unsigned int k1bne = k1b; - unsigned int k1tne = K1QK; - unsigned int k1bsw = neighborZ[k1sw]; - //////////////////////////////////////////////////////////////////////////////// - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 
] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f1_E = (D.f[DIR_P00 ])[k1e ]; - real f1_W = (D.f[DIR_M00 ])[k1w ]; - real f1_N = (D.f[DIR_0P0 ])[k1n ]; - real f1_S = (D.f[DIR_0M0 ])[k1s ]; - real f1_T = (D.f[DIR_00P ])[k1t ]; - real f1_B = (D.f[DIR_00M ])[k1b ]; - real f1_NE = (D.f[DIR_PP0 ])[k1ne ]; - real f1_SW = (D.f[DIR_MM0 ])[k1sw ]; - real f1_SE = (D.f[DIR_PM0 ])[k1se ]; - real f1_NW = (D.f[DIR_MP0 ])[k1nw ]; - real f1_TE = (D.f[DIR_P0P ])[k1te ]; - real f1_BW = (D.f[DIR_M0M ])[k1bw ]; - real f1_BE = (D.f[DIR_P0M ])[k1be ]; - real f1_TW = (D.f[DIR_M0P ])[k1tw ]; - real f1_TN = (D.f[DIR_0PP ])[k1tn ]; - real f1_BS = (D.f[DIR_0MM ])[k1bs ]; - real f1_BN = (D.f[DIR_0PM ])[k1bn ]; - real f1_TS = (D.f[DIR_0MP ])[k1ts ]; - //real f1_ZERO = (D.f[DIR_000])[k1zero]; - real f1_TNE = (D.f[DIR_PPP ])[k1tne ]; - real f1_TSW = (D.f[DIR_MMP ])[k1tsw ]; - real f1_TSE = (D.f[DIR_PMP ])[k1tse ]; - real f1_TNW = (D.f[DIR_MPP ])[k1tnw ]; - real f1_BNE = (D.f[DIR_PPM ])[k1bne ]; - real f1_BSW = (D.f[DIR_MMM ])[k1bsw ]; - real f1_BSE = (D.f[DIR_PMM ])[k1bse ]; - real f1_BNW = (D.f[DIR_MPM ])[k1bnw ]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real f_E = (D.f[DIR_P00 ])[ke ]; - real f_W = (D.f[DIR_M00 ])[kw ]; - real f_N = (D.f[DIR_0P0 ])[kn ]; - real f_S = (D.f[DIR_0M0 ])[ks ]; - real f_T = (D.f[DIR_00P ])[kt ]; - real f_B = (D.f[DIR_00M ])[kb ]; - real f_NE = (D.f[DIR_PP0 ])[kne ]; - real f_SW = (D.f[DIR_MM0 ])[ksw ]; - real f_SE = (D.f[DIR_PM0 ])[kse ]; - real f_NW = (D.f[DIR_MP0 ])[knw ]; - real f_TE = (D.f[DIR_P0P ])[kte ]; - real f_BW = (D.f[DIR_M0M ])[kbw ]; - real f_BE = (D.f[DIR_P0M ])[kbe ]; - real f_TW = (D.f[DIR_M0P ])[ktw ]; - real f_TN = (D.f[DIR_0PP ])[ktn ]; - real f_BS = (D.f[DIR_0MM ])[kbs ]; - real f_BN = (D.f[DIR_0PM ])[kbn ]; - real f_TS = 
(D.f[DIR_0MP ])[kts ]; - //real f_ZERO = (D.f[DIR_000])[kzero]; - real f_TNE = (D.f[DIR_PPP ])[ktne ]; - real f_TSW = (D.f[DIR_MMP ])[ktsw ]; - real f_TSE = (D.f[DIR_PMP ])[ktse ]; - real f_TNW = (D.f[DIR_MPP ])[ktnw ]; - real f_BNE = (D.f[DIR_PPM ])[kbne ]; - real f_BSW = (D.f[DIR_MMM ])[kbsw ]; - real f_BSE = (D.f[DIR_PMM ])[kbse ]; - real f_BNW = (D.f[DIR_MPM ])[kbnw ]; - ////////////////////////////////////////////////////////////////////////// + case MZZ: + (dist.f[DIR_P00])[ke ] = computeOutflowDistribution(f, f1, DIR_P00, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + (dist.f[DIR_PP0])[kne ] = computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + break; + + case PZZ: + (dist.f[DIR_M00])[kw ] = computeOutflowDistribution(f, f1, DIR_M00, cs); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZMZ: + (dist.f[DIR_0P0])[kn ] = computeOutflowDistribution(f, f1, DIR_0P0, cs); + (dist.f[DIR_PP0])[kne ] = 
computeOutflowDistribution(f, f1, DIR_PP0, cs); + (dist.f[DIR_MP0])[knw ] = computeOutflowDistribution(f, f1, DIR_MP0, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + break; + + case ZPZ: + (dist.f[DIR_0M0])[ks ] = computeOutflowDistribution(f, f1, DIR_0M0, cs); + (dist.f[DIR_PM0])[kse ] = computeOutflowDistribution(f, f1, DIR_PM0, cs); + (dist.f[DIR_MM0])[ksw ] = computeOutflowDistribution(f, f1, DIR_MM0, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + + case ZZM: + (dist.f[DIR_00P])[kt ] = computeOutflowDistribution(f, f1, DIR_00P, cs); + (dist.f[DIR_P0P])[kte ] = computeOutflowDistribution(f, f1, DIR_P0P, cs); + (dist.f[DIR_M0P])[ktw ] = computeOutflowDistribution(f, f1, DIR_M0P, cs); + (dist.f[DIR_0PP])[ktn ] = computeOutflowDistribution(f, f1, DIR_0PP, cs); + (dist.f[DIR_0MP])[kts ] = computeOutflowDistribution(f, f1, DIR_0MP, cs); + (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs); + (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs); + (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs); + (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, 
cs); + break; + + case ZZP: + (dist.f[DIR_00M])[kb ] = computeOutflowDistribution(f, f1, DIR_00M, cs); + (dist.f[DIR_P0M])[kbe ] = computeOutflowDistribution(f, f1, DIR_P0M, cs); + (dist.f[DIR_M0M])[kbw ] = computeOutflowDistribution(f, f1, DIR_M0M, cs); + (dist.f[DIR_0PM])[kbn ] = computeOutflowDistribution(f, f1, DIR_0PM, cs); + (dist.f[DIR_0MM])[kbs ] = computeOutflowDistribution(f, f1, DIR_0MM, cs); + (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs); + (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs); + (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs); + (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs); + break; + default: + break; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + + - //real vx1, vx2, vx3, drho; - //real vx1, vx2, vx3, drho, drho1; - ////////////////////////////////////////////////////////////////////////// - //Dichte - // drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW + - // f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + - // f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); - // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + - // f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + - // f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); - - ////////////////////////////////////////////////////////////////////////// - //Ux - //vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + - // ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + - // (f_E - f_W)) /(one + drho); - // vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + - // ((f_BN - f_TS) + (f_TN - 
f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + - // (f_N - f_S)) /(one + drho); - // vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + - // (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + - // (f_T - f_B)) /(one + drho); - //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); - // ////////////////////////////////////////////////////////////////////////// - ////real omega = om1; - // real cusq = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); - // ////////////////////////////////////////////////////////////////////////// - ////T�st MK - ////if(vx1 < zero) vx1 = zero; - // ////////////////////////////////////////////////////////////////////////// - // real fZERO = c8over27* (drho1-(one + drho1)*(cusq)) ; - // real fE = c2over27* (drho1+(one + drho1)*(three*( vx1 )+c9over2*( vx1 )*( vx1 )-cusq)); - // real fW = c2over27* (drho1+(one + drho1)*(three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cusq)); - // real fN = c2over27* (drho1+(one + drho1)*(three*( vx2 )+c9over2*( vx2 )*( vx2 )-cusq)); - // real fS = c2over27* (drho1+(one + drho1)*(three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cusq)); - // real fT = c2over27* (drho1+(one + drho1)*(three*( vx3)+c9over2*( vx3)*( vx3)-cusq)); - // real fB = c2over27* (drho1+(one + drho1)*(three*( -vx3)+c9over2*( -vx3)*( -vx3)-cusq)); - // real fNE = c1over54* (drho1+(one + drho1)*(three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cusq)); - // real fSW = c1over54* (drho1+(one + drho1)*(three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cusq)); - // real fSE = c1over54* (drho1+(one + drho1)*(three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cusq)); - // real fNW = c1over54* (drho1+(one + drho1)*(three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cusq)); - // real fTE = c1over54* (drho1+(one + drho1)*(three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cusq)); - // real fBW = c1over54* (drho1+(one + drho1)*(three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cusq)); - // real fBE = c1over54* (drho1+(one + drho1)*(three*( vx1 
-vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cusq)); - // real fTW = c1over54* (drho1+(one + drho1)*(three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cusq)); - // real fTN = c1over54* (drho1+(one + drho1)*(three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cusq)); - // real fBS = c1over54* (drho1+(one + drho1)*(three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cusq)); - // real fBN = c1over54* (drho1+(one + drho1)*(three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cusq)); - // real fTS = c1over54* (drho1+(one + drho1)*(three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cusq)); - // real fTNE = c1over216* (drho1+(one + drho1)*(three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq)); - // real fBSW = c1over216* (drho1+(one + drho1)*(three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq)); - // real fBNE = c1over216* (drho1+(one + drho1)*(three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq)); - // real fTSW = c1over216* (drho1+(one + drho1)*(three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq)); - // real fTSE = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq)); - // real fBNW = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq)); - // real fBSE = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); - // real fTNW = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); - - real cs = c1o1 / sqrtf(c3o1); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //no velocity - ////////////////////////////////////////// - f_E = f1_E * cs + (c1o1 - cs) * f_E ; - f_W = f1_W * cs + (c1o1 - cs) * f_W ; - f_N = f1_N * cs + (c1o1 - cs) * f_N ; - f_S = f1_S * cs + (c1o1 - cs) * f_S ; - f_T = f1_T * cs + (c1o1 - cs) * f_T ; - f_B = f1_B * cs + (c1o1 - cs) * f_B ; - f_NE = f1_NE * cs + 
(c1o1 - cs) * f_NE ; - f_SW = f1_SW * cs + (c1o1 - cs) * f_SW ; - f_SE = f1_SE * cs + (c1o1 - cs) * f_SE ; - f_NW = f1_NW * cs + (c1o1 - cs) * f_NW ; - f_TE = f1_TE * cs + (c1o1 - cs) * f_TE ; - f_BW = f1_BW * cs + (c1o1 - cs) * f_BW ; - f_BE = f1_BE * cs + (c1o1 - cs) * f_BE ; - f_TW = f1_TW * cs + (c1o1 - cs) * f_TW ; - f_TN = f1_TN * cs + (c1o1 - cs) * f_TN ; - f_BS = f1_BS * cs + (c1o1 - cs) * f_BS ; - f_BN = f1_BN * cs + (c1o1 - cs) * f_BN ; - f_TS = f1_TS * cs + (c1o1 - cs) * f_TS ; - f_TNE = f1_TNE * cs + (c1o1 - cs) * f_TNE ; - f_TSW = f1_TSW * cs + (c1o1 - cs) * f_TSW ; - f_TSE = f1_TSE * cs + (c1o1 - cs) * f_TSE ; - f_TNW = f1_TNW * cs + (c1o1 - cs) * f_TNW ; - f_BNE = f1_BNE * cs + (c1o1 - cs) * f_BNE ; - f_BSW = f1_BSW * cs + (c1o1 - cs) * f_BSW ; - f_BSE = f1_BSE * cs + (c1o1 - cs) * f_BSE ; - f_BNW = f1_BNW * cs + (c1o1 - cs) * f_BNW ; - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //with velocity - //if(true){//vx1 >= zero){ - // real csMvx = one / sqrtf(three) - vx1; - // //real csMvy = one / sqrtf(three) - vx2; - // /////////////////////////////////////////// - // // X - // f_W = f1_W * csMvx + (one - csMvx) * f_W ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_NW = f1_NW * csMvx + (one - csMvx) * f_NW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_SW = f1_SW * csMvx + (one - csMvx) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TW = f1_TW * csMvx + (one - csMvx) * f_TW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BW = f1_BW * csMvx + (one - csMvx) * f_BW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * 
((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1); - // /////////////////////////////////////////// - // // Y - // //f_S = f1_S * csMvy + (one - csMvy) * f_S ;//- c2over27 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS ;//- c1over54 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2); - // //f_S = f1_S * csMvy + (one - csMvy) * f_S; - // //f_SE = f1_SE * csMvy + (one - csMvy) * f_SE; - // //f_SW = f1_SW * csMvy + (one - csMvy) * f_SW; - // //f_TS = f1_TS * csMvy + (one - csMvy) * f_TS; - // //f_BS = f1_BS * csMvy + (one - csMvy) * f_BS; - // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE; - // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW; - // //f_BSE = f1_BSE 
* csMvy + (one - csMvy) * f_BSE; - // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW; - // ////////////////////////////////////////////////////////////////////////// - //} - //else - //{ - // /////////////////////////////////////////// - // // X - // vx1 = vx1 * 0.9; - // f_W = f_E - six * c2over27 * ( vx1 ); - // f_NW = f_SE - six * c1over54 * ( vx1-vx2 ); - // f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // f_TW = f_BE - six * c1over54 * ( vx1 -vx3); - // f_BW = f_TE - six * c1over54 * ( vx1 +vx3); - // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3); - // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3); - // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - // // Y - // //vx2 = vx2 * 0.9; - // //f_S = f_N - six * c2over27 * ( vx2 ); - // //f_SE = f_NW - six * c1over54 * (-vx1+vx2 ); - // //f_SW = f_NE - six * c1over54 * ( vx1+vx2 ); - // //f_TS = f_BN - six * c1over54 * ( vx2-vx3); - // //f_BS = f_TN - six * c1over54 * ( vx2+vx3); - // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3); - // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3); - // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3); - // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3); - // /////////////////////////////////////////// - //} - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 
*size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - ////////////////////////////////////////////////////////////////////////// - //__syncthreads(); - // -X - //(D.f[DIR_P00 
])[ke ] = f_E ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_PP0 ])[kne ] = f_NE ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_P0P ])[kte ] = f_TE ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_PPP ])[ktne ] = f_TNE ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - // X - (D.f[DIR_M00 ])[kw ] = f_W ; - (D.f[DIR_MM0 ])[ksw ] = f_SW ; - (D.f[DIR_MP0 ])[knw ] = f_NW ; - (D.f[DIR_M0M ])[kbw ] = f_BW ; - (D.f[DIR_M0P ])[ktw ] = f_TW ; - (D.f[DIR_MMP ])[ktsw ] = f_TSW ; - (D.f[DIR_MPP ])[ktnw ] = f_TNW ; - (D.f[DIR_MMM ])[kbsw ] = f_BSW ; - (D.f[DIR_MPM ])[kbnw ] = f_BNW ; - // Y - //(D.f[DIR_0M0 ])[ks ] = f_S ; - //(D.f[DIR_PM0 ])[kse ] = f_SE ; - //(D.f[DIR_MM0 ])[ksw ] = f_SW ; - //(D.f[DIR_0MP ])[kts ] = f_TS ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PMP ])[ktse ] = f_TSE ; - //(D.f[DIR_MMP ])[ktsw ] = f_TSW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - // Z - //(D.f[DIR_00M ])[kb ] = f_B ; - //(D.f[DIR_P0M ])[kbe ] = f_BE ; - //(D.f[DIR_M0M ])[kbw ] = f_BW ; - //(D.f[DIR_0PM ])[kbn ] = f_BN ; - //(D.f[DIR_0MM ])[kbs ] = f_BS ; - //(D.f[DIR_PPM ])[kbne ] = f_BNE ; - //(D.f[DIR_MPM ])[kbnw ] = f_BNW ; - //(D.f[DIR_PMM ])[kbse ] = f_BSE ; - //(D.f[DIR_MMM ])[kbsw ] = f_BSW ; - ////////////////////////////////////////////////////////////////////////// - } -} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight) +{ + return f1[dir ] * cs + (c1o1 - cs) * f[dir ] - weight *rhoCorrection; +} + +__global__ void QPressZeroRhoOutflowDevice27( + real* rhoBC, + real* distributions, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long 
long numberOfLBnodes, + bool isEvenTimestep, + int direction, + real densityCorrectionFactor) +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); + ////////////////////////////////////////////////////////////////////////// + if( nodeIndex >= numberOfBCnodes ) return; + //////////////////////////////////////////////////////////////////////////////// + //index + uint k_000 = k_Q[nodeIndex]; + uint k_M00 = neighborX[k_000]; + uint k_0M0 = neighborY[k_000]; + uint k_00M = neighborZ[k_000]; + uint k_MM0 = neighborY[k_M00]; + uint k_M0M = neighborZ[k_M00]; + uint k_0MM = neighborZ[k_0M0]; + uint k_MMM = neighborZ[k_MM0]; + //////////////////////////////////////////////////////////////////////////////// + //index of neighbor + uint kN_000 = k_N[nodeIndex]; + uint kN_M00 = neighborX[k_000]; + uint kN_0M0 = neighborY[k_000]; + uint kN_00M = neighborZ[k_000]; + uint kN_MM0 = neighborY[k_M00]; + uint kN_M0M = neighborZ[k_M00]; + uint kN_0MM = neighborZ[k_0M0]; + uint kN_MMM = neighborZ[k_MM0]; + //////////////////////////////////////////////////////////////////////////////// + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + real f[27], fN[27]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + f[DIR_000] = (dist.f[DIR_000])[k_000]; + f[DIR_P00] = (dist.f[DIR_P00])[k_000]; + f[DIR_M00] = (dist.f[DIR_M00])[k_M00]; + f[DIR_0P0] = (dist.f[DIR_0P0])[k_000]; + f[DIR_0M0] = (dist.f[DIR_0M0])[k_0M0]; + f[DIR_00P] = (dist.f[DIR_00P])[k_000]; + f[DIR_00M] = (dist.f[DIR_00M])[k_00M]; + f[DIR_PP0] = (dist.f[DIR_PP0])[k_000]; + f[DIR_MM0] = (dist.f[DIR_MM0])[k_MM0]; + f[DIR_PM0] = (dist.f[DIR_PM0])[k_0M0]; + f[DIR_MP0] = (dist.f[DIR_MP0])[k_M00]; + f[DIR_P0P] 
= (dist.f[DIR_P0P])[k_000]; + f[DIR_M0M] = (dist.f[DIR_M0M])[k_M0M]; + f[DIR_P0M] = (dist.f[DIR_P0M])[k_00M]; + f[DIR_M0P] = (dist.f[DIR_M0P])[k_M00]; + f[DIR_0PP] = (dist.f[DIR_0PP])[k_000]; + f[DIR_0MM] = (dist.f[DIR_0MM])[k_0MM]; + f[DIR_0PM] = (dist.f[DIR_0PM])[k_00M]; + f[DIR_0MP] = (dist.f[DIR_0MP])[k_0M0]; + f[DIR_PPP] = (dist.f[DIR_PPP])[k_000]; + f[DIR_MPP] = (dist.f[DIR_MPP])[k_M00]; + f[DIR_PMP] = (dist.f[DIR_PMP])[k_0M0]; + f[DIR_MMP] = (dist.f[DIR_MMP])[k_MM0]; + f[DIR_PPM] = (dist.f[DIR_PPM])[k_00M]; + f[DIR_MPM] = (dist.f[DIR_MPM])[k_M0M]; + f[DIR_PMM] = (dist.f[DIR_PMM])[k_0MM]; + f[DIR_MMM] = (dist.f[DIR_MMM])[k_MMM]; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + fN[DIR_000] = (dist.f[DIR_000])[kN_000]; + fN[DIR_P00] = (dist.f[DIR_P00])[kN_000]; + fN[DIR_M00] = (dist.f[DIR_M00])[kN_M00]; + fN[DIR_0P0] = (dist.f[DIR_0P0])[kN_000]; + fN[DIR_0M0] = (dist.f[DIR_0M0])[kN_0M0]; + fN[DIR_00P] = (dist.f[DIR_00P])[kN_000]; + fN[DIR_00M] = (dist.f[DIR_00M])[kN_00M]; + fN[DIR_PP0] = (dist.f[DIR_PP0])[kN_000]; + fN[DIR_MM0] = (dist.f[DIR_MM0])[kN_MM0]; + fN[DIR_PM0] = (dist.f[DIR_PM0])[kN_0M0]; + fN[DIR_MP0] = (dist.f[DIR_MP0])[kN_M00]; + fN[DIR_P0P] = (dist.f[DIR_P0P])[kN_000]; + fN[DIR_M0M] = (dist.f[DIR_M0M])[kN_M0M]; + fN[DIR_P0M] = (dist.f[DIR_P0M])[kN_00M]; + fN[DIR_M0P] = (dist.f[DIR_M0P])[kN_M00]; + fN[DIR_0PP] = (dist.f[DIR_0PP])[kN_000]; + fN[DIR_0MM] = (dist.f[DIR_0MM])[kN_0MM]; + fN[DIR_0PM] = (dist.f[DIR_0PM])[kN_00M]; + fN[DIR_0MP] = (dist.f[DIR_0MP])[kN_0M0]; + fN[DIR_PPP] = (dist.f[DIR_PPP])[kN_000]; + fN[DIR_MPP] = (dist.f[DIR_MPP])[kN_M00]; + fN[DIR_PMP] = (dist.f[DIR_PMP])[kN_0M0]; + fN[DIR_MMP] = (dist.f[DIR_MMP])[kN_MM0]; + fN[DIR_PPM] = (dist.f[DIR_PPM])[kN_00M]; + fN[DIR_MPM] = (dist.f[DIR_MPM])[kN_M0M]; + fN[DIR_PMM] = (dist.f[DIR_PMM])[kN_0MM]; + fN[DIR_MMM] = (dist.f[DIR_MMM])[kN_MMM]; + 
////////////////////////////////////////////////////////////////////////// + real drho = vf::lbm::getDensity(f); + real rhoCorrection = densityCorrectionFactor*drho; + real cs = c1o1 / sqrtf(c3o1); + getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); + switch(direction) + { + case MZZ: + (dist.f[DIR_P00])[k_000] = computeOutflowDistribution(f, fN, DIR_P00 , rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_PP0])[k_000] = computeOutflowDistribution(f, fN, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0M])[k_00M] = computeOutflowDistribution(f, fN, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_P0P])[k_000] = computeOutflowDistribution(f, fN, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216); + break; + + case PZZ: + (dist.f[DIR_M00])[k_M00] = computeOutflowDistribution(f, fN, DIR_M00, rhoCorrection, cs, c2o27); + (dist.f[DIR_MM0])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[k_M00] = computeOutflowDistribution(f, fN, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[k_M0M] = computeOutflowDistribution(f, fN, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[k_M00] = computeOutflowDistribution(f, fN, DIR_M0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, 
DIR_MMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZMZ: + (dist.f[DIR_0P0])[k_000] = computeOutflowDistribution(f, fN, DIR_0P0, rhoCorrection, cs, c2o27); + (dist.f[DIR_PP0])[k_000] = computeOutflowDistribution(f, fN, DIR_PP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MP0])[k_M00] = computeOutflowDistribution(f, fN, DIR_MP0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PP])[k_000] = computeOutflowDistribution(f, fN, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[k_00M] = computeOutflowDistribution(f, fN, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216); + break; + + case ZPZ: + (dist.f[DIR_0M0])[k_0M0] =computeOutflowDistribution(f, fN, DIR_0M0, rhoCorrection, cs, c2o27); + (dist.f[DIR_PM0])[k_0M0] =computeOutflowDistribution(f, fN, DIR_PM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_MM0])[k_MM0] =computeOutflowDistribution(f, fN, DIR_MM0, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[k_0M0] =computeOutflowDistribution(f, fN, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[k_0MM] =computeOutflowDistribution(f, fN, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PMP])[k_0M0] =computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[k_MM0] =computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[k_0MM] =computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[k_MMM] =computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216); + break; + + case ZZM: + 
(dist.f[DIR_00P])[k_000] = computeOutflowDistribution(f, fN, DIR_00P, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0P])[k_000] = computeOutflowDistribution(f, fN, DIR_P0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0P])[k_M00] = computeOutflowDistribution(f, fN, DIR_M0P, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PP])[k_000] = computeOutflowDistribution(f, fN, DIR_0PP, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_0MP, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216); + break; + + case ZZP: + (dist.f[DIR_00M])[k_00M] = computeOutflowDistribution(f, fN, DIR_00M, rhoCorrection, cs, c2o27); + (dist.f[DIR_P0M])[k_00M] = computeOutflowDistribution(f, fN, DIR_P0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_M0M])[k_M0M] = computeOutflowDistribution(f, fN, DIR_M0M, rhoCorrection, cs, c1o54); + (dist.f[DIR_0PM])[k_00M] = computeOutflowDistribution(f, fN, DIR_0PM, rhoCorrection, cs, c1o54); + (dist.f[DIR_0MM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_0MM, rhoCorrection, cs, c1o54); + (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216); + (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216); + (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216); + break; + default: + break; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ 
-3314,22 +3327,23 @@ __global__ void QPressNoRhoDevice27( real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceOld27(real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceOld27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -3403,133 +3417,133 @@ __global__ void QPressDeviceOld27(real* rhoBC, Distributions27 D; if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - 
D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P 
*size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * 
numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_W = (D.f[DIR_P00 ])[k1e ]; - f1_E = (D.f[DIR_M00 ])[k1w ]; - f1_S = (D.f[DIR_0P0 ])[k1n ]; - f1_N = (D.f[DIR_0M0 ])[k1s ]; - f1_B = (D.f[DIR_00P ])[k1t ]; - f1_T = (D.f[DIR_00M ])[k1b ]; - f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - f1_NW = (D.f[DIR_PM0 ])[k1se ]; - f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - f1_BW = (D.f[DIR_P0P ])[k1te ]; - f1_TE = (D.f[DIR_M0M ])[k1bw ]; - f1_TW = (D.f[DIR_P0M ])[k1be ]; - f1_BE = (D.f[DIR_M0P ])[k1tw ]; - f1_BS = (D.f[DIR_0PP ])[k1tn ]; - f1_TN = (D.f[DIR_0MM ])[k1bs ]; - f1_TS = (D.f[DIR_0PM ])[k1bn ]; - f1_BN = (D.f[DIR_0MP ])[k1ts ]; + f1_W = (D.f[DIR_P00])[k1e ]; + f1_E = (D.f[DIR_M00])[k1w ]; + f1_S = (D.f[DIR_0P0])[k1n ]; + f1_N = (D.f[DIR_0M0])[k1s ]; + f1_B = (D.f[DIR_00P])[k1t ]; + f1_T = (D.f[DIR_00M])[k1b ]; + f1_SW = (D.f[DIR_PP0])[k1ne ]; + f1_NE = (D.f[DIR_MM0])[k1sw ]; + f1_NW = (D.f[DIR_PM0])[k1se ]; + f1_SE = (D.f[DIR_MP0])[k1nw ]; + f1_BW = (D.f[DIR_P0P])[k1te ]; + f1_TE = (D.f[DIR_M0M])[k1bw ]; + f1_TW = (D.f[DIR_P0M])[k1be ]; + f1_BE = (D.f[DIR_M0P])[k1tw ]; + f1_BS = (D.f[DIR_0PP])[k1tn ]; + f1_TN = (D.f[DIR_0MM])[k1bs ]; + f1_TS = (D.f[DIR_0PM])[k1bn ]; + f1_BN = (D.f[DIR_0MP])[k1ts ]; f1_ZERO = (D.f[DIR_000])[k1zero]; - f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - f1_BNE = (D.f[DIR_MMP ])[k1tsw ]; - f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - f1_TNE = 
(D.f[DIR_MMM ])[k1bsw ]; - f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + f1_BSW = (D.f[DIR_PPP])[k1tne ]; + f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + f1_BNW = (D.f[DIR_PMP])[k1tse ]; + f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + f1_TSW = (D.f[DIR_PPM])[k1bne ]; + f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + f1_TNW = (D.f[DIR_PMM])[k1bse ]; + f1_TSE = (D.f[DIR_MPM])[k1bnw ]; ////////////////////////////////////////////////////////////////////////// real drho1 = f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; - //drho1 = (drho1 + rhoBC[k])/2.f; - drho1 = drho1 - rhoBC[k]; + //drho1 = (drho1 + rhoBC[k])/2.f; + drho1 = drho1 - rhoBC[k]; ////////////////////////////////////////////////////////////////////////// __syncthreads(); - (D.f[DIR_P00 ])[ke ] = f1_W -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_M00 ])[kw ] = f1_E -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_0P0 ])[kn ] = f1_S -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_0M0 ])[ks ] = f1_N -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_00P ])[kt ] = f1_B -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_00M ])[kb ] = f1_T -c2o27*drho1; // c1o100; // zero; // - (D.f[DIR_PP0 ])[kne ] = f1_SW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_MM0 ])[ksw ] = f1_NE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_PM0 ])[kse ] = f1_NW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_MP0 ])[knw ] = f1_SE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_P0P ])[kte ] = f1_BW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_M0M ])[kbw ] = f1_TE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_P0M ])[kbe ] = f1_TW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_M0P ])[ktw ] = f1_BE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0PP ])[ktn ] = f1_BS -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0MM ])[kbs ] = f1_TN -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0PM ])[kbn ] = f1_TS -c1o54*drho1; // 
c1o100; // zero; // - (D.f[DIR_0MP ])[kts ] = f1_BN -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P00])[ke ] = f1_W -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_M00])[kw ] = f1_E -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_0P0])[kn ] = f1_S -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_0M0])[ks ] = f1_N -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_00P])[kt ] = f1_B -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_00M])[kb ] = f1_T -c2o27*drho1; // c1o100; // zero; // + (D.f[DIR_PP0])[kne ] = f1_SW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_MM0])[ksw ] = f1_NE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_PM0])[kse ] = f1_NW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_MP0])[knw ] = f1_SE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P0P])[kte ] = f1_BW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_M0M])[kbw ] = f1_TE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P0M])[kbe ] = f1_TW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_M0P])[ktw ] = f1_BE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0PP])[ktn ] = f1_BS -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0MM])[kbs ] = f1_TN -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0PM])[kbn ] = f1_TS -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0MP])[kts ] = f1_BN -c1o54*drho1; // c1o100; // zero; // (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1; // c1o100; // zero; // - (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1; // 
c1o100; // zero; // + (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1; // c1o100; // zero; // } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -3573,23 +3587,24 @@ __global__ void QPressDeviceOld27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceEQZ27(real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - real* kTestRE, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceEQZ27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + real* kTestRE, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -3663,168 +3678,168 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, Distributions27 D; if 
(isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// // Distributions27 kDistTest; - // kDistTest.f[DIR_P00 ] = &kTestRE[DIR_P00 *numberOfBCnodes]; - // kDistTest.f[DIR_M00 ] = &kTestRE[DIR_M00 *numberOfBCnodes]; - // kDistTest.f[DIR_0P0 ] = &kTestRE[DIR_0P0 *numberOfBCnodes]; - // kDistTest.f[DIR_0M0 ] = &kTestRE[DIR_0M0 *numberOfBCnodes]; - // kDistTest.f[DIR_00P ] = &kTestRE[DIR_00P *numberOfBCnodes]; - // kDistTest.f[DIR_00M ] = &kTestRE[DIR_00M *numberOfBCnodes]; - // kDistTest.f[DIR_PP0 ] = &kTestRE[DIR_PP0 *numberOfBCnodes]; - // kDistTest.f[DIR_MM0 ] = &kTestRE[DIR_MM0 *numberOfBCnodes]; - // kDistTest.f[DIR_PM0 ] = &kTestRE[DIR_PM0 *numberOfBCnodes]; - // kDistTest.f[DIR_MP0 ] = &kTestRE[DIR_MP0 *numberOfBCnodes]; - // kDistTest.f[DIR_P0P ] = &kTestRE[DIR_P0P *numberOfBCnodes]; - // kDistTest.f[DIR_M0M ] = &kTestRE[DIR_M0M *numberOfBCnodes]; - // kDistTest.f[DIR_P0M ] = &kTestRE[DIR_P0M *numberOfBCnodes]; - // kDistTest.f[DIR_M0P ] = &kTestRE[DIR_M0P *numberOfBCnodes]; - // 
kDistTest.f[DIR_0PP ] = &kTestRE[DIR_0PP *numberOfBCnodes]; - // kDistTest.f[DIR_0MM ] = &kTestRE[DIR_0MM *numberOfBCnodes]; - // kDistTest.f[DIR_0PM ] = &kTestRE[DIR_0PM *numberOfBCnodes]; - // kDistTest.f[DIR_0MP ] = &kTestRE[DIR_0MP *numberOfBCnodes]; - // kDistTest.f[DIR_000] = &kTestRE[DIR_000*numberOfBCnodes]; - // kDistTest.f[DIR_PPP ] = &kTestRE[DIR_PPP *numberOfBCnodes]; - // kDistTest.f[DIR_MMP ] = &kTestRE[DIR_MMP *numberOfBCnodes]; - // kDistTest.f[DIR_PMP ] = &kTestRE[DIR_PMP *numberOfBCnodes]; - // kDistTest.f[DIR_MPP ] = &kTestRE[DIR_MPP *numberOfBCnodes]; - // kDistTest.f[DIR_PPM ] = &kTestRE[DIR_PPM *numberOfBCnodes]; - // kDistTest.f[DIR_MMM ] = &kTestRE[DIR_MMM *numberOfBCnodes]; - // kDistTest.f[DIR_PMM ] = &kTestRE[DIR_PMM *numberOfBCnodes]; - // kDistTest.f[DIR_MPM ] = &kTestRE[DIR_MPM *numberOfBCnodes]; + // kDistTest.f[DIR_P00] = &kTestRE[DIR_P00 * numberOfBCnodes]; + // kDistTest.f[DIR_M00] = &kTestRE[DIR_M00 * numberOfBCnodes]; + // kDistTest.f[DIR_0P0] = &kTestRE[DIR_0P0 * numberOfBCnodes]; + // kDistTest.f[DIR_0M0] = &kTestRE[DIR_0M0 * numberOfBCnodes]; + // kDistTest.f[DIR_00P] = &kTestRE[DIR_00P * numberOfBCnodes]; + // kDistTest.f[DIR_00M] = &kTestRE[DIR_00M * numberOfBCnodes]; + // kDistTest.f[DIR_PP0] = &kTestRE[DIR_PP0 * numberOfBCnodes]; + // kDistTest.f[DIR_MM0] = &kTestRE[DIR_MM0 * numberOfBCnodes]; + // kDistTest.f[DIR_PM0] = &kTestRE[DIR_PM0 * numberOfBCnodes]; + // kDistTest.f[DIR_MP0] = &kTestRE[DIR_MP0 * numberOfBCnodes]; + // kDistTest.f[DIR_P0P] = &kTestRE[DIR_P0P * numberOfBCnodes]; + // kDistTest.f[DIR_M0M] = &kTestRE[DIR_M0M * numberOfBCnodes]; + // kDistTest.f[DIR_P0M] = &kTestRE[DIR_P0M * numberOfBCnodes]; + // kDistTest.f[DIR_M0P] = &kTestRE[DIR_M0P * numberOfBCnodes]; + // kDistTest.f[DIR_0PP] = &kTestRE[DIR_0PP * numberOfBCnodes]; + // kDistTest.f[DIR_0MM] = &kTestRE[DIR_0MM * numberOfBCnodes]; + // kDistTest.f[DIR_0PM] = &kTestRE[DIR_0PM * numberOfBCnodes]; + // kDistTest.f[DIR_0MP] = &kTestRE[DIR_0MP * 
numberOfBCnodes]; + // kDistTest.f[DIR_000] = &kTestRE[DIR_000 * numberOfBCnodes]; + // kDistTest.f[DIR_PPP] = &kTestRE[DIR_PPP * numberOfBCnodes]; + // kDistTest.f[DIR_MMP] = &kTestRE[DIR_MMP * numberOfBCnodes]; + // kDistTest.f[DIR_PMP] = &kTestRE[DIR_PMP * numberOfBCnodes]; + // kDistTest.f[DIR_MPP] = &kTestRE[DIR_MPP * numberOfBCnodes]; + // kDistTest.f[DIR_PPM] = &kTestRE[DIR_PPM * numberOfBCnodes]; + // kDistTest.f[DIR_MMM] = &kTestRE[DIR_MMM * numberOfBCnodes]; + // kDistTest.f[DIR_PMM] = &kTestRE[DIR_PMM * numberOfBCnodes]; + // kDistTest.f[DIR_MPM] = &kTestRE[DIR_MPM * numberOfBCnodes]; // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // //real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - // //f1_W = (D.f[DIR_P00 ])[k1e ]; - // //f1_E = (D.f[DIR_M00 ])[k1w ]; - // //f1_S = (D.f[DIR_0P0 ])[k1n ]; - // //f1_N = (D.f[DIR_0M0 ])[k1s ]; - // //f1_B = (D.f[DIR_00P ])[k1t ]; - // //f1_T = (D.f[DIR_00M ])[k1b ]; - // //f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - // //f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - // //f1_NW = (D.f[DIR_PM0 ])[k1se ]; - // //f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - // //f1_BW = (D.f[DIR_P0P ])[k1te ]; - // //f1_TE = (D.f[DIR_M0M ])[k1bw ]; - // //f1_TW = (D.f[DIR_P0M ])[k1be ]; - // //f1_BE = (D.f[DIR_M0P ])[k1tw ]; - // //f1_BS = (D.f[DIR_0PP ])[k1tn ]; - // //f1_TN = (D.f[DIR_0MM ])[k1bs ]; - // //f1_TS = (D.f[DIR_0PM ])[k1bn ]; - // //f1_BN = (D.f[DIR_0MP ])[k1ts ]; + // //f1_W = (D.f[DIR_P00])[k1e ]; + // //f1_E = (D.f[DIR_M00])[k1w ]; + // //f1_S = (D.f[DIR_0P0])[k1n ]; + // //f1_N = (D.f[DIR_0M0])[k1s ]; + // //f1_B = (D.f[DIR_00P])[k1t ]; + // //f1_T = (D.f[DIR_00M])[k1b ]; + // //f1_SW = (D.f[DIR_PP0])[k1ne ]; + // //f1_NE = (D.f[DIR_MM0])[k1sw ]; + // //f1_NW = (D.f[DIR_PM0])[k1se ]; + // //f1_SE = (D.f[DIR_MP0])[k1nw ]; + // 
//f1_BW = (D.f[DIR_P0P])[k1te ]; + // //f1_TE = (D.f[DIR_M0M])[k1bw ]; + // //f1_TW = (D.f[DIR_P0M])[k1be ]; + // //f1_BE = (D.f[DIR_M0P])[k1tw ]; + // //f1_BS = (D.f[DIR_0PP])[k1tn ]; + // //f1_TN = (D.f[DIR_0MM])[k1bs ]; + // //f1_TS = (D.f[DIR_0PM])[k1bn ]; + // //f1_BN = (D.f[DIR_0MP])[k1ts ]; // //f1_ZERO = (D.f[DIR_000])[k1zero]; - // //f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - // //f1_BNE = (D.f[DIR_MMP ])[k1tsw ]; - // //f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - // //f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - // //f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - // //f1_TNE = (D.f[DIR_MMM ])[k1bsw ]; - // //f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - // //f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + // //f1_BSW = (D.f[DIR_PPP])[k1tne ]; + // //f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + // //f1_BNW = (D.f[DIR_PMP])[k1tse ]; + // //f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + // //f1_TSW = (D.f[DIR_PPM])[k1bne ]; + // //f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + // //f1_TNW = (D.f[DIR_PMM])[k1bse ]; + // //f1_TSE = (D.f[DIR_MPM])[k1bnw ]; // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - // f1_E = (D.f[DIR_P00 ])[k1e ]; - // f1_W = (D.f[DIR_M00 ])[k1w ]; - // f1_N = (D.f[DIR_0P0 ])[k1n ]; - // f1_S = (D.f[DIR_0M0 ])[k1s ]; - // f1_T = (D.f[DIR_00P ])[k1t ]; - // f1_B = (D.f[DIR_00M ])[k1b ]; - // f1_NE = (D.f[DIR_PP0 ])[k1ne ]; - // f1_SW = (D.f[DIR_MM0 ])[k1sw ]; - // f1_SE = (D.f[DIR_PM0 ])[k1se ]; - // f1_NW = (D.f[DIR_MP0 ])[k1nw ]; - // f1_TE = (D.f[DIR_P0P ])[k1te ]; - // f1_BW = (D.f[DIR_M0M ])[k1bw ]; - // f1_BE = (D.f[DIR_P0M ])[k1be ]; - // f1_TW = (D.f[DIR_M0P ])[k1tw ]; - // f1_TN = 
(D.f[DIR_0PP ])[k1tn ]; - // f1_BS = (D.f[DIR_0MM ])[k1bs ]; - // f1_BN = (D.f[DIR_0PM ])[k1bn ]; - // f1_TS = (D.f[DIR_0MP ])[k1ts ]; + // f1_E = (D.f[DIR_P00])[k1e ]; + // f1_W = (D.f[DIR_M00])[k1w ]; + // f1_N = (D.f[DIR_0P0])[k1n ]; + // f1_S = (D.f[DIR_0M0])[k1s ]; + // f1_T = (D.f[DIR_00P])[k1t ]; + // f1_B = (D.f[DIR_00M])[k1b ]; + // f1_NE = (D.f[DIR_PP0])[k1ne ]; + // f1_SW = (D.f[DIR_MM0])[k1sw ]; + // f1_SE = (D.f[DIR_PM0])[k1se ]; + // f1_NW = (D.f[DIR_MP0])[k1nw ]; + // f1_TE = (D.f[DIR_P0P])[k1te ]; + // f1_BW = (D.f[DIR_M0M])[k1bw ]; + // f1_BE = (D.f[DIR_P0M])[k1be ]; + // f1_TW = (D.f[DIR_M0P])[k1tw ]; + // f1_TN = (D.f[DIR_0PP])[k1tn ]; + // f1_BS = (D.f[DIR_0MM])[k1bs ]; + // f1_BN = (D.f[DIR_0PM])[k1bn ]; + // f1_TS = (D.f[DIR_0MP])[k1ts ]; // f1_ZERO = (D.f[DIR_000])[k1zero]; - // f1_TNE = (D.f[DIR_PPP ])[k1tne ]; - // f1_TSW = (D.f[DIR_MMP ])[k1tsw ]; - // f1_TSE = (D.f[DIR_PMP ])[k1tse ]; - // f1_TNW = (D.f[DIR_MPP ])[k1tnw ]; - // f1_BNE = (D.f[DIR_PPM ])[k1bne ]; - // f1_BSW = (D.f[DIR_MMM ])[k1bsw ]; - // f1_BSE = (D.f[DIR_PMM ])[k1bse ]; - // f1_BNW = (D.f[DIR_MPM ])[k1bnw ]; + // f1_TNE = (D.f[DIR_PPP])[k1tne ]; + // f1_TSW = (D.f[DIR_MMP])[k1tsw ]; + // f1_TSE = (D.f[DIR_PMP])[k1tse ]; + // f1_TNW = (D.f[DIR_MPP])[k1tnw ]; + // f1_BNE = (D.f[DIR_PPM])[k1bne ]; + // f1_BSW = (D.f[DIR_MMM])[k1bsw ]; + // f1_BSE = (D.f[DIR_PMM])[k1bse ]; + // f1_BNW = (D.f[DIR_MPM])[k1bnw ]; // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////////////// // real drho1 = f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; - //real vx1 = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + 
(f1_E-f1_W)) / (one + drho1); - //real vx2 = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1); - //real vx3 = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1); + //real vx1 = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + (f1_E-f1_W)) / (one + drho1); + //real vx2 = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1); + //real vx3 = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1); // ////////////////////////////////////////////////////////////////////////// - ////real omega = om1; + ////real omega = om1; // real cusq = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); // ////////////////////////////////////////////////////////////////////////// - ////T�st MK - ////if(vx1 < zero) vx1 = zero; + ////T�st MK + ////if(vx1 < zero) vx1 = zero; // ////////////////////////////////////////////////////////////////////////// - ////becomes higher with neighbor source and lower with local source + ////becomes higher with neighbor source and lower with local source // //real fZERO = c8over27* (rhoBC[k]-(one + rhoBC[k])*(cusq)) ; // //real fE = c2over27* (rhoBC[k]+(one + rhoBC[k])*(three*( vx1 )+c9over2*( vx1 )*( vx1 )-cusq)); // //real fW = c2over27* (rhoBC[k]+(one + rhoBC[k])*(three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cusq)); @@ -3853,7 +3868,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, // //real fBSE = c1over216* (rhoBC[k]+(one + rhoBC[k])*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq)); // //real fTNW = c1over216* (rhoBC[k]+(one + 
rhoBC[k])*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq)); // ////////////////////////////////////////////////////////////////////////// - //// based on VirtualFluids (kucher + fard) + //// based on VirtualFluids (kucher + fard) // real fZERO = c8over27 * rhoBC[k] * (one - cusq); // real fE = c2over27 * rhoBC[k] * (one + three * ( vx1 ) + c9over2 * ( vx1 ) * ( vx1 ) - cusq); // real fW = c2over27 * rhoBC[k] * (one + three * (-vx1 ) + c9over2 * (-vx1 ) * (-vx1 ) - cusq); @@ -3882,7 +3897,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, // real fBSE = c1over216 * rhoBC[k] * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq); // real fTNW = c1over216 * rhoBC[k] * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq); //// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //////test + //////test //// real fZERO = c8over27 * ((drho1 + rhoBC[k]) / two) * (one - cusq); //// real fE = c2over27 * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1 ) + c9over2 * ( vx1 ) * ( vx1 ) - cusq); //// real fW = c2over27 * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1 ) + c9over2 * (-vx1 ) * (-vx1 ) - cusq); @@ -3911,190 +3926,190 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, //// real fBSE = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq); //// real fTNW = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq); - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // based on BGK Plus Comp - 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //double mfabb = (D.f[DIR_P00 ])[k1e ]; - //double mfcbb = (D.f[DIR_M00 ])[k1w ]; - //double mfbab = (D.f[DIR_0P0 ])[k1n ]; - //double mfbcb = (D.f[DIR_0M0 ])[k1s ]; - //double mfbba = (D.f[DIR_00P ])[k1t ]; - //double mfbbc = (D.f[DIR_00M ])[k1b ]; - //double mfaab = (D.f[DIR_PP0 ])[k1ne ]; - //double mfccb = (D.f[DIR_MM0 ])[k1sw ]; - //double mfacb = (D.f[DIR_PM0 ])[k1se ]; - //double mfcab = (D.f[DIR_MP0 ])[k1nw ]; - //double mfaba = (D.f[DIR_P0P ])[k1te ]; - //double mfcbc = (D.f[DIR_M0M ])[k1bw ]; - //double mfabc = (D.f[DIR_P0M ])[k1be ]; - //double mfcba = (D.f[DIR_M0P ])[k1tw ]; - //double mfbaa = (D.f[DIR_0PP ])[k1tn ]; - //double mfbcc = (D.f[DIR_0MM ])[k1bs ]; - //double mfbac = (D.f[DIR_0PM ])[k1bn ]; - //double mfbca = (D.f[DIR_0MP ])[k1ts ]; - //double mfbbb = (D.f[DIR_000])[k1zero]; - //double mfaaa = (D.f[DIR_PPP ])[k1tne ]; - //double mfcca = (D.f[DIR_MMP ])[k1tsw ]; - //double mfaca = (D.f[DIR_PMP ])[k1tse ]; - //double mfcaa = (D.f[DIR_MPP ])[k1tnw ]; - //double mfaac = (D.f[DIR_PPM ])[k1bne ]; - //double mfccc = (D.f[DIR_MMM ])[k1bsw ]; - //double mfacc = (D.f[DIR_PMM ])[k1bse ]; - //double mfcac = (D.f[DIR_MPM ])[k1bnw ]; - real mfabb = (D.f[DIR_P00 ])[k1e ]; - real mfcbb = (D.f[DIR_M00 ])[k1w ]; - real mfbab = (D.f[DIR_0P0 ])[k1n ]; - real mfbcb = (D.f[DIR_0M0 ])[k1s ]; - real mfbba = (D.f[DIR_00P ])[k1t ]; - real mfbbc = (D.f[DIR_00M ])[k1b ]; - real mfaab = (D.f[DIR_PP0 ])[k1ne ]; - real mfccb = (D.f[DIR_MM0 ])[k1sw ]; - real mfacb = (D.f[DIR_PM0 ])[k1se ]; - real mfcab = (D.f[DIR_MP0 ])[k1nw ]; - real mfaba = (D.f[DIR_P0P ])[k1te ]; - real mfcbc = (D.f[DIR_M0M ])[k1bw ]; - real mfabc = (D.f[DIR_P0M ])[k1be ]; - real mfcba = (D.f[DIR_M0P ])[k1tw ]; - real mfbaa = (D.f[DIR_0PP ])[k1tn ]; - real mfbcc = (D.f[DIR_0MM ])[k1bs ]; - real mfbac = (D.f[DIR_0PM ])[k1bn ]; - real mfbca = (D.f[DIR_0MP ])[k1ts 
]; - real mfbbb = (D.f[DIR_000])[k1zero]; - real mfaaa = (D.f[DIR_PPP ])[k1tne ]; - real mfcca = (D.f[DIR_MMP ])[k1tsw ]; - real mfaca = (D.f[DIR_PMP ])[k1tse ]; - real mfcaa = (D.f[DIR_MPP ])[k1tnw ]; - real mfaac = (D.f[DIR_PPM ])[k1bne ]; - real mfccc = (D.f[DIR_MMM ])[k1bsw ]; - real mfacc = (D.f[DIR_PMM ])[k1bse ]; - real mfcac = (D.f[DIR_MPM ])[k1bnw ]; - - //real mfcbb = (D.f[DIR_P00 ])[ke ]; - //real mfabb = (D.f[DIR_M00 ])[kw ]; - //real mfbcb = (D.f[DIR_0P0 ])[kn ]; - //real mfbab = (D.f[DIR_0M0 ])[ks ]; - //real mfbbc = (D.f[DIR_00P ])[kt ]; - //real mfbba = (D.f[DIR_00M ])[kb ]; - //real mfccb = (D.f[DIR_PP0 ])[kne ]; - //real mfaab = (D.f[DIR_MM0 ])[ksw ]; - //real mfcab = (D.f[DIR_PM0 ])[kse ]; - //real mfacb = (D.f[DIR_MP0 ])[knw ]; - //real mfcbc = (D.f[DIR_P0P ])[kte ]; - //real mfaba = (D.f[DIR_M0M ])[kbw ]; - //real mfcba = (D.f[DIR_P0M ])[kbe ]; - //real mfabc = (D.f[DIR_M0P ])[ktw ]; - //real mfbcc = (D.f[DIR_0PP ])[ktn ]; - //real mfbaa = (D.f[DIR_0MM ])[kbs ]; - //real mfbca = (D.f[DIR_0PM ])[kbn ]; - //real mfbac = (D.f[DIR_0MP ])[kts ]; - //real mfbbb = (D.f[DIR_000])[kzero]; - //real mfccc = (D.f[DIR_PPP ])[ktne ]; - //real mfaac = (D.f[DIR_MMP ])[ktsw ]; - //real mfcac = (D.f[DIR_PMP ])[ktse ]; - //real mfacc = (D.f[DIR_MPP ])[ktnw ]; - //real mfcca = (D.f[DIR_PPM ])[kbne ]; - //real mfaaa = (D.f[DIR_MMM ])[kbsw ]; - //real mfcaa = (D.f[DIR_PMM ])[kbse ]; - //real mfaca = (D.f[DIR_MPM ])[kbnw ]; - //////////////////////////////////////////////////////////////////////////////////// - //real rho = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + - // (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + - // ((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Achtung + one - //////////////////////////////////////////////////////////////////////////////////// - real rho = rhoBC[k]; - 
//////////////////////////////////////////////////////////////////////////////////// - real OoRho = c1o1 / (rho * 1.5f); - //////////////////////////////////////////////////////////////////////////////////// - real vvx = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + - (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) + - (mfcbb-mfabb)) * OoRho; - real vvy =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) + - (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) + - (mfbcb-mfbab)) * OoRho; - real vvz =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) + - (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) + - (mfbbc-mfbba)) * OoRho; - ///////////////////////// - //Test Values - //double vvx = 0.016; - //double vvy = zero; - //double vvz = zero; - //////////////////////////////////////////////////////////////////////////////////////// - ////round off error test - //if(vvx!=zero){ - // (kDistTest.f[DIR_P00 ])[k] = mfabb; - // (kDistTest.f[DIR_M00 ])[k] = mfcbb; - // (kDistTest.f[DIR_0P0 ])[k] = mfbab; - // (kDistTest.f[DIR_0M0 ])[k] = mfbcb; - // (kDistTest.f[DIR_00P ])[k] = mfbba; - // (kDistTest.f[DIR_00M ])[k] = mfbbc; - // (kDistTest.f[DIR_PP0 ])[k] = mfaab; - // (kDistTest.f[DIR_MM0 ])[k] = mfccb; - // (kDistTest.f[DIR_PM0 ])[k] = mfacb; - // (kDistTest.f[DIR_MP0 ])[k] = mfcab; - // (kDistTest.f[DIR_P0P ])[k] = mfaba; - // (kDistTest.f[DIR_M0M ])[k] = mfcbc; - // (kDistTest.f[DIR_P0M ])[k] = mfabc; - // (kDistTest.f[DIR_M0P ])[k] = mfcba; - // (kDistTest.f[DIR_0PP ])[k] = mfbaa; - // (kDistTest.f[DIR_0MM ])[k] = mfbcc; - // (kDistTest.f[DIR_0PM ])[k] = mfbac; - // (kDistTest.f[DIR_0MP ])[k] = mfbca; - // (kDistTest.f[DIR_000])[k] = KQK; - // (kDistTest.f[DIR_PPP ])[k] = mfaaa; - // (kDistTest.f[DIR_MMP ])[k] = mfcca; - // (kDistTest.f[DIR_PMP ])[k] = mfaca; - // (kDistTest.f[DIR_MPP ])[k] = mfcaa; - // (kDistTest.f[DIR_PPM ])[k] = mfaac; - // 
(kDistTest.f[DIR_MMM ])[k] = mfccc; - // (kDistTest.f[DIR_PMM ])[k] = mfacc; - // (kDistTest.f[DIR_MPM ])[k] = mfcac; - //}else{ - // (kDistTest.f[DIR_P00 ])[k] = zero; - // (kDistTest.f[DIR_M00 ])[k] = zero; - // (kDistTest.f[DIR_0P0 ])[k] = zero; - // (kDistTest.f[DIR_0M0 ])[k] = zero; - // (kDistTest.f[DIR_00P ])[k] = zero; - // (kDistTest.f[DIR_00M ])[k] = zero; - // (kDistTest.f[DIR_PP0 ])[k] = zero; - // (kDistTest.f[DIR_MM0 ])[k] = zero; - // (kDistTest.f[DIR_PM0 ])[k] = zero; - // (kDistTest.f[DIR_MP0 ])[k] = zero; - // (kDistTest.f[DIR_P0P ])[k] = zero; - // (kDistTest.f[DIR_M0M ])[k] = zero; - // (kDistTest.f[DIR_P0M ])[k] = zero; - // (kDistTest.f[DIR_M0P ])[k] = zero; - // (kDistTest.f[DIR_0PP ])[k] = zero; - // (kDistTest.f[DIR_0MM ])[k] = zero; - // (kDistTest.f[DIR_0PM ])[k] = zero; - // (kDistTest.f[DIR_0MP ])[k] = zero; - // (kDistTest.f[DIR_000])[k] = zero; - // (kDistTest.f[DIR_PPP ])[k] = zero; - // (kDistTest.f[DIR_MMP ])[k] = zero; - // (kDistTest.f[DIR_PMP ])[k] = zero; - // (kDistTest.f[DIR_MPP ])[k] = zero; - // (kDistTest.f[DIR_PPM ])[k] = zero; - // (kDistTest.f[DIR_MMM ])[k] = zero; - // (kDistTest.f[DIR_PMM ])[k] = zero; - // (kDistTest.f[DIR_MPM ])[k] = zero; - //} - - ////////////////////////////////////////////////////////////////////////////////////// - //// first bad fix for negative x velocity - ////if(vvx > zero) vvx = zero; - ////////////////////////////////////////////////////////////////////////////////////// - ////// second bad fix for negative x velocity - ////if(vvx > zero){ - //// vvx = -vvx; - //// vvy = -vvy; - //// vvz = -vvz; - ////} - //////////////////////////////////////////////////////////////////////////////////// - double vx2 = vvx * vvx; - double vy2 = vvy * vvy; - double vz2 = vvz * vvz; - ////////////////////////////////////////////////////////////////////////////////// - //original + 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //double mfabb = (D.f[DIR_P00])[k1e ]; + //double mfcbb = (D.f[DIR_M00])[k1w ]; + //double mfbab = (D.f[DIR_0P0])[k1n ]; + //double mfbcb = (D.f[DIR_0M0])[k1s ]; + //double mfbba = (D.f[DIR_00P])[k1t ]; + //double mfbbc = (D.f[DIR_00M])[k1b ]; + //double mfaab = (D.f[DIR_PP0])[k1ne ]; + //double mfccb = (D.f[DIR_MM0])[k1sw ]; + //double mfacb = (D.f[DIR_PM0])[k1se ]; + //double mfcab = (D.f[DIR_MP0])[k1nw ]; + //double mfaba = (D.f[DIR_P0P])[k1te ]; + //double mfcbc = (D.f[DIR_M0M])[k1bw ]; + //double mfabc = (D.f[DIR_P0M])[k1be ]; + //double mfcba = (D.f[DIR_M0P])[k1tw ]; + //double mfbaa = (D.f[DIR_0PP])[k1tn ]; + //double mfbcc = (D.f[DIR_0MM])[k1bs ]; + //double mfbac = (D.f[DIR_0PM])[k1bn ]; + //double mfbca = (D.f[DIR_0MP])[k1ts ]; + //double mfbbb = (D.f[DIR_000])[k1zero]; + //double mfaaa = (D.f[DIR_PPP])[k1tne ]; + //double mfcca = (D.f[DIR_MMP])[k1tsw ]; + //double mfaca = (D.f[DIR_PMP])[k1tse ]; + //double mfcaa = (D.f[DIR_MPP])[k1tnw ]; + //double mfaac = (D.f[DIR_PPM])[k1bne ]; + //double mfccc = (D.f[DIR_MMM])[k1bsw ]; + //double mfacc = (D.f[DIR_PMM])[k1bse ]; + //double mfcac = (D.f[DIR_MPM])[k1bnw ]; + real mfabb = (D.f[DIR_P00])[k1e ]; + real mfcbb = (D.f[DIR_M00])[k1w ]; + real mfbab = (D.f[DIR_0P0])[k1n ]; + real mfbcb = (D.f[DIR_0M0])[k1s ]; + real mfbba = (D.f[DIR_00P])[k1t ]; + real mfbbc = (D.f[DIR_00M])[k1b ]; + real mfaab = (D.f[DIR_PP0])[k1ne ]; + real mfccb = (D.f[DIR_MM0])[k1sw ]; + real mfacb = (D.f[DIR_PM0])[k1se ]; + real mfcab = (D.f[DIR_MP0])[k1nw ]; + real mfaba = (D.f[DIR_P0P])[k1te ]; + real mfcbc = (D.f[DIR_M0M])[k1bw ]; + real mfabc = (D.f[DIR_P0M])[k1be ]; + real mfcba = (D.f[DIR_M0P])[k1tw ]; + real mfbaa = (D.f[DIR_0PP])[k1tn ]; + real mfbcc = (D.f[DIR_0MM])[k1bs ]; + real mfbac = (D.f[DIR_0PM])[k1bn ]; + real mfbca = (D.f[DIR_0MP])[k1ts ]; + real mfbbb = (D.f[DIR_000])[k1zero]; + 
real mfaaa = (D.f[DIR_PPP])[k1tne ]; + real mfcca = (D.f[DIR_MMP])[k1tsw ]; + real mfaca = (D.f[DIR_PMP])[k1tse ]; + real mfcaa = (D.f[DIR_MPP])[k1tnw ]; + real mfaac = (D.f[DIR_PPM])[k1bne ]; + real mfccc = (D.f[DIR_MMM])[k1bsw ]; + real mfacc = (D.f[DIR_PMM])[k1bse ]; + real mfcac = (D.f[DIR_MPM])[k1bnw ]; + + //real mfcbb = (D.f[DIR_P00])[ke ]; + //real mfabb = (D.f[DIR_M00])[kw ]; + //real mfbcb = (D.f[DIR_0P0])[kn ]; + //real mfbab = (D.f[DIR_0M0])[ks ]; + //real mfbbc = (D.f[DIR_00P])[kt ]; + //real mfbba = (D.f[DIR_00M])[kb ]; + //real mfccb = (D.f[DIR_PP0])[kne ]; + //real mfaab = (D.f[DIR_MM0])[ksw ]; + //real mfcab = (D.f[DIR_PM0])[kse ]; + //real mfacb = (D.f[DIR_MP0])[knw ]; + //real mfcbc = (D.f[DIR_P0P])[kte ]; + //real mfaba = (D.f[DIR_M0M])[kbw ]; + //real mfcba = (D.f[DIR_P0M])[kbe ]; + //real mfabc = (D.f[DIR_M0P])[ktw ]; + //real mfbcc = (D.f[DIR_0PP])[ktn ]; + //real mfbaa = (D.f[DIR_0MM])[kbs ]; + //real mfbca = (D.f[DIR_0PM])[kbn ]; + //real mfbac = (D.f[DIR_0MP])[kts ]; + //real mfbbb = (D.f[DIR_000])[kzero]; + //real mfccc = (D.f[DIR_PPP])[ktne ]; + //real mfaac = (D.f[DIR_MMP])[ktsw ]; + //real mfcac = (D.f[DIR_PMP])[ktse ]; + //real mfacc = (D.f[DIR_MPP])[ktnw ]; + //real mfcca = (D.f[DIR_PPM])[kbne ]; + //real mfaaa = (D.f[DIR_MMM])[kbsw ]; + //real mfcaa = (D.f[DIR_PMM])[kbse ]; + //real mfaca = (D.f[DIR_MPM])[kbnw ]; + //////////////////////////////////////////////////////////////////////////////////// + //real rho = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + + // (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + + // ((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Achtung + one + //////////////////////////////////////////////////////////////////////////////////// + real rho = rhoBC[k]; + //////////////////////////////////////////////////////////////////////////////////// + real OoRho = c1o1 / (rho * 1.5f); + 
//////////////////////////////////////////////////////////////////////////////////// + real vvx = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + + (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) + + (mfcbb-mfabb)) * OoRho; + real vvy =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) + + (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) + + (mfbcb-mfbab)) * OoRho; + real vvz =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) + + (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) + + (mfbbc-mfbba)) * OoRho; + ///////////////////////// + //Test Values + //double vvx = 0.016; + //double vvy = zero; + //double vvz = zero; + //////////////////////////////////////////////////////////////////////////////////////// + ////round off error test + //if(vvx!=zero){ + // (kDistTest.f[DIR_P00])[k] = mfabb; + // (kDistTest.f[DIR_M00])[k] = mfcbb; + // (kDistTest.f[DIR_0P0])[k] = mfbab; + // (kDistTest.f[DIR_0M0])[k] = mfbcb; + // (kDistTest.f[DIR_00P])[k] = mfbba; + // (kDistTest.f[DIR_00M])[k] = mfbbc; + // (kDistTest.f[DIR_PP0])[k] = mfaab; + // (kDistTest.f[DIR_MM0])[k] = mfccb; + // (kDistTest.f[DIR_PM0])[k] = mfacb; + // (kDistTest.f[DIR_MP0])[k] = mfcab; + // (kDistTest.f[DIR_P0P])[k] = mfaba; + // (kDistTest.f[DIR_M0M])[k] = mfcbc; + // (kDistTest.f[DIR_P0M])[k] = mfabc; + // (kDistTest.f[DIR_M0P])[k] = mfcba; + // (kDistTest.f[DIR_0PP])[k] = mfbaa; + // (kDistTest.f[DIR_0MM])[k] = mfbcc; + // (kDistTest.f[DIR_0PM])[k] = mfbac; + // (kDistTest.f[DIR_0MP])[k] = mfbca; + // (kDistTest.f[DIR_000])[k] = KQK; + // (kDistTest.f[DIR_PPP])[k] = mfaaa; + // (kDistTest.f[DIR_MMP])[k] = mfcca; + // (kDistTest.f[DIR_PMP])[k] = mfaca; + // (kDistTest.f[DIR_MPP])[k] = mfcaa; + // (kDistTest.f[DIR_PPM])[k] = mfaac; + // (kDistTest.f[DIR_MMM])[k] = mfccc; + // (kDistTest.f[DIR_PMM])[k] = mfacc; + // (kDistTest.f[DIR_MPM])[k] = mfcac; + //}else{ + // (kDistTest.f[DIR_P00])[k] 
= zero; + // (kDistTest.f[DIR_M00])[k] = zero; + // (kDistTest.f[DIR_0P0])[k] = zero; + // (kDistTest.f[DIR_0M0])[k] = zero; + // (kDistTest.f[DIR_00P])[k] = zero; + // (kDistTest.f[DIR_00M])[k] = zero; + // (kDistTest.f[DIR_PP0])[k] = zero; + // (kDistTest.f[DIR_MM0])[k] = zero; + // (kDistTest.f[DIR_PM0])[k] = zero; + // (kDistTest.f[DIR_MP0])[k] = zero; + // (kDistTest.f[DIR_P0P])[k] = zero; + // (kDistTest.f[DIR_M0M])[k] = zero; + // (kDistTest.f[DIR_P0M])[k] = zero; + // (kDistTest.f[DIR_M0P])[k] = zero; + // (kDistTest.f[DIR_0PP])[k] = zero; + // (kDistTest.f[DIR_0MM])[k] = zero; + // (kDistTest.f[DIR_0PM])[k] = zero; + // (kDistTest.f[DIR_0MP])[k] = zero; + // (kDistTest.f[DIR_000])[k] = zero; + // (kDistTest.f[DIR_PPP])[k] = zero; + // (kDistTest.f[DIR_MMP])[k] = zero; + // (kDistTest.f[DIR_PMP])[k] = zero; + // (kDistTest.f[DIR_MPP])[k] = zero; + // (kDistTest.f[DIR_PPM])[k] = zero; + // (kDistTest.f[DIR_MMM])[k] = zero; + // (kDistTest.f[DIR_PMM])[k] = zero; + // (kDistTest.f[DIR_MPM])[k] = zero; + //} + + ////////////////////////////////////////////////////////////////////////////////////// + //// first bad fix for negative x velocity + ////if(vvx > zero) vvx = zero; + ////////////////////////////////////////////////////////////////////////////////////// + ////// second bad fix for negative x velocity + ////if(vvx > zero){ + //// vvx = -vvx; + //// vvy = -vvy; + //// vvz = -vvz; + ////} + //////////////////////////////////////////////////////////////////////////////////// + double vx2 = vvx * vvx; + double vy2 = vvy * vvy; + double vz2 = vvz * vvz; + ////////////////////////////////////////////////////////////////////////////////// + //original real XXb = -c2o3 + vx2; real XXc = -c1o2 * (XXb + c1o1 + vvx); real XXa = XXc + vvx; @@ -4104,213 +4119,213 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, real ZZb = -c2o3 + vz2; real ZZc = -c1o2 * (ZZb + c1o1 + vvz); real ZZa = ZZc + vvz; - 
////////////////////////////////////////////////////////////////////////////////// - //unkonditioniert - mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27; - mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27; - mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27; - mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27; - mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27; - mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27; - mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54; - mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54; - mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54; - mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54; - mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54; - mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54; - mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54; - mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54; - mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54; - mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54; - mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54; - mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54; - mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27; - mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216; - mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216; - mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216; - mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216; - mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216; - mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216; - mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216; - mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216; - ////////////////////////////////////////////////////////// - ////konditioniert - //double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one); - //double OnoOver216Rho = c1over216*rhoBC[k]; - //mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + 
three*vz2)); - //mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2)); - //mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2)); - //mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2))); - //mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2)); - //mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2))); - //mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)))); - //mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)))); - //mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))); - //mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))); - //mfcbc = 
-(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)))); - //mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)))); - //mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))); - //mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))); - //mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)))); - //mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)))); - //mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))); - //mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - 
three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))); - //mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2))); - //mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2)); - //mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2)); - //mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2)); - //mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2)); - //mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2)); - //mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - 
three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2)); - //mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2)); - //mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2)); + ////////////////////////////////////////////////////////////////////////////////// + //unkonditioniert + mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27; + mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27; + mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27; + mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27; + mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27; + mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27; + mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54; + mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54; + mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54; + mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54; + mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54; + mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54; + mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54; + mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54; + mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54; + mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54; + mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54; + mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54; + 
mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27; + mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216; + mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216; + mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216; + mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216; + mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216; + mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216; + mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216; + mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216; + ////////////////////////////////////////////////////////// + ////konditioniert + //double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one); + //double OnoOver216Rho = c1over216*rhoBC[k]; + //mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + three*vz2)); + //mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2)); + //mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2)); + //mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2))); + //mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2)); + //mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2))); + //mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + 
three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)))); + //mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)))); + //mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))); + //mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))); + //mfcbc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)))); + //mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)))); + //mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))); + //mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))); + //mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - 
two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)))); + //mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)))); + //mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))); + //mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))); + //mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2))); + //mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2)); + //mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2)); + //mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one 
+ three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2)); + //mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2)); + //mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2)); + //mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2)); + //mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2)); + //mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2)); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //if (isEvenTimestep==true) //{ - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 
] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - //} + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 
* size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; + //} //else //{ - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * 
size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; //} ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //__syncthreads(); - (D.f[DIR_P00 ])[ke ] = mfabb;//mfcbb; - (D.f[DIR_M00 ])[kw ] = mfcbb;//mfabb; - (D.f[DIR_0P0 ])[kn ] = mfbab;//mfbcb; - (D.f[DIR_0M0 ])[ks ] = mfbcb;//mfbab; - (D.f[DIR_00P ])[kt ] = mfbba;//mfbbc; - (D.f[DIR_00M ])[kb ] = mfbbc;//mfbba; - (D.f[DIR_PP0 ])[kne ] = mfaab;//mfccb; - (D.f[DIR_MM0 ])[ksw ] = mfccb;//mfaab; - (D.f[DIR_PM0 ])[kse ] = mfacb;//mfcab; - (D.f[DIR_MP0 ])[knw ] = mfcab;//mfacb; - (D.f[DIR_P0P ])[kte ] = mfaba;//mfcbc; - (D.f[DIR_M0M ])[kbw ] = mfcbc;//mfaba; - (D.f[DIR_P0M ])[kbe ] = mfabc;//mfcba; - (D.f[DIR_M0P ])[ktw ] = mfcba;//mfabc; - (D.f[DIR_0PP ])[ktn ] = mfbaa;//mfbcc; - (D.f[DIR_0MM ])[kbs ] = mfbcc;//mfbaa; - (D.f[DIR_0PM ])[kbn ] = mfbac;//mfbca; - (D.f[DIR_0MP ])[kts ] = mfbca;//mfbac; - (D.f[DIR_000])[kzero] = mfbbb;//mfbbb; - (D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc; - (D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac; - (D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac; - (D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc; - (D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca; - (D.f[DIR_MMM 
])[kbsw ] = mfccc;//mfaaa; - (D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa; - (D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca; - //(D.f[DIR_P00 ])[ke ] = mfcbb; - //(D.f[DIR_M00 ])[kw ] = mfabb; - //(D.f[DIR_0P0 ])[kn ] = mfbcb; - //(D.f[DIR_0M0 ])[ks ] = mfbab; - //(D.f[DIR_00P ])[kt ] = mfbbc; - //(D.f[DIR_00M ])[kb ] = mfbba; - //(D.f[DIR_PP0 ])[kne ] = mfccb; - //(D.f[DIR_MM0 ])[ksw ] = mfaab; - //(D.f[DIR_PM0 ])[kse ] = mfcab; - //(D.f[DIR_MP0 ])[knw ] = mfacb; - //(D.f[DIR_P0P ])[kte ] = mfcbc; - //(D.f[DIR_M0M ])[kbw ] = mfaba; - //(D.f[DIR_P0M ])[kbe ] = mfcba; - //(D.f[DIR_M0P ])[ktw ] = mfabc; - //(D.f[DIR_0PP ])[ktn ] = mfbcc; - //(D.f[DIR_0MM ])[kbs ] = mfbaa; - //(D.f[DIR_0PM ])[kbn ] = mfbca; - //(D.f[DIR_0MP ])[kts ] = mfbac; - //(D.f[DIR_000])[kzero] = mfbbb; - //(D.f[DIR_PPP ])[ktne ] = mfccc; - //(D.f[DIR_MMP ])[ktsw ] = mfaac; - //(D.f[DIR_PMP ])[ktse ] = mfcac; - //(D.f[DIR_MPP ])[ktnw ] = mfacc; - //(D.f[DIR_PPM ])[kbne ] = mfcca; - //(D.f[DIR_MMM ])[kbsw ] = mfaaa; - //(D.f[DIR_PMM ])[kbse ] = mfcaa; - //(D.f[DIR_MPM ])[kbnw ] = mfaca; - - //(D.f[DIR_P00 ])[ke ] = fE ; //f1_E ; //fW; //fE ; - //(D.f[DIR_M00 ])[kw ] = fW ; //f1_W ; //fE; //fW ; - //(D.f[DIR_0P0 ])[kn ] = fN ; //f1_N ; //fS; //fN ; - //(D.f[DIR_0M0 ])[ks ] = fS ; //f1_S ; //fN; //fS ; - //(D.f[DIR_00P ])[kt ] = fT ; //f1_T ; //fB; //fT ; - //(D.f[DIR_00M ])[kb ] = fB ; //f1_B ; //fT; //fB ; - //(D.f[DIR_PP0 ])[kne ] = fNE; //f1_NE; //fSW; //fNE; - //(D.f[DIR_MM0 ])[ksw ] = fSW; //f1_SW; //fNE; //fSW; - //(D.f[DIR_PM0 ])[kse ] = fSE; //f1_SE; //fNW; //fSE; - //(D.f[DIR_MP0 ])[knw ] = fNW; //f1_NW; //fSE; //fNW; - //(D.f[DIR_P0P ])[kte ] = fTE; //f1_TE; //fBW; //fTE; - //(D.f[DIR_M0M ])[kbw ] = fBW; //f1_BW; //fTE; //fBW; - //(D.f[DIR_P0M ])[kbe ] = fBE; //f1_BE; //fTW; //fBE; - //(D.f[DIR_M0P ])[ktw ] = fTW; //f1_TW; //fBE; //fTW; - //(D.f[DIR_0PP ])[ktn ] = fTN; //f1_TN; //fBS; //fTN; - //(D.f[DIR_0MM ])[kbs ] = fBS; //f1_BS; //fTN; //fBS; - //(D.f[DIR_0PM ])[kbn ] = fBN; //f1_BN; //fTS; //fBN; 
- //(D.f[DIR_0MP ])[kts ] = fTS; //f1_TS; //fBN; //fTS; + (D.f[DIR_P00])[ke ] = mfabb;//mfcbb; + (D.f[DIR_M00])[kw ] = mfcbb;//mfabb; + (D.f[DIR_0P0])[kn ] = mfbab;//mfbcb; + (D.f[DIR_0M0])[ks ] = mfbcb;//mfbab; + (D.f[DIR_00P])[kt ] = mfbba;//mfbbc; + (D.f[DIR_00M])[kb ] = mfbbc;//mfbba; + (D.f[DIR_PP0])[kne ] = mfaab;//mfccb; + (D.f[DIR_MM0])[ksw ] = mfccb;//mfaab; + (D.f[DIR_PM0])[kse ] = mfacb;//mfcab; + (D.f[DIR_MP0])[knw ] = mfcab;//mfacb; + (D.f[DIR_P0P])[kte ] = mfaba;//mfcbc; + (D.f[DIR_M0M])[kbw ] = mfcbc;//mfaba; + (D.f[DIR_P0M])[kbe ] = mfabc;//mfcba; + (D.f[DIR_M0P])[ktw ] = mfcba;//mfabc; + (D.f[DIR_0PP])[ktn ] = mfbaa;//mfbcc; + (D.f[DIR_0MM])[kbs ] = mfbcc;//mfbaa; + (D.f[DIR_0PM])[kbn ] = mfbac;//mfbca; + (D.f[DIR_0MP])[kts ] = mfbca;//mfbac; + (D.f[DIR_000])[kzero] = mfbbb;//mfbbb; + (D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc; + (D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac; + (D.f[DIR_PMP])[ktse ] = mfaca;//mfcac; + (D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc; + (D.f[DIR_PPM])[kbne ] = mfaac;//mfcca; + (D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa; + (D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa; + (D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca; + //(D.f[DIR_P00])[ke ] = mfcbb; + //(D.f[DIR_M00])[kw ] = mfabb; + //(D.f[DIR_0P0])[kn ] = mfbcb; + //(D.f[DIR_0M0])[ks ] = mfbab; + //(D.f[DIR_00P])[kt ] = mfbbc; + //(D.f[DIR_00M])[kb ] = mfbba; + //(D.f[DIR_PP0])[kne ] = mfccb; + //(D.f[DIR_MM0])[ksw ] = mfaab; + //(D.f[DIR_PM0])[kse ] = mfcab; + //(D.f[DIR_MP0])[knw ] = mfacb; + //(D.f[DIR_P0P])[kte ] = mfcbc; + //(D.f[DIR_M0M])[kbw ] = mfaba; + //(D.f[DIR_P0M])[kbe ] = mfcba; + //(D.f[DIR_M0P])[ktw ] = mfabc; + //(D.f[DIR_0PP])[ktn ] = mfbcc; + //(D.f[DIR_0MM])[kbs ] = mfbaa; + //(D.f[DIR_0PM])[kbn ] = mfbca; + //(D.f[DIR_0MP])[kts ] = mfbac; + //(D.f[DIR_000])[kzero] = mfbbb; + //(D.f[DIR_PPP])[ktne ] = mfccc; + //(D.f[DIR_MMP])[ktsw ] = mfaac; + //(D.f[DIR_PMP])[ktse ] = mfcac; + //(D.f[DIR_MPP])[ktnw ] = mfacc; + //(D.f[DIR_PPM])[kbne ] = mfcca; + //(D.f[DIR_MMM])[kbsw ] = mfaaa; + 
//(D.f[DIR_PMM])[kbse ] = mfcaa; + //(D.f[DIR_MPM])[kbnw ] = mfaca; + + //(D.f[DIR_P00])[ke ] = fE ; //f1_E ; //fW; //fE ; + //(D.f[DIR_M00])[kw ] = fW ; //f1_W ; //fE; //fW ; + //(D.f[DIR_0P0])[kn ] = fN ; //f1_N ; //fS; //fN ; + //(D.f[DIR_0M0])[ks ] = fS ; //f1_S ; //fN; //fS ; + //(D.f[DIR_00P])[kt ] = fT ; //f1_T ; //fB; //fT ; + //(D.f[DIR_00M])[kb ] = fB ; //f1_B ; //fT; //fB ; + //(D.f[DIR_PP0])[kne ] = fNE; //f1_NE; //fSW; //fNE; + //(D.f[DIR_MM0])[ksw ] = fSW; //f1_SW; //fNE; //fSW; + //(D.f[DIR_PM0])[kse ] = fSE; //f1_SE; //fNW; //fSE; + //(D.f[DIR_MP0])[knw ] = fNW; //f1_NW; //fSE; //fNW; + //(D.f[DIR_P0P])[kte ] = fTE; //f1_TE; //fBW; //fTE; + //(D.f[DIR_M0M])[kbw ] = fBW; //f1_BW; //fTE; //fBW; + //(D.f[DIR_P0M])[kbe ] = fBE; //f1_BE; //fTW; //fBE; + //(D.f[DIR_M0P])[ktw ] = fTW; //f1_TW; //fBE; //fTW; + //(D.f[DIR_0PP])[ktn ] = fTN; //f1_TN; //fBS; //fTN; + //(D.f[DIR_0MM])[kbs ] = fBS; //f1_BS; //fTN; //fBS; + //(D.f[DIR_0PM])[kbn ] = fBN; //f1_BN; //fTS; //fBN; + //(D.f[DIR_0MP])[kts ] = fTS; //f1_TS; //fBN; //fTS; //(D.f[DIR_000])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO; - //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE; //fBSW; //fTNE; - //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW; //fTNE; //fBSW; - //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE; //fTSW; //fBNE; - //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW; //fBNE; //fTSW; - //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE; //fBNW; //fTSE; - //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW; //fTSE; //fBNW; - //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE; //fTNW; //fBSE; - //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW; //fBSE; //fTNW; + //(D.f[DIR_PPP])[ktne ] = fTNE; //f1_TNE; //fBSW; //fTNE; + //(D.f[DIR_MMM])[kbsw ] = fBSW; //f1_BSW; //fTNE; //fBSW; + //(D.f[DIR_PPM])[kbne ] = fBNE; //f1_BNE; //fTSW; //fBNE; + //(D.f[DIR_MMP])[ktsw ] = fTSW; //f1_TSW; //fBNE; //fTSW; + //(D.f[DIR_PMP])[ktse ] = fTSE; //f1_TSE; //fBNW; //fTSE; + //(D.f[DIR_MPM])[kbnw ] = fBNW; //f1_BNW; //fTSE; //fBNW; + //(D.f[DIR_PMM])[kbse ] = fBSE; 
//f1_BSE; //fTNW; //fBSE; + //(D.f[DIR_MPP])[ktnw ] = fTNW; //f1_TNW; //fBSE; //fTNW; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -4354,19 +4369,20 @@ __global__ void QPressDeviceEQZ27(real* rhoBC, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceZero27( real* DD, - int* k_Q, - unsigned int numberOfBCnodes, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceZero27( + real* DD, + int* k_Q, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -4410,94 +4426,94 @@ __global__ void QPressDeviceZero27( real* DD, Distributions27 D; if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M 
*size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = 
&DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + 
D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //__syncthreads(); - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00 ])[ke ] =c0o1; - (D.f[DIR_M00 ])[kw ] =c0o1; - (D.f[DIR_0P0 ])[kn ] =c0o1; - (D.f[DIR_0M0 ])[ks ] =c0o1; - (D.f[DIR_00P ])[kt ] =c0o1; - (D.f[DIR_00M ])[kb ] =c0o1; - (D.f[DIR_PP0 ])[kne ] =c0o1; - (D.f[DIR_MM0 ])[ksw ] =c0o1; - (D.f[DIR_PM0 ])[kse ] =c0o1; - (D.f[DIR_MP0 ])[knw ] =c0o1; - (D.f[DIR_P0P ])[kte ] =c0o1; - (D.f[DIR_M0M ])[kbw ] =c0o1; - (D.f[DIR_P0M ])[kbe ] =c0o1; - (D.f[DIR_M0P ])[ktw ] =c0o1; - (D.f[DIR_0PP ])[ktn ] =c0o1; - (D.f[DIR_0MM ])[kbs ] =c0o1; - (D.f[DIR_0PM ])[kbn ] =c0o1; - (D.f[DIR_0MP ])[kts ] =c0o1; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + (D.f[DIR_P00])[ke ] =c0o1; + (D.f[DIR_M00])[kw ] =c0o1; + (D.f[DIR_0P0])[kn ] =c0o1; + (D.f[DIR_0M0])[ks ] =c0o1; + (D.f[DIR_00P])[kt ] =c0o1; + (D.f[DIR_00M])[kb ] =c0o1; + (D.f[DIR_PP0])[kne ] =c0o1; + (D.f[DIR_MM0])[ksw ] =c0o1; + (D.f[DIR_PM0])[kse ] =c0o1; + (D.f[DIR_MP0])[knw ] =c0o1; + (D.f[DIR_P0P])[kte ] =c0o1; + (D.f[DIR_M0M])[kbw ] =c0o1; + (D.f[DIR_P0M])[kbe ] =c0o1; + (D.f[DIR_M0P])[ktw ] =c0o1; + (D.f[DIR_0PP])[ktn ] =c0o1; + (D.f[DIR_0MM])[kbs ] =c0o1; + (D.f[DIR_0PM])[kbn ] =c0o1; + (D.f[DIR_0MP])[kts ] 
=c0o1; (D.f[DIR_000])[kzero] =c0o1; - (D.f[DIR_PPP ])[ktne ] =c0o1; - (D.f[DIR_MMP ])[ktsw ] =c0o1; - (D.f[DIR_PMP ])[ktse ] =c0o1; - (D.f[DIR_MPP ])[ktnw ] =c0o1; - (D.f[DIR_PPM ])[kbne ] =c0o1; - (D.f[DIR_MMM ])[kbsw ] =c0o1; - (D.f[DIR_PMM ])[kbse ] =c0o1; - (D.f[DIR_MPM ])[kbnw ] =c0o1; + (D.f[DIR_PPP])[ktne ] =c0o1; + (D.f[DIR_MMP])[ktsw ] =c0o1; + (D.f[DIR_PMP])[ktse ] =c0o1; + (D.f[DIR_MPP])[ktnw ] =c0o1; + (D.f[DIR_PPM])[kbne ] =c0o1; + (D.f[DIR_MMM])[kbsw ] =c0o1; + (D.f[DIR_PMM])[kbse ] =c0o1; + (D.f[DIR_MPM])[kbnw ] =c0o1; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -4541,22 +4557,23 @@ __global__ void QPressDeviceZero27( real* DD, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QPressDeviceFake27( real* rhoBC, - real* DD, - int* k_Q, - int* k_N, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDeviceFake27( + real* rhoBC, + real* DD, + int* k_Q, + int* k_N, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index const unsigned nx = blockDim.x; const unsigned ny = gridDim.x; @@ -4630,148 +4647,148 @@ __global__ void QPressDeviceFake27( real* rhoBC, Distributions27 D; if 
(isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_W = (D.f[DIR_P00 ])[k1e ]; - f1_E = (D.f[DIR_M00 ])[k1w ]; - f1_S = (D.f[DIR_0P0 ])[k1n ]; - f1_N = (D.f[DIR_0M0 ])[k1s ]; - f1_B = (D.f[DIR_00P ])[k1t ]; - f1_T = (D.f[DIR_00M ])[k1b ]; - f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - f1_NW = (D.f[DIR_PM0 ])[k1se ]; - f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - f1_BW = (D.f[DIR_P0P ])[k1te ]; - f1_TE = (D.f[DIR_M0M ])[k1bw ]; - f1_TW = (D.f[DIR_P0M ])[k1be ]; - f1_BE = (D.f[DIR_M0P ])[k1tw ]; - f1_BS = (D.f[DIR_0PP ])[k1tn ]; - f1_TN = (D.f[DIR_0MM ])[k1bs ]; - f1_TS = (D.f[DIR_0PM ])[k1bn ]; - f1_BN = (D.f[DIR_0MP ])[k1ts ]; + f1_W = (D.f[DIR_P00])[k1e ]; + f1_E = (D.f[DIR_M00])[k1w ]; + f1_S = (D.f[DIR_0P0])[k1n ]; + f1_N = 
(D.f[DIR_0M0])[k1s ]; + f1_B = (D.f[DIR_00P])[k1t ]; + f1_T = (D.f[DIR_00M])[k1b ]; + f1_SW = (D.f[DIR_PP0])[k1ne ]; + f1_NE = (D.f[DIR_MM0])[k1sw ]; + f1_NW = (D.f[DIR_PM0])[k1se ]; + f1_SE = (D.f[DIR_MP0])[k1nw ]; + f1_BW = (D.f[DIR_P0P])[k1te ]; + f1_TE = (D.f[DIR_M0M])[k1bw ]; + f1_TW = (D.f[DIR_P0M])[k1be ]; + f1_BE = (D.f[DIR_M0P])[k1tw ]; + f1_BS = (D.f[DIR_0PP])[k1tn ]; + f1_TN = (D.f[DIR_0MM])[k1bs ]; + f1_TS = (D.f[DIR_0PM])[k1bn ]; + f1_BN = (D.f[DIR_0MP])[k1ts ]; f1_ZERO = (D.f[DIR_000])[k1zero]; - f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - f1_BNE = (D.f[DIR_MMP ])[k1tsw ]; - f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - f1_TNE = (D.f[DIR_MMM ])[k1bsw ]; - f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + f1_BSW = (D.f[DIR_PPP])[k1tne ]; + f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + f1_BNW = (D.f[DIR_PMP])[k1tse ]; + f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + f1_TSW = (D.f[DIR_PPM])[k1bne ]; + f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + f1_TNW = (D.f[DIR_PMM])[k1bse ]; + f1_TSE = (D.f[DIR_MPM])[k1bnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3; vx1 = ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + ((f1_BE - f1_TW) + (f1_TE - f1_BW)) + ((f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_E - f1_W); + (f1_E - f1_W); vx2 = (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) + ((f1_BN - f1_TS) + (f1_TN - f1_BS)) + (-(f1_SE - f1_NW) + (f1_NE - f1_SW)) + - (f1_N - f1_S); + (f1_N - f1_S); vx3 = ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) + (-(f1_BN - f1_TS) + (f1_TN - f1_BS)) + ((f1_TE - f1_BW) - (f1_BE - f1_TW)) + - (f1_T - f1_B); + (f1_T - f1_B); real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); ////////////////////////////////////////////////////////////////////////// real drho1 = 
f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; - //drho1 = (drho1 + rhoBC[k])/2.f; - drho1 = drho1 - rhoBC[k]; + //drho1 = (drho1 + rhoBC[k])/2.f; + drho1 = drho1 - rhoBC[k]; __syncthreads(); - (D.f[DIR_P00 ])[ke ] = c2o27* (rhoBC[k]+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); - (D.f[DIR_M00 ])[kw ] = c2o27* (rhoBC[k]+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); - (D.f[DIR_0P0 ])[kn ] = c2o27* (rhoBC[k]+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); - (D.f[DIR_0M0 ])[ks ] = c2o27* (rhoBC[k]+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); - (D.f[DIR_00P ])[kt ] = c2o27* (rhoBC[k]+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); - (D.f[DIR_00M ])[kb ] = c2o27* (rhoBC[k]+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); - (D.f[DIR_PP0 ])[kne ] = f1_SW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_MM0 ])[ksw ] = f1_NE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_PM0 ])[kse ] = f1_NW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_MP0 ])[knw ] = f1_SE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_P0P ])[kte ] = f1_BW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_M0M ])[kbw ] = f1_TE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_P0M ])[kbe ] = f1_TW -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_M0P ])[ktw ] = f1_BE -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0PP ])[ktn ] = f1_BS -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0MM ])[kbs ] = f1_TN -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0PM ])[kbn ] = f1_TS -c1o54*drho1; // c1o100; // zero; // - (D.f[DIR_0MP ])[kts ] = f1_BN -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P00])[ke ] = c2o27* (rhoBC[k]+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); + (D.f[DIR_M00])[kw ] = c2o27* (rhoBC[k]+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_0P0])[kn ] = c2o27* (rhoBC[k]+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_0M0])[ks ] = c2o27* (rhoBC[k]+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_00P])[kt 
] = c2o27* (rhoBC[k]+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_00M])[kb ] = c2o27* (rhoBC[k]+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_PP0])[kne ] = f1_SW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_MM0])[ksw ] = f1_NE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_PM0])[kse ] = f1_NW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_MP0])[knw ] = f1_SE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P0P])[kte ] = f1_BW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_M0M])[kbw ] = f1_TE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_P0M])[kbe ] = f1_TW -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_M0P])[ktw ] = f1_BE -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0PP])[ktn ] = f1_BS -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0MM])[kbs ] = f1_TN -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0PM])[kbn ] = f1_TS -c1o54*drho1; // c1o100; // zero; // + (D.f[DIR_0MP])[kts ] = f1_BN -c1o54*drho1; // c1o100; // zero; // (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1; // c1o100; // zero; // - (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1; // c1o100; // zero; // - (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MMM])[kbsw ] = f1_TNE 
-c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1; // c1o100; // zero; // + (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1; // c1o100; // zero; // } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -4815,461 +4832,462 @@ __global__ void QPressDeviceFake27( real* rhoBC, ////////////////////////////////////////////////////////////////////////// -__global__ void QPressDevice27_IntBB(real* rho, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QPressDevice27_IntBB( + real* rho, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { - Distributions27 D; - if (isEvenTimestep==true) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - 
D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if(k < numberOfBCnodes) - { - //////////////////////////////////////////////////////////////////////////////// - //real VeloX = vx[k]; - //real VeloY = vy[k]; - //real VeloZ = vz[k]; 
//(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny) - //////////////////////////////////////////////////////////////////////////////// - real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, - *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, - *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, - *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, - *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; - q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; - q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; - q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; - q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes]; - q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes]; - q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes]; - q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes]; - q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes]; - //////////////////////////////////////////////////////////////////////////////// - //index - unsigned int KQK = k_Q[k]; - unsigned int kzero= KQK; - unsigned int ke = KQK; - unsigned int kw = neighborX[KQK]; - unsigned int kn = KQK; - unsigned int ks = neighborY[KQK]; - unsigned int kt = KQK; - unsigned int kb = neighborZ[KQK]; - unsigned int ksw = neighborY[kw]; - unsigned int kne = KQK; - unsigned 
int kse = ks; - unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - unsigned int kte = KQK; - unsigned int kbe = kb; - unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - unsigned int ktn = KQK; - unsigned int kbn = kb; - unsigned int kts = ks; - unsigned int ktse = ks; - unsigned int kbnw = kbw; - unsigned int ktnw = kw; - unsigned int kbse = kbs; - unsigned int ktsw = ksw; - unsigned int kbne = kb; - unsigned int ktne = KQK; - unsigned int kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////// - real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, - f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; - //////////////////////////////////////////////////////////////////////////////// - real vx1, vx2, vx3, drho, feq, q; - drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + - f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + - f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); - - vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - 
f_BNE)) + - ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + - (f_E - f_W))/(c1o1+drho); - - - vx2 = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + - ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + - (f_N - f_S))/(c1o1+drho); - - vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + - (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + - (f_T - f_B))/(c1o1+drho); - - real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); - - ////////////////////////////////////////////////////////////////////////// - if (isEvenTimestep==false) - { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; 
- D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //Test - //(D.f[DIR_000])[k]=c1o10; - real rhoDiff = drho - rho[k]; - real VeloX = vx1; - real VeloY = vx2; - real VeloZ = vx3; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - q = q_dirE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*( vx1 )*( vx1 )-cu_sq); - (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX )))/(c1o1+q); - } - - q = q_dirW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*(-vx1 )*(-vx1 )-cu_sq); - (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX )))/(c1o1+q); - } - - q = q_dirN[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*( vx2 )*( vx2 )-cu_sq); - 
(D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY )))/(c1o1+q); - } - - q = q_dirS[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*( -vx2 )*( -vx2 )-cu_sq); - (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY )))/(c1o1+q); - } - - q = q_dirT[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*( vx3)*( vx3)-cu_sq); - (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ )))/(c1o1+q); - } - - q = q_dirB[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c2o27* (drho+c9o2*( -vx3)*( -vx3)-cu_sq); - (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ )))/(c1o1+q); - } - - q = q_dirNE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q); - } - - q = q_dirSW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q); - } - - q = q_dirSE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q); - } - - q = q_dirNW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q); - } - - q = q_dirTE[k]; - if (q>=c0o1 && q<=c1o1) - 
{ - feq=c1o54* (drho+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q); - } - - q = q_dirBW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q); - } - - q = q_dirBE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); - (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q); - } - - q = q_dirTW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q); - } - - q = q_dirTN[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); - (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q); - } - - q = q_dirBS[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirBN[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); - (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirTS[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o54* (drho+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - 
(D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q); - } - - q = q_dirTNE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); - (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q); - } - - q = q_dirBSW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirBNE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirTSW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q); - } - - q = q_dirTSE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q); - } - - q = q_dirBNW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirBSE[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( 
vx1-vx2-vx3)-cu_sq); - (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q); - } - - q = q_dirTNW[k]; - if (q>=c0o1 && q<=c1o1) - { - feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); - (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q); - } - } + Distributions27 D; + if (isEvenTimestep==true) + { + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } + else + { + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = 
&DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; + } + //////////////////////////////////////////////////////////////////////////////// + const unsigned x = threadIdx.x; // Globaler x-Index + const unsigned y = blockIdx.x; // Globaler y-Index + const unsigned z = blockIdx.y; // Globaler z-Index + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + const unsigned k = nx*(ny*z + y) + x; + ////////////////////////////////////////////////////////////////////////// + + if(k < numberOfBCnodes) + { + //////////////////////////////////////////////////////////////////////////////// + //real VeloX = vx[k]; + //real VeloY = vy[k]; + //real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny) + 
//////////////////////////////////////////////////////////////////////////////// + real *q_dirE, *q_dirW, *q_dirN, *q_dirS, *q_dirT, *q_dirB, + *q_dirNE, *q_dirSW, *q_dirSE, *q_dirNW, *q_dirTE, *q_dirBW, + *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, + *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, + *q_dirBSE, *q_dirBNW; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; + q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; + q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; + q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes]; + q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes]; + q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes]; + q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes]; + q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes]; + //////////////////////////////////////////////////////////////////////////////// + //index + unsigned int KQK = k_Q[k]; + unsigned int kzero= KQK; + unsigned int ke = KQK; + unsigned int kw = neighborX[KQK]; + unsigned int kn = KQK; + unsigned int ks = neighborY[KQK]; + unsigned int kt = KQK; + unsigned int kb = neighborZ[KQK]; + unsigned int ksw = neighborY[kw]; + unsigned int kne = KQK; + unsigned int kse = ks; + unsigned int knw = kw; + unsigned int kbw = neighborZ[kw]; + unsigned int 
kte = KQK; + unsigned int kbe = kb; + unsigned int ktw = kw; + unsigned int kbs = neighborZ[ks]; + unsigned int ktn = KQK; + unsigned int kbn = kb; + unsigned int kts = ks; + unsigned int ktse = ks; + unsigned int kbnw = kbw; + unsigned int ktnw = kw; + unsigned int kbse = kbs; + unsigned int ktsw = ksw; + unsigned int kbne = kb; + unsigned int ktne = KQK; + unsigned int kbsw = neighborZ[ksw]; + //////////////////////////////////////////////////////////////////////////////// + real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, + f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; + + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; + //////////////////////////////////////////////////////////////////////////////// + real vx1, vx2, vx3, drho, feq, q; + drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + + f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + + f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); + + vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) + + (f_E - f_W))/(c1o1+drho); + + + vx2 = 
((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) + + ((f_BN - f_TS) + (f_TN - f_BS)) + (-(f_SE - f_NW) + (f_NE - f_SW)) + + (f_N - f_S))/(c1o1+drho); + + vx3 = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) + + (-(f_BN - f_TS) + (f_TN - f_BS)) + ((f_TE - f_BW) - (f_BE - f_TW)) + + (f_T - f_B))/(c1o1+drho); + + real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); + + ////////////////////////////////////////////////////////////////////////// + if (isEvenTimestep==false) + { + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; + } + else + { + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * 
numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //Test + //(D.f[DIR_000])[k]=c1o10; + real rhoDiff = drho - rho[k]; + real VeloX = vx1; + real VeloY = vx2; + real VeloZ = vx3; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + q = q_dirE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX )))/(c1o1+q); + } + + q = q_dirW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*(-vx1 )*(-vx1 )-cu_sq); + 
(D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX )))/(c1o1+q); + } + + q = q_dirN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY )))/(c1o1+q); + } + + q = q_dirS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY )))/(c1o1+q); + } + + q = q_dirT[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ )))/(c1o1+q); + } + + q = q_dirB[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c2o27* (drho+c9o2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ )))/(c1o1+q); + } + + q = q_dirNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q); + } + + q = q_dirSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q); + } + + q = q_dirSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q); + } + + q = q_dirNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* 
(drho+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q); + } + + q = q_dirTE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q); + } + + q = q_dirBW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q); + } + + q = q_dirBE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q); + } + + q = q_dirTW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q); + } + + q = q_dirTN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); + (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q); + } + + q = q_dirBS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirBN[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); + 
(D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirTS[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o54* (drho+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q); + } + + q = q_dirTNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q); + } + + q = q_dirBSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirBNE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirTSW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q); + } + + q = q_dirTSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q); + } + + q = q_dirBNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + 
(D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirBSE[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q); + } + + q = q_dirTNW[k]; + if (q>=c0o1 && q<=c1o1) + { + feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q); + } + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu index 8675780d26e63656b04fdfc1f9836b1eba8d1b87..5d4572e234fdcad072e9b666c911f3250c32346a 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu @@ -21,7 +21,7 @@ __global__ void PressSchlaff27(real* rhoBC, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -71,94 +71,94 @@ __global__ void PressSchlaff27(real* rhoBC, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 
*size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + 
D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = 
&DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_E = (D.f[DIR_P00 ])[ke ]; - f1_W = (D.f[DIR_M00 ])[kw ]; - f1_N = (D.f[DIR_0P0 ])[kn ]; - f1_S = (D.f[DIR_0M0 ])[ks ]; - f1_T = (D.f[DIR_00P ])[kt ]; - f1_B = (D.f[DIR_00M ])[kb ]; - f1_NE = (D.f[DIR_PP0 ])[kne ]; - f1_SW = (D.f[DIR_MM0 ])[ksw ]; - f1_SE = (D.f[DIR_PM0 ])[kse ]; - f1_NW = (D.f[DIR_MP0 ])[knw ]; - f1_TE = (D.f[DIR_P0P ])[kte ]; - f1_BW = (D.f[DIR_M0M ])[kbw ]; - f1_BE = (D.f[DIR_P0M ])[kbe ]; - f1_TW = (D.f[DIR_M0P ])[ktw ]; - f1_TN = (D.f[DIR_0PP ])[ktn ]; - f1_BS = (D.f[DIR_0MM ])[kbs ]; - f1_BN = (D.f[DIR_0PM ])[kbn ]; - f1_TS = (D.f[DIR_0MP ])[kts ]; + f1_E = (D.f[DIR_P00])[ke ]; + f1_W = (D.f[DIR_M00])[kw ]; + f1_N = (D.f[DIR_0P0])[kn ]; + f1_S = (D.f[DIR_0M0])[ks ]; + f1_T = (D.f[DIR_00P])[kt ]; + f1_B = (D.f[DIR_00M])[kb ]; + f1_NE = (D.f[DIR_PP0])[kne ]; + f1_SW = (D.f[DIR_MM0])[ksw ]; + f1_SE = (D.f[DIR_PM0])[kse ]; + f1_NW = (D.f[DIR_MP0])[knw ]; + f1_TE = (D.f[DIR_P0P])[kte ]; + f1_BW = (D.f[DIR_M0M])[kbw ]; + f1_BE = (D.f[DIR_P0M])[kbe ]; + f1_TW = (D.f[DIR_M0P])[ktw ]; + f1_TN = (D.f[DIR_0PP])[ktn ]; + f1_BS = (D.f[DIR_0MM])[kbs ]; + f1_BN = 
(D.f[DIR_0PM])[kbn ]; + f1_TS = (D.f[DIR_0MP])[kts ]; f1_ZERO = (D.f[DIR_000])[kzero]; - f1_TNE = (D.f[DIR_PPP ])[ktne ]; - f1_TSW = (D.f[DIR_MMP ])[ktsw ]; - f1_TSE = (D.f[DIR_PMP ])[ktse ]; - f1_TNW = (D.f[DIR_MPP ])[ktnw ]; - f1_BNE = (D.f[DIR_PPM ])[kbne ]; - f1_BSW = (D.f[DIR_MMM ])[kbsw ]; - f1_BSE = (D.f[DIR_PMM ])[kbse ]; - f1_BNW = (D.f[DIR_MPM ])[kbnw ]; + f1_TNE = (D.f[DIR_PPP])[ktne ]; + f1_TSW = (D.f[DIR_MMP])[ktsw ]; + f1_TSE = (D.f[DIR_PMP])[ktse ]; + f1_TNW = (D.f[DIR_MPP])[ktnw ]; + f1_BNE = (D.f[DIR_PPM])[kbne ]; + f1_BSW = (D.f[DIR_MMM])[kbsw ]; + f1_BSE = (D.f[DIR_PMM])[kbse ]; + f1_BNW = (D.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// real cs = c1o1/sqrt(c3o1); real csp1 = cs + c1o1; @@ -222,15 +222,15 @@ __global__ void PressSchlaff27(real* rhoBC, deltaVz0[k] = tempDeltaV; - (D.f[DIR_00M ])[kb ] = f1_B ; - (D.f[DIR_M0M ])[kbw ] = f1_BW ; - (D.f[DIR_P0M ])[kbe ] = f1_BE ; - (D.f[DIR_0MM ])[kbs ] = f1_BS ; - (D.f[DIR_0PM ])[kbn ] = f1_BN ; - (D.f[DIR_PPM ])[kbne ] = f1_BNE ; - (D.f[DIR_MMM ])[kbsw ] = f1_BSW ; - (D.f[DIR_PMM ])[kbse ] = f1_BSE ; - (D.f[DIR_MPM ])[kbnw ] = f1_BNW ; + (D.f[DIR_00M])[kb ] = f1_B ; + (D.f[DIR_M0M])[kbw ] = f1_BW ; + (D.f[DIR_P0M])[kbe ] = f1_BE ; + (D.f[DIR_0MM])[kbs ] = f1_BS ; + (D.f[DIR_0PM])[kbn ] = f1_BN ; + (D.f[DIR_PPM])[kbne ] = f1_BNE ; + (D.f[DIR_MMM])[kbsw ] = f1_BSW ; + (D.f[DIR_PMM])[kbse ] = f1_BSE ; + (D.f[DIR_MPM])[kbnw ] = f1_BNW ; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -285,7 +285,7 @@ __global__ void VelSchlaff27( int t, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -335,122 +335,122 @@ __global__ void VelSchlaff27( int t, Distributions27 
D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_E = (D.f[DIR_P00 ])[ke ]; - f1_W = (D.f[DIR_M00 ])[kw ]; - f1_N = (D.f[DIR_0P0 ])[kn ]; - f1_S = (D.f[DIR_0M0 ])[ks ]; - f1_T = (D.f[DIR_00P ])[kt ]; - f1_B = (D.f[DIR_00M ])[kb ]; - f1_NE = (D.f[DIR_PP0 ])[kne ]; - f1_SW = (D.f[DIR_MM0 ])[ksw ]; - f1_SE = (D.f[DIR_PM0 ])[kse ]; - f1_NW = (D.f[DIR_MP0 ])[knw ]; - f1_TE = (D.f[DIR_P0P ])[kte ]; - f1_BW = (D.f[DIR_M0M ])[kbw ]; - f1_BE = (D.f[DIR_P0M ])[kbe ]; - f1_TW = (D.f[DIR_M0P ])[ktw ]; - f1_TN = (D.f[DIR_0PP ])[ktn ]; - f1_BS = (D.f[DIR_0MM ])[kbs ]; - f1_BN = (D.f[DIR_0PM ])[kbn ]; - f1_TS = (D.f[DIR_0MP ])[kts ]; + f1_E = (D.f[DIR_P00])[ke ]; + f1_W = (D.f[DIR_M00])[kw ]; + f1_N = (D.f[DIR_0P0])[kn ]; + f1_S = (D.f[DIR_0M0])[ks ]; + f1_T 
= (D.f[DIR_00P])[kt ]; + f1_B = (D.f[DIR_00M])[kb ]; + f1_NE = (D.f[DIR_PP0])[kne ]; + f1_SW = (D.f[DIR_MM0])[ksw ]; + f1_SE = (D.f[DIR_PM0])[kse ]; + f1_NW = (D.f[DIR_MP0])[knw ]; + f1_TE = (D.f[DIR_P0P])[kte ]; + f1_BW = (D.f[DIR_M0M])[kbw ]; + f1_BE = (D.f[DIR_P0M])[kbe ]; + f1_TW = (D.f[DIR_M0P])[ktw ]; + f1_TN = (D.f[DIR_0PP])[ktn ]; + f1_BS = (D.f[DIR_0MM])[kbs ]; + f1_BN = (D.f[DIR_0PM])[kbn ]; + f1_TS = (D.f[DIR_0MP])[kts ]; f1_ZERO = (D.f[DIR_000])[kzero]; - f1_TNE = (D.f[DIR_PPP ])[ktne ]; - f1_TSW = (D.f[DIR_MMP ])[ktsw ]; - f1_TSE = (D.f[DIR_PMP ])[ktse ]; - f1_TNW = (D.f[DIR_MPP ])[ktnw ]; - f1_BNE = (D.f[DIR_PPM ])[kbne ]; - f1_BSW = (D.f[DIR_MMM ])[kbsw ]; - f1_BSE = (D.f[DIR_PMM ])[kbse ]; - f1_BNW = (D.f[DIR_MPM ])[kbnw ]; - //f1_W = (D.f[DIR_P00 ])[ke ]; - //f1_E = (D.f[DIR_M00 ])[kw ]; - //f1_S = (D.f[DIR_0P0 ])[kn ]; - //f1_N = (D.f[DIR_0M0 ])[ks ]; - //f1_B = (D.f[DIR_00P ])[kt ]; - //f1_T = (D.f[DIR_00M ])[kb ]; - //f1_SW = (D.f[DIR_PP0 ])[kne ]; - //f1_NE = (D.f[DIR_MM0 ])[ksw ]; - //f1_NW = (D.f[DIR_PM0 ])[kse ]; - //f1_SE = (D.f[DIR_MP0 ])[knw ]; - //f1_BW = (D.f[DIR_P0P ])[kte ]; - //f1_TE = (D.f[DIR_M0M ])[kbw ]; - //f1_TW = (D.f[DIR_P0M ])[kbe ]; - //f1_BE = (D.f[DIR_M0P ])[ktw ]; - //f1_BS = (D.f[DIR_0PP ])[ktn ]; - //f1_TN = (D.f[DIR_0MM ])[kbs ]; - //f1_TS = (D.f[DIR_0PM ])[kbn ]; - //f1_BN = (D.f[DIR_0MP ])[kts ]; + f1_TNE = (D.f[DIR_PPP])[ktne ]; + f1_TSW = (D.f[DIR_MMP])[ktsw ]; + f1_TSE = (D.f[DIR_PMP])[ktse ]; + f1_TNW = (D.f[DIR_MPP])[ktnw ]; + f1_BNE = (D.f[DIR_PPM])[kbne ]; + f1_BSW = (D.f[DIR_MMM])[kbsw ]; + f1_BSE = (D.f[DIR_PMM])[kbse ]; + f1_BNW = (D.f[DIR_MPM])[kbnw ]; + //f1_W = (D.f[DIR_P00])[ke ]; + //f1_E = (D.f[DIR_M00])[kw ]; + //f1_S = (D.f[DIR_0P0])[kn ]; + //f1_N = (D.f[DIR_0M0])[ks ]; + //f1_B = (D.f[DIR_00P])[kt ]; + //f1_T = (D.f[DIR_00M])[kb ]; + //f1_SW = (D.f[DIR_PP0])[kne ]; + //f1_NE = (D.f[DIR_MM0])[ksw ]; + //f1_NW = (D.f[DIR_PM0])[kse ]; + //f1_SE = (D.f[DIR_MP0])[knw ]; + //f1_BW = (D.f[DIR_P0P])[kte 
]; + //f1_TE = (D.f[DIR_M0M])[kbw ]; + //f1_TW = (D.f[DIR_P0M])[kbe ]; + //f1_BE = (D.f[DIR_M0P])[ktw ]; + //f1_BS = (D.f[DIR_0PP])[ktn ]; + //f1_TN = (D.f[DIR_0MM])[kbs ]; + //f1_TS = (D.f[DIR_0PM])[kbn ]; + //f1_BN = (D.f[DIR_0MP])[kts ]; //f1_ZERO = (D.f[DIR_000])[kzero]; - //f1_BSW = (D.f[DIR_PPP ])[ktne ]; - //f1_BNE = (D.f[DIR_MMP ])[ktsw ]; - //f1_BNW = (D.f[DIR_PMP ])[ktse ]; - //f1_BSE = (D.f[DIR_MPP ])[ktnw ]; - //f1_TSW = (D.f[DIR_PPM ])[kbne ]; - //f1_TNE = (D.f[DIR_MMM ])[kbsw ]; - //f1_TNW = (D.f[DIR_PMM ])[kbse ]; - //f1_TSE = (D.f[DIR_MPM ])[kbnw ]; + //f1_BSW = (D.f[DIR_PPP])[ktne ]; + //f1_BNE = (D.f[DIR_MMP])[ktsw ]; + //f1_BNW = (D.f[DIR_PMP])[ktse ]; + //f1_BSE = (D.f[DIR_MPP])[ktnw ]; + //f1_TSW = (D.f[DIR_PPM])[kbne ]; + //f1_TNE = (D.f[DIR_MMM])[kbsw ]; + //f1_TNW = (D.f[DIR_PMM])[kbse ]; + //f1_TSE = (D.f[DIR_MPM])[kbnw ]; ////////////////////////////////////////////////////////////////////////// real cs = c1o1/sqrt(c3o1); real csp1 = cs + c1o1; @@ -522,64 +522,64 @@ __global__ void VelSchlaff27( int t, f1_TNW = f1_BSE - c1o36 * (VX - VY - VZ); deltaVz0[k] = tempDeltaV; - (D.f[DIR_00P ])[kt ] = f1_T ; - (D.f[DIR_P0P ])[kte ] = f1_TE ; - (D.f[DIR_M0P ])[ktw ] = f1_TW ; - (D.f[DIR_0PP ])[ktn ] = f1_TN ; - (D.f[DIR_0MP ])[kts ] = f1_TS ; - (D.f[DIR_PPP ])[ktne ] = f1_TNE; - (D.f[DIR_MMP ])[ktsw ] = f1_TSW; - (D.f[DIR_PMP ])[ktse ] = f1_TSE; - (D.f[DIR_MPP ])[ktnw ] = f1_TNW; - - //(D.f[DIR_00M ])[kb ] = f1_B ; - //(D.f[DIR_M0M ])[kbw ] = f1_BW ; - //(D.f[DIR_P0M ])[kbe ] = f1_BE ; - //(D.f[DIR_0MM ])[kbs ] = f1_BS ; - //(D.f[DIR_0PM ])[kbn ] = f1_BN ; - //(D.f[DIR_PPM ])[kbne ] = f1_BNE ; - //(D.f[DIR_MMM ])[kbsw ] = f1_BSW ; - //(D.f[DIR_PMM ])[kbse ] = f1_BSE ; - //(D.f[DIR_MPM ])[kbnw ] = f1_BNW ; - - - //(D.f[DIR_00P ])[kt ] = f1_B ; - //(D.f[DIR_P0P ])[kte ] = f1_BW ; - //(D.f[DIR_M0P ])[ktw ] = f1_BE ; - //(D.f[DIR_0PP ])[ktn ] = f1_BS ; - //(D.f[DIR_0MP ])[kts ] = f1_BN ; - //(D.f[DIR_PPP ])[ktne ] = f1_BSW; - //(D.f[DIR_MMP ])[ktsw ] = 
f1_BNE; - //(D.f[DIR_PMP ])[ktse ] = f1_BNW; - //(D.f[DIR_MPP ])[ktnw ] = f1_BSE; - - //(D.f[DIR_P00 ])[ke ] = f1_W -c2over27*drho1; - //(D.f[DIR_M00 ])[kw ] = f1_E -c2over27*drho1; - //(D.f[DIR_0P0 ])[kn ] = f1_S -c2over27*drho1; - //(D.f[DIR_0M0 ])[ks ] = f1_N -c2over27*drho1; - //(D.f[DIR_00P ])[kt ] = f1_B -c2over27*drho1; - //(D.f[DIR_00M ])[kb ] = f1_T -c2over27*drho1; - //(D.f[DIR_PP0 ])[kne ] = f1_SW -c1over54*drho1; - //(D.f[DIR_MM0 ])[ksw ] = f1_NE -c1over54*drho1; - //(D.f[DIR_PM0 ])[kse ] = f1_NW -c1over54*drho1; - //(D.f[DIR_MP0 ])[knw ] = f1_SE -c1over54*drho1; - //(D.f[DIR_P0P ])[kte ] = f1_BW -c1over54*drho1; - //(D.f[DIR_M0M ])[kbw ] = f1_TE -c1over54*drho1; - //(D.f[DIR_P0M ])[kbe ] = f1_TW -c1over54*drho1; - //(D.f[DIR_M0P ])[ktw ] = f1_BE -c1over54*drho1; - //(D.f[DIR_0PP ])[ktn ] = f1_BS -c1over54*drho1; - //(D.f[DIR_0MM ])[kbs ] = f1_TN -c1over54*drho1; - //(D.f[DIR_0PM ])[kbn ] = f1_TS -c1over54*drho1; - //(D.f[DIR_0MP ])[kts ] = f1_BN -c1over54*drho1; + (D.f[DIR_00P])[kt ] = f1_T ; + (D.f[DIR_P0P])[kte ] = f1_TE ; + (D.f[DIR_M0P])[ktw ] = f1_TW ; + (D.f[DIR_0PP])[ktn ] = f1_TN ; + (D.f[DIR_0MP])[kts ] = f1_TS ; + (D.f[DIR_PPP])[ktne ] = f1_TNE; + (D.f[DIR_MMP])[ktsw ] = f1_TSW; + (D.f[DIR_PMP])[ktse ] = f1_TSE; + (D.f[DIR_MPP])[ktnw ] = f1_TNW; + + //(D.f[DIR_00M])[kb ] = f1_B ; + //(D.f[DIR_M0M])[kbw ] = f1_BW ; + //(D.f[DIR_P0M])[kbe ] = f1_BE ; + //(D.f[DIR_0MM])[kbs ] = f1_BS ; + //(D.f[DIR_0PM])[kbn ] = f1_BN ; + //(D.f[DIR_PPM])[kbne ] = f1_BNE ; + //(D.f[DIR_MMM])[kbsw ] = f1_BSW ; + //(D.f[DIR_PMM])[kbse ] = f1_BSE ; + //(D.f[DIR_MPM])[kbnw ] = f1_BNW ; + + + //(D.f[DIR_00P])[kt ] = f1_B ; + //(D.f[DIR_P0P])[kte ] = f1_BW ; + //(D.f[DIR_M0P])[ktw ] = f1_BE ; + //(D.f[DIR_0PP])[ktn ] = f1_BS ; + //(D.f[DIR_0MP])[kts ] = f1_BN ; + //(D.f[DIR_PPP])[ktne ] = f1_BSW; + //(D.f[DIR_MMP])[ktsw ] = f1_BNE; + //(D.f[DIR_PMP])[ktse ] = f1_BNW; + //(D.f[DIR_MPP])[ktnw ] = f1_BSE; + + //(D.f[DIR_P00])[ke ] = f1_W -c2over27*drho1; + 
//(D.f[DIR_M00])[kw ] = f1_E -c2over27*drho1; + //(D.f[DIR_0P0])[kn ] = f1_S -c2over27*drho1; + //(D.f[DIR_0M0])[ks ] = f1_N -c2over27*drho1; + //(D.f[DIR_00P])[kt ] = f1_B -c2over27*drho1; + //(D.f[DIR_00M])[kb ] = f1_T -c2over27*drho1; + //(D.f[DIR_PP0])[kne ] = f1_SW -c1over54*drho1; + //(D.f[DIR_MM0])[ksw ] = f1_NE -c1over54*drho1; + //(D.f[DIR_PM0])[kse ] = f1_NW -c1over54*drho1; + //(D.f[DIR_MP0])[knw ] = f1_SE -c1over54*drho1; + //(D.f[DIR_P0P])[kte ] = f1_BW -c1over54*drho1; + //(D.f[DIR_M0M])[kbw ] = f1_TE -c1over54*drho1; + //(D.f[DIR_P0M])[kbe ] = f1_TW -c1over54*drho1; + //(D.f[DIR_M0P])[ktw ] = f1_BE -c1over54*drho1; + //(D.f[DIR_0PP])[ktn ] = f1_BS -c1over54*drho1; + //(D.f[DIR_0MM])[kbs ] = f1_TN -c1over54*drho1; + //(D.f[DIR_0PM])[kbn ] = f1_TS -c1over54*drho1; + //(D.f[DIR_0MP])[kts ] = f1_BN -c1over54*drho1; //(D.f[DIR_000])[kzero] = f1_ZERO-c8over27*drho1; - //(D.f[DIR_PPP ])[ktne ] = f1_BSW -c1over216*drho1; - //(D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1over216*drho1; - //(D.f[DIR_PMP ])[ktse ] = f1_BNW -c1over216*drho1; - //(D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1over216*drho1; - //(D.f[DIR_PPM ])[kbne ] = f1_TSW -c1over216*drho1; - //(D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1over216*drho1; - //(D.f[DIR_PMM ])[kbse ] = f1_TNW -c1over216*drho1; - //(D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1over216*drho1; + //(D.f[DIR_PPP])[ktne ] = f1_BSW -c1over216*drho1; + //(D.f[DIR_MMP])[ktsw ] = f1_BNE -c1over216*drho1; + //(D.f[DIR_PMP])[ktse ] = f1_BNW -c1over216*drho1; + //(D.f[DIR_MPP])[ktnw ] = f1_BSE -c1over216*drho1; + //(D.f[DIR_PPM])[kbne ] = f1_TSW -c1over216*drho1; + //(D.f[DIR_MMM])[kbsw ] = f1_TNE -c1over216*drho1; + //(D.f[DIR_PMM])[kbse ] = f1_TNW -c1over216*drho1; + //(D.f[DIR_MPM])[kbnw ] = f1_TSE -c1over216*drho1; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu 
b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu index 8dbf2c670a549f9a6afe581510205c31246b50cb..07fc5853eb7042d5567c38a03cb27418142bf642 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu @@ -16,69 +16,69 @@ __global__ void GetVeloforForcing27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; 
+ D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - 
D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -123,33 +123,33 @@ __global__ void GetVeloforForcing27( real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[ke ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[kn ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[kt ]; - real mfbba = 
(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[kne ]; - real mfaab = (D.f[DIR_MM0 ])[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[kse ]; - real mfacb = (D.f[DIR_MP0 ])[knw ]; - real mfcbc = (D.f[DIR_P0P ])[kte ]; - real mfaba = (D.f[DIR_M0M ])[kbw ]; - real mfcba = (D.f[DIR_P0M ])[kbe ]; - real mfabc = (D.f[DIR_M0P ])[ktw ]; - real mfbcc = (D.f[DIR_0PP ])[ktn ]; - real mfbaa = (D.f[DIR_0MM ])[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kbn ]; - real mfbac = (D.f[DIR_0MP ])[kts ]; + real mfcbb = (D.f[DIR_P00])[ke ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[kn ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[kt ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[kne ]; + real mfaab = (D.f[DIR_MM0])[ksw ]; + real mfcab = (D.f[DIR_PM0])[kse ]; + real mfacb = (D.f[DIR_MP0])[knw ]; + real mfcbc = (D.f[DIR_P0P])[kte ]; + real mfaba = (D.f[DIR_M0M])[kbw ]; + real mfcba = (D.f[DIR_P0M])[kbe ]; + real mfabc = (D.f[DIR_M0P])[ktw ]; + real mfbcc = (D.f[DIR_0PP])[ktn ]; + real mfbaa = (D.f[DIR_0MM])[kbs ]; + real mfbca = (D.f[DIR_0PM])[kbn ]; + real mfbac = (D.f[DIR_0MP])[kts ]; real mfbbb = (D.f[DIR_000])[kzero]; - real mfccc = (D.f[DIR_PPP ])[ktne ]; - real mfaac = (D.f[DIR_MMP ])[ktsw ]; - real mfcac = (D.f[DIR_PMP ])[ktse ]; - real mfacc = (D.f[DIR_MPP ])[ktnw ]; - real mfcca = (D.f[DIR_PPM ])[kbne ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw ]; - real mfcaa = (D.f[DIR_PMM ])[kbse ]; - real mfaca = (D.f[DIR_MPM ])[kbnw ]; + real mfccc = (D.f[DIR_PPP])[ktne ]; + real mfaac = (D.f[DIR_MMP])[ktsw ]; + real mfcac = (D.f[DIR_PMP])[ktse ]; + real mfacc = (D.f[DIR_MPP])[ktnw ]; + real mfcca = (D.f[DIR_PPM])[kbne ]; + real mfaaa = (D.f[DIR_MMM])[kbsw ]; + real mfcaa = (D.f[DIR_PMM])[kbse ]; + real mfaca = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////////// real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + 
mfacb+mfcab + mfaab+mfccb + diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu index 0079c927373e90c1e408d2c57ace0595bcfdff15..cc8ca53d15ac02686b850a70ab181bb47285a7d1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu @@ -1,84 +1,117 @@ -/* Device code */ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SlipBCs27.cu +//! \ingroup GPU +//! 
\author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipDevice27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QSlipDevice27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM 
] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - 
D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -99,24 +132,24 @@ __global__ void QSlipDevice27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; 
q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -156,32 +189,32 @@ __global__ void QSlipDevice27(real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw 
]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -206,63 +239,63 @@ __global__ void QSlipDevice27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = 
&DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = 
&DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -659,32 +692,26 @@ __global__ void QSlipDevice27(real* DD, ////////////////////////////////////////////////////////////////////////////// __global__ void QSlipDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int 
numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -702,7 +729,7 @@ __global__ void QSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -734,32 +761,32 @@ __global__ void QSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! 
- real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = 
(dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -804,7 +831,7 @@ __global__ void QSlipDeviceComp27( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -816,7 +843,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -828,7 +855,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -840,7 +867,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -852,7 +879,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -864,7 +891,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -876,7 +903,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27); } - q = 
(subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -890,7 +917,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -904,7 +931,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -918,7 +945,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -932,7 +959,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -946,7 +973,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -955,12 +982,12 @@ __global__ void QSlipDeviceComp27( if (z == true) VeloZ = c0o1; velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); velocityBC = -VeloX - VeloZ; (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = 
(subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -974,7 +1001,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -988,7 +1015,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1002,7 +1029,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1017,7 +1044,7 @@ __global__ void QSlipDeviceComp27( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1031,7 +1058,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1045,7 +1072,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1060,7 +1087,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = 
slipLength*vx1; @@ -1076,7 +1103,7 @@ __global__ void QSlipDeviceComp27( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1091,7 +1118,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1106,7 +1133,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1121,7 +1148,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1136,7 +1163,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1151,7 +1178,7 @@ __global__ void QSlipDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1169,34 +1196,53 @@ __global__ void QSlipDeviceComp27( } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + 
////////////////////////////////////////////////////////////////////////////// __global__ void BBSlipDeviceComp27( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1214,7 +1260,7 @@ __global__ void BBSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! 
- unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1246,32 +1292,32 @@ __global__ void BBSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real 
f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -1316,7 +1362,7 @@ __global__ void BBSlipDeviceComp27( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -1326,7 +1372,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_M00])[kw] = getBounceBackDistributionForVeloBC(f_W, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -1336,7 +1382,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_P00])[ke] = getBounceBackDistributionForVeloBC(f_E, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1346,7 +1392,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0M0])[ks] = getBounceBackDistributionForVeloBC(f_S, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1356,7 +1402,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0P0])[kn] = getBounceBackDistributionForVeloBC(f_N, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1366,7 +1412,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_00M])[kb] = getBounceBackDistributionForVeloBC(f_B, 
velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1376,7 +1422,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_00P])[kt] = getBounceBackDistributionForVeloBC(f_T, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1388,7 +1434,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MM0])[ksw] = getBounceBackDistributionForVeloBC(f_SW, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1400,7 +1446,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PP0])[kne] = getBounceBackDistributionForVeloBC(f_NE, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1412,7 +1458,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MP0])[knw] = getBounceBackDistributionForVeloBC(f_NW, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1424,7 +1470,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PM0])[kse] = getBounceBackDistributionForVeloBC(f_SE, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1436,7 +1482,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_M0M])[kbw] = getBounceBackDistributionForVeloBC(f_BW, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1444,11 +1490,11 @@ __global__ void BBSlipDeviceComp27( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54); + velocityBC = -VeloX 
- VeloZ; + (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1460,7 +1506,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_M0P])[ktw] = getBounceBackDistributionForVeloBC(f_TW, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1472,7 +1518,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_P0M])[kbe] = getBounceBackDistributionForVeloBC(f_BE, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1484,7 +1530,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0MM])[kbs] = getBounceBackDistributionForVeloBC(f_BS, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1497,7 +1543,7 @@ __global__ void BBSlipDeviceComp27( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1509,7 +1555,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0MP])[kts] = getBounceBackDistributionForVeloBC(f_TS, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1521,7 +1567,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_0PM])[kbn] = getBounceBackDistributionForVeloBC(f_BN, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1535,7 +1581,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getBounceBackDistributionForVeloBC(f_TNE, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = 
slipLength*vx1; @@ -1550,7 +1596,7 @@ __global__ void BBSlipDeviceComp27( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1564,7 +1610,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getBounceBackDistributionForVeloBC(f_TSW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1578,7 +1624,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PPM])[kbne] = getBounceBackDistributionForVeloBC(f_BNE, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1592,7 +1638,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getBounceBackDistributionForVeloBC(f_BNW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1606,7 +1652,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_PMP])[ktse] = getBounceBackDistributionForVeloBC(f_TSE, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1620,7 +1666,7 @@ __global__ void BBSlipDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getBounceBackDistributionForVeloBC(f_TNW, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1638,35 +1684,55 @@ __global__ void BBSlipDeviceComp27( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + //////////////////////////////////////////////////////////////////////////// __global__ void QSlipDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, - 
real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* turbViscosity, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! + //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; + const unsigned nodeIndex = getNodeIndex(); - const unsigned k = nx*(ny*z + y) + x; - - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -1684,7 +1750,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -1716,32 +1782,32 @@ __global__ void QSlipDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! 
- Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = 
(dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -1791,7 +1857,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -1803,7 +1869,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -1815,7 +1881,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1827,7 +1893,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -1839,7 +1905,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1851,7 +1917,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -1863,7 +1929,7 @@ __global__ void 
QSlipDeviceComp27TurbViscosity( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, om_turb, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1877,7 +1943,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1891,7 +1957,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1905,7 +1971,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1919,7 +1985,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1933,7 +1999,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1941,13 +2007,13 @@ __global__ void QSlipDeviceComp27TurbViscosity( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, 
c1o54); - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54); + velocityLB = -vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1961,7 +2027,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -1975,7 +2041,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -1989,7 +2055,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2004,7 +2070,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2018,7 +2084,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2032,7 +2098,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( 
(dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, om_turb, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2047,7 +2113,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2063,7 +2129,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2078,7 +2144,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2093,7 +2159,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2108,7 +2174,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2123,7 +2189,7 @@ __global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2138,7 +2204,7 @@ 
__global__ void QSlipDeviceComp27TurbViscosity( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, om_turb, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2154,37 +2220,59 @@ __global__ void QSlipDeviceComp27TurbViscosity( } } } +//////////////////////////////////////////////////////////////////////////// + + + + + + + + + + + + + + + + + + + + + + + + + + + + //////////////////////////////////////////////////////////////////////////// __global__ void QSlipPressureDeviceComp27TurbViscosity( - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* turbViscosity, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* turbViscosity, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //! The slip boundary condition is executed in the following steps //! //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -2202,7 +2290,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -2234,32 +2322,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = 
(dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -2309,7 +2397,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( bool y = false; bool z = false; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { VeloX = c0o1; @@ -2321,7 +2409,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = c0o1; @@ -2333,7 +2421,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -2345,7 +2433,7 @@ __global__ void 
QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = c0o1; @@ -2357,7 +2445,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -2369,7 +2457,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloZ = c0o1; @@ -2381,7 +2469,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, om_turb, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2395,7 +2483,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2409,7 +2497,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2423,7 +2511,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( 
(dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2437,7 +2525,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2451,7 +2539,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2459,13 +2547,13 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( if (x == true) VeloX = c0o1; if (z == true) VeloZ = c0o1; - velocityLB = -vx1 - vx3; - feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); - velocityBC = -VeloX - VeloZ; - (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54); + velocityLB = -vx1 - vx3; + feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54); + velocityBC = -VeloX - VeloZ; + (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2479,7 +2567,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = 
slipLength*vx1; @@ -2493,7 +2581,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2507,7 +2595,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2522,7 +2610,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2536,7 +2624,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloY = slipLength*vx2; @@ -2550,7 +2638,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2565,7 +2653,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2581,7 +2669,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( } - q = (subgridD.q[DIR_PPM])[k]; + q = 
(subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2596,7 +2684,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2611,7 +2699,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2626,7 +2714,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2641,7 +2729,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2656,7 +2744,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { VeloX = slipLength*vx1; @@ -2688,63 +2776,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( // Distributions27 D; // if (isEvenTimestep==true) // { -// D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; -// D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; -// D.f[DIR_0P0 
] = &DD[DIR_0P0 *size_Mat]; -// D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; -// D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; -// D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; -// D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; -// D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; -// D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; -// D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; -// D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; -// D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; -// D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; -// D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; -// D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; -// D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; -// D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; -// D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DD[DIR_000*size_Mat]; -// D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; -// D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; -// D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; -// D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; -// D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; -// D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; -// D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; -// D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; +// D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; +// D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; +// D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; +// D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; +// D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; +// D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; +// D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; +// D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; +// D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; +// D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; +// D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; +// D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; +// D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; +// D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; +// D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; +// D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; +// D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; +// D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DD[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; +// D.f[DIR_MMP] = &DD[DIR_MMP * 
size_Mat]; +// D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; +// D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; +// D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; +// D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; +// D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; +// D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; // } // else // { -// D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; -// D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; -// D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; -// D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; -// D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; -// D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; -// D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; -// D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; -// D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; -// D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; -// D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; -// D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; -// D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; -// D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; -// D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; -// D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; -// D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; -// D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DD[DIR_000*size_Mat]; -// D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; -// D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; -// D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; -// D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; -// D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; -// D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; -// D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; -// D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; +// D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; +// D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; +// D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; +// D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; +// D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; +// D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; +// D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; +// D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; +// D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; +// D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; +// D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; +// D.f[DIR_P0P] = &DD[DIR_M0M * 
size_Mat]; +// D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; +// D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; +// D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; +// D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; +// D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; +// D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DD[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; +// D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; +// D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; +// D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; +// D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; +// D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; +// D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; +// D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; // } // //////////////////////////////////////////////////////////////////////////////// // const unsigned x = threadIdx.x; // Globaler x-Index @@ -2765,24 +2853,24 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( // *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, // *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, // *q_dirBSE, *q_dirBNW; -// q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; -// q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; -// q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; -// q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; -// q_dirT = &QQ[DIR_00P * numberOfBCnodes]; -// q_dirB = &QQ[DIR_00M * numberOfBCnodes]; -// q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; -// q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; -// q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; -// q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; -// q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; -// q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; -// q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; -// q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; -// q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; -// q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; -// q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; -// q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; +// q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; +// q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; +// q_dirN = &QQ[DIR_0P0 * 
numberOfBCnodes]; +// q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; +// q_dirT = &QQ[DIR_00P * numberOfBCnodes]; +// q_dirB = &QQ[DIR_00M * numberOfBCnodes]; +// q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; +// q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; +// q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; +// q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; +// q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; +// q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; +// q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; +// q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; +// q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; +// q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; +// q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; +// q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; // q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; // q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; // q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -2823,32 +2911,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( // unsigned int kbsw = neighborZ[ksw]; // //////////////////////////////////////////////////////////////////////////////// -// real f_W = (D.f[DIR_P00 ])[ke ]; -// real f_E = (D.f[DIR_M00 ])[kw ]; -// real f_S = (D.f[DIR_0P0 ])[kn ]; -// real f_N = (D.f[DIR_0M0 ])[ks ]; -// real f_B = (D.f[DIR_00P ])[kt ]; -// real f_T = (D.f[DIR_00M ])[kb ]; -// real f_SW = (D.f[DIR_PP0 ])[kne ]; -// real f_NE = (D.f[DIR_MM0 ])[ksw ]; -// real f_NW = (D.f[DIR_PM0 ])[kse ]; -// real f_SE = (D.f[DIR_MP0 ])[knw ]; -// real f_BW = (D.f[DIR_P0P ])[kte ]; -// real f_TE = (D.f[DIR_M0M ])[kbw ]; -// real f_TW = (D.f[DIR_P0M ])[kbe ]; -// real f_BE = (D.f[DIR_M0P ])[ktw ]; -// real f_BS = (D.f[DIR_0PP ])[ktn ]; -// real f_TN = (D.f[DIR_0MM ])[kbs ]; -// real f_TS = (D.f[DIR_0PM ])[kbn ]; -// real f_BN = (D.f[DIR_0MP ])[kts ]; -// real f_BSW = (D.f[DIR_PPP ])[ktne ]; -// real f_BNE = (D.f[DIR_MMP ])[ktsw ]; -// real f_BNW = (D.f[DIR_PMP ])[ktse ]; -// real f_BSE = (D.f[DIR_MPP ])[ktnw ]; -// real f_TSW = (D.f[DIR_PPM ])[kbne ]; -// real f_TNE = (D.f[DIR_MMM ])[kbsw ]; -// real f_TNW = 
(D.f[DIR_PMM ])[kbse ]; -// real f_TSE = (D.f[DIR_MPM ])[kbnw ]; +// real f_W = (D.f[DIR_P00])[ke ]; +// real f_E = (D.f[DIR_M00])[kw ]; +// real f_S = (D.f[DIR_0P0])[kn ]; +// real f_N = (D.f[DIR_0M0])[ks ]; +// real f_B = (D.f[DIR_00P])[kt ]; +// real f_T = (D.f[DIR_00M])[kb ]; +// real f_SW = (D.f[DIR_PP0])[kne ]; +// real f_NE = (D.f[DIR_MM0])[ksw ]; +// real f_NW = (D.f[DIR_PM0])[kse ]; +// real f_SE = (D.f[DIR_MP0])[knw ]; +// real f_BW = (D.f[DIR_P0P])[kte ]; +// real f_TE = (D.f[DIR_M0M])[kbw ]; +// real f_TW = (D.f[DIR_P0M])[kbe ]; +// real f_BE = (D.f[DIR_M0P])[ktw ]; +// real f_BS = (D.f[DIR_0PP])[ktn ]; +// real f_TN = (D.f[DIR_0MM])[kbs ]; +// real f_TS = (D.f[DIR_0PM])[kbn ]; +// real f_BN = (D.f[DIR_0MP])[kts ]; +// real f_BSW = (D.f[DIR_PPP])[ktne ]; +// real f_BNE = (D.f[DIR_MMP])[ktsw ]; +// real f_BNW = (D.f[DIR_PMP])[ktse ]; +// real f_BSE = (D.f[DIR_MPP])[ktnw ]; +// real f_TSW = (D.f[DIR_PPM])[kbne ]; +// real f_TNE = (D.f[DIR_MMM])[kbsw ]; +// real f_TNW = (D.f[DIR_PMM])[kbse ]; +// real f_TSE = (D.f[DIR_MPM])[kbnw ]; // //////////////////////////////////////////////////////////////////////////////// // real vx1, vx2, vx3, drho, feq, q; // drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -2873,63 +2961,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( // ////////////////////////////////////////////////////////////////////////// // if (isEvenTimestep==false) // { -// D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; -// D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; -// D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; -// D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; -// D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; -// D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; -// D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; -// D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; -// D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; -// D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; -// D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; -// D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; -// D.f[DIR_P0M ] = 
&DD[DIR_P0M *size_Mat]; -// D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; -// D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; -// D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; -// D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; -// D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DD[DIR_000*size_Mat]; -// D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; -// D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; -// D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; -// D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; -// D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; -// D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; -// D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; -// D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; +// D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; +// D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; +// D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; +// D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; +// D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; +// D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; +// D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; +// D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; +// D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; +// D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; +// D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; +// D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; +// D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; +// D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; +// D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; +// D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; +// D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; +// D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DD[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; +// D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; +// D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; +// D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; +// D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; +// D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; +// D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; +// D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; // } // else // { -// D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; -// D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; -// D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; -// D.f[DIR_0P0 ] = 
&DD[DIR_0M0 *size_Mat]; -// D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; -// D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; -// D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; -// D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; -// D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; -// D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; -// D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; -// D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; -// D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; -// D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; -// D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; -// D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; -// D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; -// D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; -// D.f[DIR_000] = &DD[DIR_000*size_Mat]; -// D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; -// D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; -// D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; -// D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; -// D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; -// D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; -// D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; -// D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; +// D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; +// D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; +// D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; +// D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; +// D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; +// D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; +// D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; +// D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; +// D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; +// D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; +// D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; +// D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; +// D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; +// D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; +// D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; +// D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; +// D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; +// D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; +// D.f[DIR_000] = &DD[DIR_000 * size_Mat]; +// D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; +// D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; +// D.f[DIR_PMP] = &DD[DIR_MPM * 
size_Mat]; +// D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; +// D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; +// D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; +// D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; +// D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; // } // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // //Test @@ -3378,80 +3466,81 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity( ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipGeomDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QSlipGeomDeviceComp27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 
] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; 
+ D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -3472,24 +3561,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = 
&QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -3504,24 +3593,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD, *nx_dirBE, *nx_dirTW, *nx_dirTN, *nx_dirBS, *nx_dirBN, *nx_dirTS, *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW, *nx_dirBSE, *nx_dirBNW; - nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; - nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; - nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; - nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; - nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; - nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; - nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; - nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; - nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; - nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; - nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; - nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; - nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; - nx_dirTW = &NormalX[DIR_M0P * numberOfBCnodes]; - nx_dirTN = &NormalX[DIR_0PP * numberOfBCnodes]; - nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; - nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; - nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; + nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; + nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; + nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; + nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; + nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; + nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; + nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; + nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; + nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; + nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; + nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; + nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; + nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; + nx_dirTW = 
&NormalX[DIR_M0P * numberOfBCnodes]; + nx_dirTN = &NormalX[DIR_0PP * numberOfBCnodes]; + nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; + nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; + nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes]; nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes]; nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes]; @@ -3536,24 +3625,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD, *ny_dirBE, *ny_dirTW, *ny_dirTN, *ny_dirBS, *ny_dirBN, *ny_dirTS, *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW, *ny_dirBSE, *ny_dirBNW; - ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; - ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; - ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; - ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; - ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; - ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; - ny_dirNE = &NormalY[DIR_PP0 * numberOfBCnodes]; - ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; - ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; - ny_dirNW = &NormalY[DIR_MP0 * numberOfBCnodes]; - ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; - ny_dirBW = &NormalY[DIR_M0M * numberOfBCnodes]; - ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; - ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; - ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; - ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; - ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; - ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; + ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; + ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; + ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; + ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; + ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; + ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; + ny_dirNE = &NormalY[DIR_PP0 * numberOfBCnodes]; + ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; + ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; + ny_dirNW = &NormalY[DIR_MP0 * 
numberOfBCnodes]; + ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; + ny_dirBW = &NormalY[DIR_M0M * numberOfBCnodes]; + ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; + ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; + ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; + ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; + ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; + ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes]; ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes]; ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes]; @@ -3568,24 +3657,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD, *nz_dirBE, *nz_dirTW, *nz_dirTN, *nz_dirBS, *nz_dirBN, *nz_dirTS, *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW, *nz_dirBSE, *nz_dirBNW; - nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; - nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; - nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; - nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; - nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; - nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; - nz_dirNE = &NormalZ[DIR_PP0 * numberOfBCnodes]; - nz_dirSW = &NormalZ[DIR_MM0 * numberOfBCnodes]; - nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; - nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; - nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; - nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; - nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; - nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; - nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; - nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; - nz_dirBN = &NormalZ[DIR_0PM * numberOfBCnodes]; - nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; + nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; + nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; + nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; + nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; + nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; + nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; + nz_dirNE = 
&NormalZ[DIR_PP0 * numberOfBCnodes]; + nz_dirSW = &NormalZ[DIR_MM0 * numberOfBCnodes]; + nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; + nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; + nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; + nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; + nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; + nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; + nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; + nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; + nz_dirBN = &NormalZ[DIR_0PM * numberOfBCnodes]; + nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes]; nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes]; nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes]; @@ -3625,32 +3714,32 @@ __global__ void QSlipGeomDeviceComp27(real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = 
(D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -3675,63 +3764,63 @@ __global__ void QSlipGeomDeviceComp27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 
] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; 
+ D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real VeloX = vx1; @@ -4264,80 +4353,81 @@ __global__ void QSlipGeomDeviceComp27(real* DD, ////////////////////////////////////////////////////////////////////////////// -__global__ void QSlipNormDeviceComp27(real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* NormalX, - real* NormalY, - real* NormalZ, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QSlipNormDeviceComp27( + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* NormalX, + real* NormalY, + real* NormalZ, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP 
*size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = 
&DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * 
numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4358,24 +4448,24 @@ __global__ void QSlipNormDeviceComp27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; 
q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -4390,24 +4480,24 @@ __global__ void QSlipNormDeviceComp27(real* DD, *nx_dirBE, *nx_dirTW, *nx_dirTN, *nx_dirBS, *nx_dirBN, *nx_dirTS, *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW, *nx_dirBSE, *nx_dirBNW; - nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; - nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; - nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; - nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; - nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; - nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; - nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; - nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; - nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; - nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; - nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; - nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; - nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; - nx_dirTW = &NormalX[DIR_M0P * numberOfBCnodes]; - nx_dirTN = &NormalX[DIR_0PP * numberOfBCnodes]; - nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; - nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; - nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; + nx_dirE = &NormalX[DIR_P00 * numberOfBCnodes]; + nx_dirW = &NormalX[DIR_M00 * numberOfBCnodes]; + nx_dirN = &NormalX[DIR_0P0 * numberOfBCnodes]; + nx_dirS = &NormalX[DIR_0M0 * numberOfBCnodes]; + nx_dirT = &NormalX[DIR_00P * numberOfBCnodes]; + nx_dirB = &NormalX[DIR_00M * numberOfBCnodes]; + nx_dirNE = &NormalX[DIR_PP0 * numberOfBCnodes]; + nx_dirSW = &NormalX[DIR_MM0 * numberOfBCnodes]; + nx_dirSE = &NormalX[DIR_PM0 * numberOfBCnodes]; + nx_dirNW = &NormalX[DIR_MP0 * numberOfBCnodes]; + nx_dirTE = &NormalX[DIR_P0P * numberOfBCnodes]; + nx_dirBW = &NormalX[DIR_M0M * numberOfBCnodes]; + nx_dirBE = &NormalX[DIR_P0M * numberOfBCnodes]; + nx_dirTW = &NormalX[DIR_M0P * numberOfBCnodes]; + nx_dirTN = &NormalX[DIR_0PP * 
numberOfBCnodes]; + nx_dirBS = &NormalX[DIR_0MM * numberOfBCnodes]; + nx_dirBN = &NormalX[DIR_0PM * numberOfBCnodes]; + nx_dirTS = &NormalX[DIR_0MP * numberOfBCnodes]; nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes]; nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes]; nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes]; @@ -4422,24 +4512,24 @@ __global__ void QSlipNormDeviceComp27(real* DD, *ny_dirBE, *ny_dirTW, *ny_dirTN, *ny_dirBS, *ny_dirBN, *ny_dirTS, *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW, *ny_dirBSE, *ny_dirBNW; - ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; - ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; - ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; - ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; - ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; - ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; - ny_dirNE = &NormalY[DIR_PP0 * numberOfBCnodes]; - ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; - ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; - ny_dirNW = &NormalY[DIR_MP0 * numberOfBCnodes]; - ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; - ny_dirBW = &NormalY[DIR_M0M * numberOfBCnodes]; - ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; - ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; - ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; - ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; - ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; - ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; + ny_dirE = &NormalY[DIR_P00 * numberOfBCnodes]; + ny_dirW = &NormalY[DIR_M00 * numberOfBCnodes]; + ny_dirN = &NormalY[DIR_0P0 * numberOfBCnodes]; + ny_dirS = &NormalY[DIR_0M0 * numberOfBCnodes]; + ny_dirT = &NormalY[DIR_00P * numberOfBCnodes]; + ny_dirB = &NormalY[DIR_00M * numberOfBCnodes]; + ny_dirNE = &NormalY[DIR_PP0 * numberOfBCnodes]; + ny_dirSW = &NormalY[DIR_MM0 * numberOfBCnodes]; + ny_dirSE = &NormalY[DIR_PM0 * numberOfBCnodes]; + ny_dirNW = &NormalY[DIR_MP0 * numberOfBCnodes]; + ny_dirTE = &NormalY[DIR_P0P * numberOfBCnodes]; + ny_dirBW = 
&NormalY[DIR_M0M * numberOfBCnodes]; + ny_dirBE = &NormalY[DIR_P0M * numberOfBCnodes]; + ny_dirTW = &NormalY[DIR_M0P * numberOfBCnodes]; + ny_dirTN = &NormalY[DIR_0PP * numberOfBCnodes]; + ny_dirBS = &NormalY[DIR_0MM * numberOfBCnodes]; + ny_dirBN = &NormalY[DIR_0PM * numberOfBCnodes]; + ny_dirTS = &NormalY[DIR_0MP * numberOfBCnodes]; ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes]; ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes]; ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes]; @@ -4454,24 +4544,24 @@ __global__ void QSlipNormDeviceComp27(real* DD, *nz_dirBE, *nz_dirTW, *nz_dirTN, *nz_dirBS, *nz_dirBN, *nz_dirTS, *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW, *nz_dirBSE, *nz_dirBNW; - nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; - nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; - nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; - nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; - nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; - nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; - nz_dirNE = &NormalZ[DIR_PP0 * numberOfBCnodes]; - nz_dirSW = &NormalZ[DIR_MM0 * numberOfBCnodes]; - nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; - nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; - nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; - nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; - nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; - nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; - nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; - nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; - nz_dirBN = &NormalZ[DIR_0PM * numberOfBCnodes]; - nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; + nz_dirE = &NormalZ[DIR_P00 * numberOfBCnodes]; + nz_dirW = &NormalZ[DIR_M00 * numberOfBCnodes]; + nz_dirN = &NormalZ[DIR_0P0 * numberOfBCnodes]; + nz_dirS = &NormalZ[DIR_0M0 * numberOfBCnodes]; + nz_dirT = &NormalZ[DIR_00P * numberOfBCnodes]; + nz_dirB = &NormalZ[DIR_00M * numberOfBCnodes]; + nz_dirNE = &NormalZ[DIR_PP0 * numberOfBCnodes]; + nz_dirSW = &NormalZ[DIR_MM0 * 
numberOfBCnodes]; + nz_dirSE = &NormalZ[DIR_PM0 * numberOfBCnodes]; + nz_dirNW = &NormalZ[DIR_MP0 * numberOfBCnodes]; + nz_dirTE = &NormalZ[DIR_P0P * numberOfBCnodes]; + nz_dirBW = &NormalZ[DIR_M0M * numberOfBCnodes]; + nz_dirBE = &NormalZ[DIR_P0M * numberOfBCnodes]; + nz_dirTW = &NormalZ[DIR_M0P * numberOfBCnodes]; + nz_dirTN = &NormalZ[DIR_0PP * numberOfBCnodes]; + nz_dirBS = &NormalZ[DIR_0MM * numberOfBCnodes]; + nz_dirBN = &NormalZ[DIR_0PM * numberOfBCnodes]; + nz_dirTS = &NormalZ[DIR_0MP * numberOfBCnodes]; nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes]; nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes]; nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes]; @@ -4511,32 +4601,32 @@ __global__ void QSlipNormDeviceComp27(real* DD, unsigned int ktne = KQK; unsigned int kbsw = neighborZ[ksw]; //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse ]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = 
(D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -4561,63 +4651,63 @@ __global__ void QSlipNormDeviceComp27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - 
D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 
*size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + 
D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real VeloX = vx1; diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu index 74e2faa38638228aa5d499aa74226405ab109f7d..3208299e93940dabe52faa7d0b3c684c45596660 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu @@ -43,28 +43,30 @@ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include <lbm/constants/NumericConstants.h> -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// -__host__ __device__ __forceinline__ void iMEM(uint k, uint kN, - real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ, - real* vx, real* vy, real* vz, - real* vx_el, real* vy_el, real* vz_el, //!>mean (temporally filtered) velocities at exchange location - real* vx_w_mean, real* vy_w_mean, real* vz_w_mean, //!>mean (temporally filtered) velocities at wall-adjactent node - real vx_w_inst, real vy_w_inst, real vz_w_inst, //!>instantaneous velocities at wall-adjactent node - real rho, - int* samplingOffset, - real q, - real forceFactor, //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015) - real eps, //!>filter constant in temporal averaging - real* z0, //!>aerodynamic roughness length - bool hasWallModelMonitor, - real* u_star_monitor, - real wallMomentumX, real wallMomentumY, real wallMomentumZ, - real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ) +__host__ __device__ __forceinline__ void iMEM( + uint k, uint kN, + real* _wallNormalX, real* _wallNormalY, real* 
_wallNormalZ, + real* vx, real* vy, real* vz, + real* vx_el, real* vy_el, real* vz_el, //!>mean (temporally filtered) velocities at exchange location + real* vx_w_mean, real* vy_w_mean, real* vz_w_mean, //!>mean (temporally filtered) velocities at wall-adjactent node + real vx_w_inst, real vy_w_inst, real vz_w_inst, //!>instantaneous velocities at wall-adjactent node + real rho, + int* samplingOffset, + real q, + real forceFactor, //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015) + real eps, //!>filter constant in temporal averaging + real* z0, //!>aerodynamic roughness length + bool hasWallModelMonitor, + real* u_star_monitor, + real wallMomentumX, real wallMomentumY, real wallMomentumZ, + real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ) { real wallNormalX = _wallNormalX[k]; real wallNormalY = _wallNormalY[k]; @@ -136,99 +138,100 @@ __host__ __device__ __forceinline__ void iMEM(uint k, uint kN, } ////////////////////////////////////////////////////////////////////////////// -__global__ void QStressDeviceComp27(real* DD, - int* k_Q, - int* k_N, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real* turbViscosity, - real* vx, - real* vy, - real* vz, - real* normalX, - real* normalY, - real* normalZ, - real* vx_el, - real* vy_el, - real* vz_el, - real* vx_w_mean, - real* vy_w_mean, - real* vz_w_mean, - int* samplingOffset, - real* z0, - bool hasWallModelMonitor, - real* u_star_monitor, - real* Fx_monitor, - real* Fy_monitor, - real* Fz_monitor, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QStressDeviceComp27( + real* DD, + int* k_Q, + int* k_N, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real* turbViscosity, + real* vx, + real* vy, + real* vz, + real* normalX, + real* normalY, + real* normalZ, + real* vx_el, + real* vy_el, + real* vz_el, + real* vx_w_mean, + real* 
vy_w_mean, + real* vz_w_mean, + int* samplingOffset, + real* z0, + bool hasWallModelMonitor, + real* u_star_monitor, + real* Fx_monitor, + real* Fy_monitor, + real* Fz_monitor, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true)//get right array of post coll f's { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = 
&DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP 
*size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -249,24 +252,24 @@ __global__ void QStressDeviceComp27(real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = 
&QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -309,32 +312,32 @@ __global__ void QStressDeviceComp27(real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; //post-coll f's - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M 
])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; //post-coll f's + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; @@ -361,63 +364,63 @@ __global__ void QStressDeviceComp27(real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) //get adress where incoming f's should be written to { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = 
&DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * 
numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + 
D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Compute incoming f's with zero wall velocity @@ -968,69 +971,69 @@ __global__ void BBStressDevice27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] 
= &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - 
D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1051,24 +1054,24 @@ __global__ void BBStressDevice27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * 
numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1112,32 +1115,32 @@ __global__ void BBStressDevice27( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; 
//////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho; @@ -1161,63 +1164,63 @@ __global__ void BBStressDevice27( real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; 
+ D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = 
&DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E_in, f_W_in, f_N_in, f_S_in, f_T_in, f_B_in, f_NE_in, f_SW_in, f_SE_in, f_NW_in, f_TE_in, f_BW_in, f_BE_in, @@ -1715,69 +1718,69 @@ __global__ void BBStressPressureDevice27( real* DD, unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = 
&DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = 
&DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * 
numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1798,24 +1801,24 @@ __global__ void BBStressPressureDevice27( real* DD, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = 
&QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1859,32 +1862,32 @@ __global__ void BBStressPressureDevice27( real* DD, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + 
f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho; @@ -1908,63 +1911,63 @@ __global__ void BBStressPressureDevice27( real* DD, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - 
D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P 
*size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real f_E_in, f_W_in, f_N_in, f_S_in, f_T_in, f_B_in, f_NE_in, f_SW_in, f_SE_in, f_NW_in, f_TE_in, f_BW_in, f_BE_in, diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu index 55f810628f370976289d1492e9916d5d3fa0dbb8..b96d961c9b92ae5d041beeb23482d7144e7a8acb 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu @@ -27,69 +27,69 @@ __global__ void QVelDeviceCompThinWallsPartOne27( uint* neighborX, uint* neighborY, uint* neighborZ, - uint size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * 
numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = 
&DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -114,24 +114,24 @@ __global__ void QVelDeviceCompThinWallsPartOne27( *q_dirBE, *q_dirTW, 
*q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -174,32 +174,32 @@ __global__ void QVelDeviceCompThinWallsPartOne27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, 
f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -456,69 +456,69 @@ __global__ void QDeviceCompThinWallsPartOne27( unsigned int* neighborX, 
unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep == true) { - D.f[DIR_P00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = 
&DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = 
&DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -539,24 +539,24 @@ __global__ void QDeviceCompThinWallsPartOne27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = 
&QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -882,7 +882,7 @@ __global__ void QThinWallsPartTwo27( uint* neighborY, uint* neighborZ, uint* neighborWSB, - uint size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// @@ -904,24 +904,24 @@ __global__ void QThinWallsPartTwo27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW 
= &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -980,123 +980,123 @@ __global__ void QThinWallsPartTwo27( Distributions27 D, DN; if (isEvenTimestep == true) { - D.f[DIR_P00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_M0P *size_Mat]; - 
D.f[DIR_0PP] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] 
= &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + 
D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } if (isEvenTimestep==false) { - DN.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - DN.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - DN.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - DN.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - DN.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - DN.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - DN.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - DN.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - DN.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - DN.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - DN.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - DN.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - DN.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - DN.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - DN.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - DN.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - DN.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - DN.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - DN.f[DIR_000] = &DD[DIR_000*size_Mat]; - DN.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - DN.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - DN.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - DN.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - DN.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - DN.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - DN.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - DN.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + DN.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + DN.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + DN.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + DN.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + DN.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + DN.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + DN.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + DN.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + DN.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + DN.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + DN.f[DIR_P0P] = 
&DD[DIR_P0P * numberOfLBnodes]; + DN.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + DN.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + DN.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + DN.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + DN.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + DN.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + DN.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + DN.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + DN.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + DN.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + DN.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + DN.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + DN.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + DN.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + DN.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - DN.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - DN.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - DN.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - DN.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - DN.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - DN.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - DN.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - DN.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - DN.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - DN.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - DN.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - DN.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - DN.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - DN.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - DN.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - DN.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - DN.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - DN.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - DN.f[DIR_000] = &DD[DIR_000*size_Mat]; - DN.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - DN.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - DN.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - DN.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - DN.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - DN.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - DN.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - DN.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; 
+ DN.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + DN.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + DN.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + DN.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + DN.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + DN.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + DN.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + DN.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + DN.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + DN.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + DN.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + DN.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + DN.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + DN.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + DN.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + DN.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + DN.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + DN.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + DN.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + DN.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + DN.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + DN.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + DN.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + DN.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + DN.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + DN.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //directions allways exchange @@ -1106,24 +1106,24 @@ __global__ void QThinWallsPartTwo27( //( 1 1 1) ( 1 0 0) ( 0 1 0) ( 0 0 1) ( 1 1 0) ( 1 0 1) ( 0 1 1) (-1 -1 1) (-1 1 -1) ( 1 -1 -1) (-1 1 0) (-1 0 1) ( 0 -1 1) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real q, tmp; - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kw ] < GEO_FLUID){tmp = (DN.f[DIR_M00 ])[kw ]; (DN.f[DIR_M00 
])[kw ]=(D.f[DIR_M00 ])[kw ]; (D.f[DIR_M00 ])[kw ]=tmp;}} - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_P00 ])[ke ]; (DN.f[DIR_P00 ])[ke ]=(D.f[DIR_P00 ])[ke ]; (D.f[DIR_P00 ])[ke ]=tmp;}} - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1){ if (geom[ks ] < GEO_FLUID){tmp = (DN.f[DIR_0M0 ])[ks ]; (DN.f[DIR_0M0 ])[ks ]=(D.f[DIR_0M0 ])[ks ]; (D.f[DIR_0M0 ])[ks ]=tmp;}} - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0P0 ])[kn ]; (DN.f[DIR_0P0 ])[kn ]=(D.f[DIR_0P0 ])[kn ]; (D.f[DIR_0P0 ])[kn ]=tmp;}} - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kb ] < GEO_FLUID){tmp = (DN.f[DIR_00M ])[kb ]; (DN.f[DIR_00M ])[kb ]=(D.f[DIR_00M ])[kb ]; (D.f[DIR_00M ])[kb ]=tmp;}} - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_00P ])[kt ]; (DN.f[DIR_00P ])[kt ]=(D.f[DIR_00P ])[kt ]; (D.f[DIR_00P ])[kt ]=tmp;}} - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0 ])[ksw ]; (DN.f[DIR_MM0 ])[ksw ]=(D.f[DIR_MM0 ])[ksw ]; (D.f[DIR_MM0 ])[ksw ]=tmp;}} - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_PP0 ])[kne ]; (DN.f[DIR_PP0 ])[kne ]=(D.f[DIR_PP0 ])[kne ]; (D.f[DIR_PP0 ])[kne ]=tmp;}} - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_MP0 ])[knw ]; (DN.f[DIR_MP0 ])[knw ]=(D.f[DIR_MP0 ])[knw ]; (D.f[DIR_MP0 ])[knw ]=tmp;}} - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0 ])[kse ]; (DN.f[DIR_PM0 ])[kse ]=(D.f[DIR_PM0 ])[kse ]; (D.f[DIR_PM0 ])[kse ]=tmp;}} - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M ])[kbw ]; (DN.f[DIR_M0M ])[kbw ]=(D.f[DIR_M0M ])[kbw ]; (D.f[DIR_M0M ])[kbw ]=tmp;}} - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_P0P ])[kte ]; (DN.f[DIR_P0P ])[kte ]=(D.f[DIR_P0P ])[kte ]; (D.f[DIR_P0P ])[kte ]=tmp;}} - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_M0P ])[ktw ]; (DN.f[DIR_M0P ])[ktw ]=(D.f[DIR_M0P ])[ktw ]; (D.f[DIR_M0P ])[ktw ]=tmp;}} - q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1){ if 
(geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M ])[kbe ]; (DN.f[DIR_P0M ])[kbe ]=(D.f[DIR_P0M ])[kbe ]; (D.f[DIR_P0M ])[kbe ]=tmp;}} - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM ])[kbs ]; (DN.f[DIR_0MM ])[kbs ]=(D.f[DIR_0MM ])[kbs ]; (D.f[DIR_0MM ])[kbs ]=tmp;}} - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0PP ])[ktn ]; (DN.f[DIR_0PP ])[ktn ]=(D.f[DIR_0PP ])[ktn ]; (D.f[DIR_0PP ])[ktn ]=tmp;}} - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0MP ])[kts ]; (DN.f[DIR_0MP ])[kts ]=(D.f[DIR_0MP ])[kts ]; (D.f[DIR_0MP ])[kts ]=tmp;}} - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM ])[kbn ]; (DN.f[DIR_0PM ])[kbn ]=(D.f[DIR_0PM ])[kbn ]; (D.f[DIR_0PM ])[kbn ]=tmp;}} + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kw ] < GEO_FLUID){tmp = (DN.f[DIR_M00])[kw ]; (DN.f[DIR_M00])[kw ]=(D.f[DIR_M00])[kw ]; (D.f[DIR_M00])[kw ]=tmp;}} + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_P00])[ke ]; (DN.f[DIR_P00])[ke ]=(D.f[DIR_P00])[ke ]; (D.f[DIR_P00])[ke ]=tmp;}} + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1){ if (geom[ks ] < GEO_FLUID){tmp = (DN.f[DIR_0M0])[ks ]; (DN.f[DIR_0M0])[ks ]=(D.f[DIR_0M0])[ks ]; (D.f[DIR_0M0])[ks ]=tmp;}} + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0P0])[kn ]; (DN.f[DIR_0P0])[kn ]=(D.f[DIR_0P0])[kn ]; (D.f[DIR_0P0])[kn ]=tmp;}} + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kb ] < GEO_FLUID){tmp = (DN.f[DIR_00M])[kb ]; (DN.f[DIR_00M])[kb ]=(D.f[DIR_00M])[kb ]; (D.f[DIR_00M])[kb ]=tmp;}} + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_00P])[kt ]; (DN.f[DIR_00P])[kt ]=(D.f[DIR_00P])[kt ]; (D.f[DIR_00P])[kt ]=tmp;}} + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0])[ksw ]; (DN.f[DIR_MM0])[ksw ]=(D.f[DIR_MM0])[ksw ]; (D.f[DIR_MM0])[ksw ]=tmp;}} + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_PP0])[kne ]; (DN.f[DIR_PP0])[kne ]=(D.f[DIR_PP0])[kne ]; 
(D.f[DIR_PP0])[kne ]=tmp;}} + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_MP0])[knw ]; (DN.f[DIR_MP0])[knw ]=(D.f[DIR_MP0])[knw ]; (D.f[DIR_MP0])[knw ]=tmp;}} + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0])[kse ]; (DN.f[DIR_PM0])[kse ]=(D.f[DIR_PM0])[kse ]; (D.f[DIR_PM0])[kse ]=tmp;}} + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M])[kbw ]; (DN.f[DIR_M0M])[kbw ]=(D.f[DIR_M0M])[kbw ]; (D.f[DIR_M0M])[kbw ]=tmp;}} + q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_P0P])[kte ]; (DN.f[DIR_P0P])[kte ]=(D.f[DIR_P0P])[kte ]; (D.f[DIR_P0P])[kte ]=tmp;}} + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_M0P])[ktw ]; (DN.f[DIR_M0P])[ktw ]=(D.f[DIR_M0P])[ktw ]; (D.f[DIR_M0P])[ktw ]=tmp;}} + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M])[kbe ]; (DN.f[DIR_P0M])[kbe ]=(D.f[DIR_P0M])[kbe ]; (D.f[DIR_P0M])[kbe ]=tmp;}} + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM])[kbs ]; (DN.f[DIR_0MM])[kbs ]=(D.f[DIR_0MM])[kbs ]; (D.f[DIR_0MM])[kbs ]=tmp;}} + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0PP])[ktn ]; (DN.f[DIR_0PP])[ktn ]=(D.f[DIR_0PP])[ktn ]; (D.f[DIR_0PP])[ktn ]=tmp;}} + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_0MP])[kts ]; (DN.f[DIR_0MP])[kts ]=(D.f[DIR_0MP])[kts ]; (D.f[DIR_0MP])[kts ]=tmp;}} + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM])[kbn ]; (DN.f[DIR_0PM])[kbn ]=(D.f[DIR_0PM])[kbn ]; (D.f[DIR_0PM])[kbn ]=tmp;}} q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbsw] < GEO_FLUID){tmp = (DN.f[DIR_MMM])[kbsw]; (DN.f[DIR_MMM])[kbsw]=(D.f[DIR_MMM])[kbsw]; (D.f[DIR_MMM])[kbsw]=tmp;}} q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_PPP])[ktne]; (DN.f[DIR_PPP])[ktne]=(D.f[DIR_PPP])[ktne]; (D.f[DIR_PPP])[ktne]=tmp;}} q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1){ {tmp = (DN.f[DIR_MMP])[ktsw]; 
(DN.f[DIR_MMP])[ktsw]=(D.f[DIR_MMP])[ktsw]; (D.f[DIR_MMP])[ktsw]=tmp;}} diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu index f8cf8ab13c39d55477bf006cd27f7943dcb5b53a..3f440454ef272b13c24fe2a2882d67d32d32a841 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu @@ -9,14 +9,16 @@ /* Device code */ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> +#include "lbm/constants/NumericConstants.h" #include "lbm/MacroscopicQuantities.h" #include "../Kernel/Utilities/DistributionHelper.cuh" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void CalcTurbulenceIntensity( @@ -34,19 +36,21 @@ __global__ void CalcTurbulenceIntensity( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { - const unsigned k = vf::gpu::getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - if (k >= size_Mat) + if (nodeIndex >= numberOfLBnodes) return; - if (!vf::gpu::isValidFluidNode(typeOfGridNode[k])) + if (!isValidFluidNode(typeOfGridNode[nodeIndex])) return; - vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, - neighborZ); + DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ); const auto &distribution = distr_wrapper.distribution; // analogue to LBCalcMacCompSP27 @@ -58,16 +62,16 @@ __global__ void CalcTurbulenceIntensity( // compute subtotals: // fluctuations - vxx[k] = vxx[k] + vx * vx; - vyy[k] = vyy[k] + vy * vy; - vzz[k] = vzz[k] + vz * vz; - vxy[k] = vxy[k] + vx * vy; - vxz[k] = vxz[k] + vx * vz; - vyz[k] = vyz[k] + vy * vz; + vxx[nodeIndex] = vxx[nodeIndex] + vx * vx; + vyy[nodeIndex] = vyy[nodeIndex] + vy * vy; + vzz[nodeIndex] = vzz[nodeIndex] + vz * vz; + vxy[nodeIndex] = vxy[nodeIndex] + vx * vy; + vxz[nodeIndex] = vxz[nodeIndex] + vx * vz; + vyz[nodeIndex] = vyz[nodeIndex] + vy * vz; // velocity (for mean velocity) - vx_mean[k] = vx_mean[k] + vx; - vy_mean[k] = vy_mean[k] + vy; - vz_mean[k] = vz_mean[k] + vz; + vx_mean[nodeIndex] = vx_mean[nodeIndex] + vx; + vy_mean[nodeIndex] = vy_mean[nodeIndex] + vy; + vz_mean[nodeIndex] = vz_mean[nodeIndex] + vz; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh index eb301515527a9e8a3056676b0d4dffe8197c7dbe..58856f624fa1dfd2488c3061721e9dac53a67d07 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh @@ -53,8 +53,8 @@ __inline__ __device__ real calcTurbulentViscosityQR(real C, real dxux, real dyuy //! 
Second invariant of the strain-rate tensor real Q = c1o2*( dxux*dxux + dyuy*dyuy + dzuz*dzuz ) + c1o4*( Dxy*Dxy + Dxz*Dxz + Dyz*Dyz); //! Third invariant of the strain-rate tensor (determinant) - real R = - dxux*dyuy*dzuz - c1o4*( Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy ); - + // real R = - dxux*dyuy*dzuz - c1o4*( Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy ); + real R = - dxux*dyuy*dzuz + c1o4*( -Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy ); return C * max(R, c0o1) / Q; } diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu index 3719ca3712e6f63a77f62bf314af7d19eea01f4c..7147629c448b8b730e4ae8c4eff8a0a400863de9 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu @@ -38,6 +38,7 @@ #include <cuda_runtime.h> #include <helper_cuda.h> #include "LBM/LB.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; @@ -52,34 +53,31 @@ __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM dvz = ((fluidP ? vz[kP] : vz[k])-(fluidM ? vz[kM] : vz[k]))*div; } -__global__ void calcAMD(real* vx, - real* vy, - real* vz, - real* turbulentViscosity, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint* neighborWSB, - uint* typeOfGridNode, - uint size_Mat, - real SGSConstant) +__global__ void calcAMD( + real* vx, + real* vy, + real* vz, + real* turbulentViscosity, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* neighborWSB, + uint* typeOfGridNode, + unsigned long long numberOfLBnodes, + real SGSConstant) { + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = vf::gpu::getNodeIndex(); - const uint x = threadIdx.x; - const uint y = blockIdx.x; - const uint z = blockIdx.y; + if(nodeIndex >= numberOfLBnodes) return; + if(typeOfGridNode[nodeIndex] != GEO_FLUID) return; - const uint nx = blockDim.x; - const uint ny = gridDim.x; - - const uint k = nx*(ny*z + y) + x; - if(k >= size_Mat) return; - if(typeOfGridNode[k] != GEO_FLUID) return; - - uint kPx = neighborX[k]; - uint kPy = neighborY[k]; - uint kPz = neighborZ[k]; - uint kMxyz = neighborWSB[k]; + uint kPx = neighborX[nodeIndex]; + uint kPy = neighborY[nodeIndex]; + uint kPz = neighborZ[nodeIndex]; + uint kMxyz = neighborWSB[nodeIndex]; uint kMx = neighborZ[neighborY[kMxyz]]; uint kMy = neighborZ[neighborX[kMxyz]]; uint kMz = neighborY[neighborX[kMxyz]]; @@ -88,9 +86,9 @@ __global__ void calcAMD(real* vx, dvydx, dvydy, dvydz, dvzdx, dvzdy, dvzdz; - calcDerivatives(k, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx); - calcDerivatives(k, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy); - calcDerivatives(k, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz); + calcDerivatives(nodeIndex, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx); + calcDerivatives(nodeIndex, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy); + calcDerivatives(nodeIndex, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz); real denominator = dvxdx*dvxdx + dvydx*dvydx + dvzdx*dvzdx + dvxdy*dvxdy + dvydy*dvydy + dvzdy*dvzdy + @@ -102,7 +100,7 @@ __global__ void calcAMD(real* vx, (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy); - turbulentViscosity[k] = max(c0o1,-SGSConstant*enumerator)/denominator; + turbulentViscosity[nodeIndex] = denominator != c0o1 ? 
max(c0o1,-SGSConstant*enumerator)/denominator : c0o1; } void calcTurbulentViscosityAMD(Parameter* para, int level) diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu index 05c85e8b546aeaa964b1dbb61cbf01dd9b82ca1a..ccf9d1771ec0e1895e5cb79fae63675429b02c73 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu @@ -1,96 +1,120 @@ -// _ ___ __ __________ _ __ ______________ __ -// | | / (_)____/ /___ ______ _/ / ____/ /_ __(_)___/ /____ / ___/ __ / / / / -// | | / / / ___/ __/ / / / __ `/ / /_ / / / / / / __ / ___/ / /___/ /_/ / / / / -// | |/ / / / / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__ ) / /_) / ____/ /__/ / -// |___/_/_/ \__/\__,_/\__,_/_/_/ /_/\__,_/_/\__,_/____/ \____/_/ \_____/ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -////////////////////////////////////////////////////////////////////////// - -/* Device code */ +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file VelocityBCs27.cu +//! \ingroup GPU +//! \author Martin Schoenherr, Anna Wellmann +//====================================================================================== #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" #include "lbm/constants/NumericConstants.h" -#include "KernelUtilities.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; +using namespace vf::gpu; ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompPlusSlip27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = 
&DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = 
&DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * 
numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -115,24 +139,24 @@ __global__ void QVelDeviceCompPlusSlip27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * 
numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -225,63 +249,63 @@ __global__ void QVelDeviceCompPlusSlip27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep == false) { - D.f[DIR_P00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat]; - 
D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DD[DIR_M0P *size_Mat]; - 
D.f[DIR_0MM] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -553,18 +577,19 @@ __global__ void QVelDeviceCompPlusSlip27( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void QVeloDeviceEQ27(real* VeloX, - real* VeloY, - real* VeloZ, - real* DD, - int* k_Q, - int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QVeloDeviceEQ27( + real* VeloX, + real* VeloY, + real* VeloZ, + real* DD, + int* k_Q, + int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -613,95 +638,95 @@ __global__ void QVeloDeviceEQ27(real* VeloX, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP 
*size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = 
&DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * 
numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // based on BGK Plus Comp ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[ke ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[kn ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[kt ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[kne ]; - real mfaab = (D.f[DIR_MM0 ])[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[kse ]; - real mfacb = (D.f[DIR_MP0 ])[knw ]; - real mfcbc = (D.f[DIR_P0P ])[kte ]; - real mfaba = (D.f[DIR_M0M ])[kbw ]; - real mfcba = (D.f[DIR_P0M ])[kbe ]; - real mfabc = (D.f[DIR_M0P ])[ktw ]; - real mfbcc = (D.f[DIR_0PP ])[ktn ]; - real mfbaa = (D.f[DIR_0MM ])[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kbn ]; - real mfbac = (D.f[DIR_0MP ])[kts ]; + real mfcbb = (D.f[DIR_P00])[ke ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[kn ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[kt ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[kne ]; + real mfaab = (D.f[DIR_MM0])[ksw ]; + real mfcab = (D.f[DIR_PM0])[kse ]; + real mfacb = (D.f[DIR_MP0])[knw ]; + real mfcbc = (D.f[DIR_P0P])[kte ]; + real mfaba = (D.f[DIR_M0M])[kbw ]; + real mfcba = (D.f[DIR_P0M])[kbe ]; + real mfabc = (D.f[DIR_M0P])[ktw ]; + real mfbcc = (D.f[DIR_0PP])[ktn ]; + real mfbaa = (D.f[DIR_0MM])[kbs ]; + real mfbca = (D.f[DIR_0PM])[kbn ]; + real mfbac = (D.f[DIR_0MP])[kts ]; real mfbbb = (D.f[DIR_000])[kzero]; - real mfccc = (D.f[DIR_PPP ])[ktne ]; - real mfaac = (D.f[DIR_MMP ])[ktsw ]; - real mfcac = (D.f[DIR_PMP ])[ktse ]; - real mfacc = (D.f[DIR_MPP ])[ktnw ]; - real mfcca = (D.f[DIR_PPM ])[kbne ]; 
- real mfaaa = (D.f[DIR_MMM ])[kbsw ]; - real mfcaa = (D.f[DIR_PMM ])[kbse ]; - real mfaca = (D.f[DIR_MPM ])[kbnw ]; + real mfccc = (D.f[DIR_PPP])[ktne ]; + real mfaac = (D.f[DIR_MMP])[ktsw ]; + real mfcac = (D.f[DIR_PMP])[ktse ]; + real mfacc = (D.f[DIR_MPP])[ktnw ]; + real mfcca = (D.f[DIR_PPM])[kbne ]; + real mfaaa = (D.f[DIR_MMM])[kbsw ]; + real mfcaa = (D.f[DIR_PMM])[kbse ]; + real mfaca = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////////// real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb + @@ -763,33 +788,33 @@ __global__ void QVeloDeviceEQ27(real* VeloX, mfcaa = -rho * XXc * YYa * ZZa - c1o216; mfaca = -rho * XXa * YYc * ZZa - c1o216; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00 ])[ke ] = mfabb;//mfcbb; - (D.f[DIR_M00 ])[kw ] = mfcbb;//mfabb; - (D.f[DIR_0P0 ])[kn ] = mfbab;//mfbcb; - (D.f[DIR_0M0 ])[ks ] = mfbcb;//mfbab; - (D.f[DIR_00P ])[kt ] = mfbba;//mfbbc; - (D.f[DIR_00M ])[kb ] = mfbbc;//mfbba; - (D.f[DIR_PP0 ])[kne ] = mfaab;//mfccb; - (D.f[DIR_MM0 ])[ksw ] = mfccb;//mfaab; - (D.f[DIR_PM0 ])[kse ] = mfacb;//mfcab; - (D.f[DIR_MP0 ])[knw ] = mfcab;//mfacb; - (D.f[DIR_P0P ])[kte ] = mfaba;//mfcbc; - (D.f[DIR_M0M ])[kbw ] = mfcbc;//mfaba; - (D.f[DIR_P0M ])[kbe ] = mfabc;//mfcba; - (D.f[DIR_M0P ])[ktw ] = mfcba;//mfabc; - (D.f[DIR_0PP ])[ktn ] = mfbaa;//mfbcc; - (D.f[DIR_0MM ])[kbs ] = mfbcc;//mfbaa; - (D.f[DIR_0PM ])[kbn ] = mfbac;//mfbca; - (D.f[DIR_0MP ])[kts ] = mfbca;//mfbac; + (D.f[DIR_P00])[ke ] = mfabb;//mfcbb; + (D.f[DIR_M00])[kw ] = mfcbb;//mfabb; + (D.f[DIR_0P0])[kn ] = mfbab;//mfbcb; + (D.f[DIR_0M0])[ks ] = mfbcb;//mfbab; + (D.f[DIR_00P])[kt ] = mfbba;//mfbbc; + (D.f[DIR_00M])[kb ] = mfbbc;//mfbba; + (D.f[DIR_PP0])[kne ] = mfaab;//mfccb; + (D.f[DIR_MM0])[ksw ] = 
mfccb;//mfaab; + (D.f[DIR_PM0])[kse ] = mfacb;//mfcab; + (D.f[DIR_MP0])[knw ] = mfcab;//mfacb; + (D.f[DIR_P0P])[kte ] = mfaba;//mfcbc; + (D.f[DIR_M0M])[kbw ] = mfcbc;//mfaba; + (D.f[DIR_P0M])[kbe ] = mfabc;//mfcba; + (D.f[DIR_M0P])[ktw ] = mfcba;//mfabc; + (D.f[DIR_0PP])[ktn ] = mfbaa;//mfbcc; + (D.f[DIR_0MM])[kbs ] = mfbcc;//mfbaa; + (D.f[DIR_0PM])[kbn ] = mfbac;//mfbca; + (D.f[DIR_0MP])[kts ] = mfbca;//mfbac; (D.f[DIR_000])[kzero] = mfbbb;//mfbbb; - (D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc; - (D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac; - (D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac; - (D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc; - (D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca; - (D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa; - (D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa; - (D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca; + (D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc; + (D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac; + (D.f[DIR_PMP])[ktse ] = mfaca;//mfcac; + (D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc; + (D.f[DIR_PPM])[kbne ] = mfaac;//mfcca; + (D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa; + (D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa; + (D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -834,18 +859,18 @@ __global__ void QVeloDeviceEQ27(real* VeloX, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// __global__ void QVeloStreetDeviceEQ27( - real* veloXfraction, - real* veloYfraction, - int* naschVelo, - real* DD, - int* naschIndex, - int numberOfStreetNodes, - real velocityRatio, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint size_Mat, - bool isEvenTimestep) + real* veloXfraction, + real* veloYfraction, + int* naschVelo, + real* DD, + int* naschIndex, + int numberOfStreetNodes, + real velocityRatio, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long 
numberOfLBnodes, + bool isEvenTimestep) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -894,95 +919,95 @@ __global__ void QVeloStreetDeviceEQ27( Distributions27 D; if (isEvenTimestep == true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * 
numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + 
D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // based on BGK Plus Comp ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[ke ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[kn ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[kt ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[kne ]; - real mfaab = (D.f[DIR_MM0 ])[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[kse ]; - real mfacb = (D.f[DIR_MP0 ])[knw ]; - real mfcbc = (D.f[DIR_P0P ])[kte ]; - real mfaba = (D.f[DIR_M0M ])[kbw ]; - real mfcba 
= (D.f[DIR_P0M ])[kbe ]; - real mfabc = (D.f[DIR_M0P ])[ktw ]; - real mfbcc = (D.f[DIR_0PP ])[ktn ]; - real mfbaa = (D.f[DIR_0MM ])[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kbn ]; - real mfbac = (D.f[DIR_0MP ])[kts ]; + real mfcbb = (D.f[DIR_P00])[ke ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[kn ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[kt ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[kne ]; + real mfaab = (D.f[DIR_MM0])[ksw ]; + real mfcab = (D.f[DIR_PM0])[kse ]; + real mfacb = (D.f[DIR_MP0])[knw ]; + real mfcbc = (D.f[DIR_P0P])[kte ]; + real mfaba = (D.f[DIR_M0M])[kbw ]; + real mfcba = (D.f[DIR_P0M])[kbe ]; + real mfabc = (D.f[DIR_M0P])[ktw ]; + real mfbcc = (D.f[DIR_0PP])[ktn ]; + real mfbaa = (D.f[DIR_0MM])[kbs ]; + real mfbca = (D.f[DIR_0PM])[kbn ]; + real mfbac = (D.f[DIR_0MP])[kts ]; real mfbbb = (D.f[DIR_000])[kzero]; - real mfccc = (D.f[DIR_PPP ])[ktne ]; - real mfaac = (D.f[DIR_MMP ])[ktsw ]; - real mfcac = (D.f[DIR_PMP ])[ktse ]; - real mfacc = (D.f[DIR_MPP ])[ktnw ]; - real mfcca = (D.f[DIR_PPM ])[kbne ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw ]; - real mfcaa = (D.f[DIR_PMM ])[kbse ]; - real mfaca = (D.f[DIR_MPM ])[kbnw ]; + real mfccc = (D.f[DIR_PPP])[ktne ]; + real mfaac = (D.f[DIR_MMP])[ktsw ]; + real mfcac = (D.f[DIR_PMP])[ktse ]; + real mfacc = (D.f[DIR_MPP])[ktnw ]; + real mfcca = (D.f[DIR_PPM])[kbne ]; + real mfaaa = (D.f[DIR_MMM])[kbsw ]; + real mfcaa = (D.f[DIR_PMM])[kbse ]; + real mfaca = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////////// real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca + mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb + @@ -1049,33 +1074,33 @@ __global__ void QVeloStreetDeviceEQ27( mfcaa = -rho * XXc * YYa * ZZa - c1o216; mfaca = -rho * XXa * YYc * ZZa - c1o216; 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00 ])[ke ] = mfabb;//mfcbb; - (D.f[DIR_M00 ])[kw ] = mfcbb;//mfabb; - (D.f[DIR_0P0 ])[kn ] = mfbab;//mfbcb; - (D.f[DIR_0M0 ])[ks ] = mfbcb;//mfbab; - (D.f[DIR_00P ])[kt ] = mfbba;//mfbbc; - (D.f[DIR_00M ])[kb ] = mfbbc;//mfbba; - (D.f[DIR_PP0 ])[kne ] = mfaab;//mfccb; - (D.f[DIR_MM0 ])[ksw ] = mfccb;//mfaab; - (D.f[DIR_PM0 ])[kse ] = mfacb;//mfcab; - (D.f[DIR_MP0 ])[knw ] = mfcab;//mfacb; - (D.f[DIR_P0P ])[kte ] = mfaba;//mfcbc; - (D.f[DIR_M0M ])[kbw ] = mfcbc;//mfaba; - (D.f[DIR_P0M ])[kbe ] = mfabc;//mfcba; - (D.f[DIR_M0P ])[ktw ] = mfcba;//mfabc; - (D.f[DIR_0PP ])[ktn ] = mfbaa;//mfbcc; - (D.f[DIR_0MM ])[kbs ] = mfbcc;//mfbaa; - (D.f[DIR_0PM ])[kbn ] = mfbac;//mfbca; - (D.f[DIR_0MP ])[kts ] = mfbca;//mfbac; + (D.f[DIR_P00])[ke ] = mfabb;//mfcbb; + (D.f[DIR_M00])[kw ] = mfcbb;//mfabb; + (D.f[DIR_0P0])[kn ] = mfbab;//mfbcb; + (D.f[DIR_0M0])[ks ] = mfbcb;//mfbab; + (D.f[DIR_00P])[kt ] = mfbba;//mfbbc; + (D.f[DIR_00M])[kb ] = mfbbc;//mfbba; + (D.f[DIR_PP0])[kne ] = mfaab;//mfccb; + (D.f[DIR_MM0])[ksw ] = mfccb;//mfaab; + (D.f[DIR_PM0])[kse ] = mfacb;//mfcab; + (D.f[DIR_MP0])[knw ] = mfcab;//mfacb; + (D.f[DIR_P0P])[kte ] = mfaba;//mfcbc; + (D.f[DIR_M0M])[kbw ] = mfcbc;//mfaba; + (D.f[DIR_P0M])[kbe ] = mfabc;//mfcba; + (D.f[DIR_M0P])[ktw ] = mfcba;//mfabc; + (D.f[DIR_0PP])[ktn ] = mfbaa;//mfbcc; + (D.f[DIR_0MM])[kbs ] = mfbcc;//mfbaa; + (D.f[DIR_0PM])[kbn ] = mfbac;//mfbca; + (D.f[DIR_0MP])[kts ] = mfbca;//mfbac; (D.f[DIR_000])[kzero] = mfbbb;//mfbbb; - (D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc; - (D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac; - (D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac; - (D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc; - (D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca; - (D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa; - (D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa; - (D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca; + (D.f[DIR_PPP])[ktne ] 
= mfaaa;//mfccc; + (D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac; + (D.f[DIR_PMP])[ktse ] = mfaca;//mfcac; + (D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc; + (D.f[DIR_PPM])[kbne ] = mfaac;//mfcca; + (D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa; + (D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa; + (D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1120,80 +1145,80 @@ __global__ void QVeloStreetDeviceEQ27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceIncompHighNu27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - 
D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 
*size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] 
= &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1218,24 +1243,24 @@ __global__ void QVelDeviceIncompHighNu27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * 
numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1278,32 +1303,32 @@ __global__ void QVelDeviceIncompHighNu27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; 
+ f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -1328,63 +1353,63 @@ __global__ void QVelDeviceIncompHighNu27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + 
D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = 
&DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -1618,80 +1643,80 @@ __global__ void QVelDeviceIncompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompHighNu27( - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, 
+ real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * 
numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + 
D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -1716,24 +1741,24 @@ __global__ void QVelDeviceCompHighNu27( *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; 
- q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -1776,58 +1801,58 @@ __global__ void QVelDeviceCompHighNu27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = 
(D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; - //f_W = (D.f[DIR_P00 ])[ke ]; - //f_E = (D.f[DIR_M00 ])[kw ]; - //f_S = (D.f[DIR_0P0 ])[kn ]; - //f_N = (D.f[DIR_0M0 ])[ks ]; - //f_B = (D.f[DIR_00P ])[kt ]; - //f_T = (D.f[DIR_00M ])[kb ]; - //f_SW = (D.f[DIR_PP0 ])[kne ]; - //f_NE = (D.f[DIR_MM0 ])[ksw ]; - //f_NW = (D.f[DIR_PM0 ])[kse ]; - //f_SE = (D.f[DIR_MP0 ])[knw ]; - //f_BW = (D.f[DIR_P0P ])[kte ]; - //f_TE = (D.f[DIR_M0M ])[kbw ]; - //f_TW = (D.f[DIR_P0M ])[kbe ]; - //f_BE = (D.f[DIR_M0P ])[ktw ]; - //f_BS = (D.f[DIR_0PP ])[ktn ]; - //f_TN = (D.f[DIR_0MM ])[kbs ]; - //f_TS = (D.f[DIR_0PM ])[kbn ]; - //f_BN = (D.f[DIR_0MP ])[kts ]; - //f_BSW = (D.f[DIR_PPP ])[ktne ]; - //f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //f_BNW = (D.f[DIR_PMP ])[ktse ]; - //f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //f_TSW = (D.f[DIR_PPM ])[kbne ]; - //f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //f_TNW = (D.f[DIR_PMM ])[kbse ]; - //f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; + 
//f_W = (D.f[DIR_P00])[ke ]; + //f_E = (D.f[DIR_M00])[kw ]; + //f_S = (D.f[DIR_0P0])[kn ]; + //f_N = (D.f[DIR_0M0])[ks ]; + //f_B = (D.f[DIR_00P])[kt ]; + //f_T = (D.f[DIR_00M])[kb ]; + //f_SW = (D.f[DIR_PP0])[kne ]; + //f_NE = (D.f[DIR_MM0])[ksw ]; + //f_NW = (D.f[DIR_PM0])[kse ]; + //f_SE = (D.f[DIR_MP0])[knw ]; + //f_BW = (D.f[DIR_P0P])[kte ]; + //f_TE = (D.f[DIR_M0M])[kbw ]; + //f_TW = (D.f[DIR_P0M])[kbe ]; + //f_BE = (D.f[DIR_M0P])[ktw ]; + //f_BS = (D.f[DIR_0PP])[ktn ]; + //f_TN = (D.f[DIR_0MM])[kbs ]; + //f_TS = (D.f[DIR_0PM])[kbn ]; + //f_BN = (D.f[DIR_0MP])[kts ]; + //f_BSW = (D.f[DIR_PPP])[ktne ]; + //f_BNE = (D.f[DIR_MMP])[ktsw ]; + //f_BNW = (D.f[DIR_PMP])[ktse ]; + //f_BSE = (D.f[DIR_MPP])[ktnw ]; + //f_TSW = (D.f[DIR_PPM])[kbne ]; + //f_TNE = (D.f[DIR_MMM])[kbsw ]; + //f_TNW = (D.f[DIR_PMM])[kbse ]; + //f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -1852,63 +1877,63 @@ __global__ void QVelDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = 
&DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 
] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; 
+ D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -2194,39 +2219,32 @@ __global__ void QVelDeviceCompHighNu27( ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceCompZeroPress27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distribution, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distribution, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// - //! The velocity boundary condition is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + //! The velocity boundary condition is executed in the following steps + //! + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// @@ -2239,9 +2257,9 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// @@ -2253,7 +2271,7 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int KQK = subgridDistanceIndices[k]; + unsigned int KQK = subgridDistanceIndices[nodeIndex]; unsigned int kzero= KQK; unsigned int ke = KQK; unsigned int kw = neighborX[KQK]; @@ -2285,32 +2303,32 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! 
- real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = 
(dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -2342,7 +2360,7 @@ __global__ void QVelDeviceCompZeroPress27( //////////////////////////////////////////////////////////////////////////////// //! - Update distributions with subgrid distance (q) between zero and one real feq, q, velocityLB, velocityBC; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -2351,7 +2369,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -2360,7 +2378,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -2369,7 +2387,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -2378,7 +2396,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -2387,7 +2405,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, 
feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -2396,7 +2414,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -2405,7 +2423,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -2414,7 +2432,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -2423,7 +2441,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -2432,7 +2450,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -2441,7 +2459,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = 
(subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -2450,7 +2468,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -2459,7 +2477,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -2468,7 +2486,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -2477,7 +2495,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -2486,7 +2504,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -2495,7 +2513,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -2504,7 
+2522,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -2513,7 +2531,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 - vx3; @@ -2522,7 +2540,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -2531,7 +2549,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -2540,7 +2558,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -2549,7 +2567,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -2558,7 +2576,7 @@ __global__ void 
QVelDeviceCompZeroPress27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -2567,7 +2585,7 @@ __global__ void QVelDeviceCompZeroPress27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -2619,87 +2637,88 @@ __global__ void QVelDeviceCompZeroPress27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDeviceCompZeroPress1h27( int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QVelDeviceCompZeroPress1h27( + int inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - 
D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = 
&DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * 
numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -2738,24 +2757,24 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * 
numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -2797,63 +2816,63 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + 
D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - 
D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q, cu_sq; @@ -3090,21 +3109,22 @@ __global__ void 
QVelDeviceCompZeroPress1h27( int inx, ////////////////////////////////////////////////////////////////////////////// -__global__ void LB_BC_Vel_West_27( int nx, - int ny, - int nz, - int itz, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DD, - unsigned int size_Mat, - bool isEvenTimestep, - real u0x, - unsigned int grid_nx, - unsigned int grid_ny, - real om) +__global__ void LB_BC_Vel_West_27( + int nx, + int ny, + int nz, + int itz, + unsigned int* bcMatD, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* DD, + unsigned long long numberOfLBnodes, + bool isEvenTimestep, + real u0x, + unsigned int grid_nx, + unsigned int grid_ny, + real om) { //thread-index unsigned int ity = blockIdx.x; @@ -3125,63 +3145,63 @@ __global__ void LB_BC_Vel_West_27( int nx, Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM 
*size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = 
&DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -3300,33 +3320,33 @@ __global__ void LB_BC_Vel_West_27( int nx, 
real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO, f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW; - f1_W = (D.f[DIR_P00 ])[k1e ]; - f1_E = (D.f[DIR_M00 ])[k1w ]; - f1_S = (D.f[DIR_0P0 ])[k1n ]; - f1_N = (D.f[DIR_0M0 ])[k1s ]; - f1_B = (D.f[DIR_00P ])[k1t ]; - f1_T = (D.f[DIR_00M ])[k1b ]; - f1_SW = (D.f[DIR_PP0 ])[k1ne ]; - f1_NE = (D.f[DIR_MM0 ])[k1sw ]; - f1_NW = (D.f[DIR_PM0 ])[k1se ]; - f1_SE = (D.f[DIR_MP0 ])[k1nw ]; - f1_BW = (D.f[DIR_P0P ])[k1te ]; - f1_TE = (D.f[DIR_M0M ])[k1bw ]; - f1_TW = (D.f[DIR_P0M ])[k1be ]; - f1_BE = (D.f[DIR_M0P ])[k1tw ]; - f1_BS = (D.f[DIR_0PP ])[k1tn ]; - f1_TN = (D.f[DIR_0MM ])[k1bs ]; - f1_TS = (D.f[DIR_0PM ])[k1bn ]; - f1_BN = (D.f[DIR_0MP ])[k1ts ]; + f1_W = (D.f[DIR_P00])[k1e ]; + f1_E = (D.f[DIR_M00])[k1w ]; + f1_S = (D.f[DIR_0P0])[k1n ]; + f1_N = (D.f[DIR_0M0])[k1s ]; + f1_B = (D.f[DIR_00P])[k1t ]; + f1_T = (D.f[DIR_00M])[k1b ]; + f1_SW = (D.f[DIR_PP0])[k1ne ]; + f1_NE = (D.f[DIR_MM0])[k1sw ]; + f1_NW = (D.f[DIR_PM0])[k1se ]; + f1_SE = (D.f[DIR_MP0])[k1nw ]; + f1_BW = (D.f[DIR_P0P])[k1te ]; + f1_TE = (D.f[DIR_M0M])[k1bw ]; + f1_TW = (D.f[DIR_P0M])[k1be ]; + f1_BE = (D.f[DIR_M0P])[k1tw ]; + f1_BS = (D.f[DIR_0PP])[k1tn ]; + f1_TN = (D.f[DIR_0MM])[k1bs ]; + f1_TS = (D.f[DIR_0PM])[k1bn ]; + f1_BN = (D.f[DIR_0MP])[k1ts ]; f1_ZERO = (D.f[DIR_000])[k1zero]; - f1_BSW = (D.f[DIR_PPP ])[k1tne ]; - f1_BNE = (D.f[DIR_MMP ])[k1tsw ]; - f1_BNW = (D.f[DIR_PMP ])[k1tse ]; - f1_BSE = (D.f[DIR_MPP ])[k1tnw ]; - f1_TSW = (D.f[DIR_PPM ])[k1bne ]; - f1_TNE = (D.f[DIR_MMM ])[k1bsw ]; - f1_TNW = (D.f[DIR_PMM ])[k1bse ]; - f1_TSE = (D.f[DIR_MPM ])[k1bnw ]; + f1_BSW = (D.f[DIR_PPP])[k1tne ]; + f1_BNE = (D.f[DIR_MMP])[k1tsw ]; + f1_BNW = (D.f[DIR_PMP])[k1tse ]; + f1_BSE = (D.f[DIR_MPP])[k1tnw ]; + f1_TSW = (D.f[DIR_PPM])[k1bne ]; + f1_TNE = (D.f[DIR_MMM])[k1bsw ]; + f1_TNW = (D.f[DIR_PMM])[k1bse ]; + f1_TSE = (D.f[DIR_MPM])[k1bnw ]; real drho1 = 
f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW; @@ -3343,32 +3363,32 @@ __global__ void LB_BC_Vel_West_27( int nx, real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3); (D.f[DIR_000])[kzero] = c8o27* (drho-cu_sq); - (D.f[DIR_P00 ])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); - (D.f[DIR_M00 ])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); - (D.f[DIR_0P0 ])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); - (D.f[DIR_0M0 ])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); - (D.f[DIR_00P ])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); - (D.f[DIR_00M ])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); - (D.f[DIR_PP0 ])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - (D.f[DIR_MM0 ])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - (D.f[DIR_PM0 ])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - (D.f[DIR_MP0 ])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - (D.f[DIR_P0P ])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - (D.f[DIR_M0M ])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - (D.f[DIR_P0M ])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); - (D.f[DIR_M0P ])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - (D.f[DIR_0PP ])[ktn ] = c1o54* (drho+c3o1*( vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); - (D.f[DIR_0MM ])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - (D.f[DIR_0PM ])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); - (D.f[DIR_0MP ])[kts ] = c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - (D.f[DIR_PPP ])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( 
vx1+vx2+vx3)-cu_sq); - (D.f[DIR_MMM ])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - (D.f[DIR_PPM ])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - (D.f[DIR_MMP ])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - (D.f[DIR_PMP ])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - (D.f[DIR_MPM ])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - (D.f[DIR_PMM ])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); - (D.f[DIR_MPP ])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + (D.f[DIR_P00])[ke ] = c2o27* (drho+c3o1*( vx1 )+c9o2*( vx1 )*( vx1 )-cu_sq); + (D.f[DIR_M00])[kw ] = c2o27* (drho+c3o1*(-vx1 )+c9o2*(-vx1 )*(-vx1 )-cu_sq); + (D.f[DIR_0P0])[kn ] = c2o27* (drho+c3o1*( vx2 )+c9o2*( vx2 )*( vx2 )-cu_sq); + (D.f[DIR_0M0])[ks ] = c2o27* (drho+c3o1*( -vx2 )+c9o2*( -vx2 )*( -vx2 )-cu_sq); + (D.f[DIR_00P])[kt ] = c2o27* (drho+c3o1*( vx3)+c9o2*( vx3)*( vx3)-cu_sq); + (D.f[DIR_00M])[kb ] = c2o27* (drho+c3o1*( -vx3)+c9o2*( -vx3)*( -vx3)-cu_sq); + (D.f[DIR_PP0])[kne ] = c1o54* (drho+c3o1*( vx1+vx2 )+c9o2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + (D.f[DIR_MM0])[ksw ] = c1o54* (drho+c3o1*(-vx1-vx2 )+c9o2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + (D.f[DIR_PM0])[kse ] = c1o54* (drho+c3o1*( vx1-vx2 )+c9o2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + (D.f[DIR_MP0])[knw ] = c1o54* (drho+c3o1*(-vx1+vx2 )+c9o2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + (D.f[DIR_P0P])[kte ] = c1o54* (drho+c3o1*( vx1 +vx3)+c9o2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + (D.f[DIR_M0M])[kbw ] = c1o54* (drho+c3o1*(-vx1 -vx3)+c9o2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + (D.f[DIR_P0M])[kbe ] = c1o54* (drho+c3o1*( vx1 -vx3)+c9o2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + (D.f[DIR_M0P])[ktw ] = c1o54* (drho+c3o1*(-vx1 +vx3)+c9o2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + (D.f[DIR_0PP])[ktn ] = c1o54* (drho+c3o1*( 
vx2+vx3)+c9o2*( vx2+vx3)*( vx2+vx3)-cu_sq); + (D.f[DIR_0MM])[kbs ] = c1o54* (drho+c3o1*( -vx2-vx3)+c9o2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + (D.f[DIR_0PM])[kbn ] = c1o54* (drho+c3o1*( vx2-vx3)+c9o2*( vx2-vx3)*( vx2-vx3)-cu_sq); + (D.f[DIR_0MP])[kts ] = c1o54* (drho+c3o1*( -vx2+vx3)+c9o2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + (D.f[DIR_PPP])[ktne ] = c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + (D.f[DIR_MMM])[kbsw ] = c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + (D.f[DIR_PPM])[kbne ] = c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + (D.f[DIR_MMP])[ktsw ] = c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + (D.f[DIR_PMP])[ktse ] = c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + (D.f[DIR_MPM])[kbnw ] = c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + (D.f[DIR_PMM])[kbse ] = c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + (D.f[DIR_MPP])[ktnw ] = c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); } __syncthreads(); } @@ -3414,18 +3434,18 @@ __global__ void LB_BC_Vel_West_27( int nx, ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDevPlainBB27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - uint numberOfBCnodes, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - uint numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + uint numberOfBCnodes, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! 
The velocity boundary condition is executed in the following steps @@ -3433,18 +3453,11 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// // run for all indices in size of boundary condition (numberOfBCnodes) - if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -3456,9 +3469,9 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set local subgrid distances (q's) @@ -3469,7 +3482,7 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! 
- uint indexOfBCnode = subgridDistanceIndices[k]; + uint indexOfBCnode = subgridDistanceIndices[nodeIndex]; uint ke = indexOfBCnode; uint kw = neighborX[indexOfBCnode]; uint kn = indexOfBCnode; @@ -3500,32 +3513,32 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! - real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = 
(dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = (dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - change the pointer to write the results in the correct array @@ -3535,32 +3548,32 @@ __global__ void QVelDevPlainBB27( //////////////////////////////////////////////////////////////////////////////// //! - rewrite distributions if there is a sub-grid distance (q) in same direction real q; - q = (subgridD.q[DIR_P00 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00 ])[kw ]=f_E + c4o9 * (-VeloX); - q = (subgridD.q[DIR_M00 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00 ])[ke ]=f_W + c4o9 * ( VeloX); - q = (subgridD.q[DIR_0P0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0 ])[ks ]=f_N + c4o9 * (-VeloY); - q = (subgridD.q[DIR_0M0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0 ])[kn ]=f_S + c4o9 * ( VeloY); - q = (subgridD.q[DIR_00P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M ])[kb ]=f_T + c4o9 * (-VeloZ); - q = (subgridD.q[DIR_00M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P ])[kt ]=f_B + c4o9 * ( VeloZ); - q = (subgridD.q[DIR_PP0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0 ])[ksw ]=f_NE + c1o9 * (-VeloX - VeloY); - q = (subgridD.q[DIR_MM0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0 ])[kne ]=f_SW + c1o9 * ( VeloX + VeloY); - q = (subgridD.q[DIR_PM0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0 ])[knw ]=f_SE + c1o9 * (-VeloX + VeloY); - q = (subgridD.q[DIR_MP0 ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0 ])[kse ]=f_NW + c1o9 * ( VeloX - VeloY); - q = (subgridD.q[DIR_P0P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M ])[kbw ]=f_TE + c1o9 * (-VeloX - 
VeloZ); - q = (subgridD.q[DIR_M0M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P ])[kte ]=f_BW + c1o9 * ( VeloX + VeloZ); - q = (subgridD.q[DIR_P0M ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P ])[ktw ]=f_BE + c1o9 * (-VeloX + VeloZ); - q = (subgridD.q[DIR_M0P ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M ])[kbe ]=f_TW + c1o9 * ( VeloX - VeloZ); - q = (subgridD.q[DIR_0PP ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM ])[kbs ]=f_TN + c1o9 * (-VeloY - VeloZ); - q = (subgridD.q[DIR_0MM ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP ])[ktn ]=f_BS + c1o9 * ( VeloY + VeloZ); - q = (subgridD.q[DIR_0PM ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP ])[kts ]=f_BN + c1o9 * (-VeloY + VeloZ); - q = (subgridD.q[DIR_0MP ])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM ])[kbn ]=f_TS + c1o9 * ( VeloY - VeloZ); - q = (subgridD.q[DIR_PPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ); - q = (subgridD.q[DIR_MMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ); - q = (subgridD.q[DIR_PPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ); - q = (subgridD.q[DIR_MMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ); - q = (subgridD.q[DIR_PMP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ); - q = (subgridD.q[DIR_MPM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ); - q = (subgridD.q[DIR_PMM])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ); - q = (subgridD.q[DIR_MPP])[k]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ); + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M00])[kw ]=f_E + c4o9 * (-VeloX); + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P00])[ke ]=f_W + c4o9 * ( 
VeloX); + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0M0])[ks ]=f_N + c4o9 * (-VeloY); + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0P0])[kn ]=f_S + c4o9 * ( VeloY); + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00M])[kb ]=f_T + c4o9 * (-VeloZ); + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_00P])[kt ]=f_B + c4o9 * ( VeloZ); + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MM0])[ksw ]=f_NE + c1o9 * (-VeloX - VeloY); + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PP0])[kne ]=f_SW + c1o9 * ( VeloX + VeloY); + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MP0])[knw ]=f_SE + c1o9 * (-VeloX + VeloY); + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PM0])[kse ]=f_NW + c1o9 * ( VeloX - VeloY); + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0M])[kbw ]=f_TE + c1o9 * (-VeloX - VeloZ); + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0P])[kte ]=f_BW + c1o9 * ( VeloX + VeloZ); + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_M0P])[ktw ]=f_BE + c1o9 * (-VeloX + VeloZ); + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_P0M])[kbe ]=f_TW + c1o9 * ( VeloX - VeloZ); + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MM])[kbs ]=f_TN + c1o9 * (-VeloY - VeloZ); + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PP])[ktn ]=f_BS + c1o9 * ( VeloY + VeloZ); + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0MP])[kts ]=f_BN + c1o9 * (-VeloY + VeloZ); + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_0PM])[kbn ]=f_TS + c1o9 * ( VeloY - VeloZ); + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX 
- VeloY - VeloZ); + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ); + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ); + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ); + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ); + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ); + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ); + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ); } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -3604,80 +3617,81 @@ __global__ void QVelDevPlainBB27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDevCouette27(real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QVelDevCouette27( + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 
*size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * 
numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + 
D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -3702,24 +3716,24 @@ __global__ void QVelDevCouette27(real* vx, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * 
numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -3761,94 +3775,94 @@ __global__ void QVelDevCouette27(real* vx, //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// - real f_W = (D.f[DIR_P00 ])[ke ]; - real f_E = (D.f[DIR_M00 ])[kw ]; - real f_S = (D.f[DIR_0P0 ])[kn ]; - real f_N = (D.f[DIR_0M0 ])[ks ]; - real f_B = (D.f[DIR_00P ])[kt ]; - real f_T = (D.f[DIR_00M ])[kb ]; - real f_SW = (D.f[DIR_PP0 ])[kne ]; - real f_NE = (D.f[DIR_MM0 ])[ksw ]; - real f_NW = (D.f[DIR_PM0 ])[kse ]; - real f_SE = (D.f[DIR_MP0 ])[knw ]; - real f_BW = (D.f[DIR_P0P ])[kte ]; - real f_TE = (D.f[DIR_M0M ])[kbw ]; - real f_TW = (D.f[DIR_P0M ])[kbe ]; - real f_BE = (D.f[DIR_M0P ])[ktw ]; - real f_BS = (D.f[DIR_0PP ])[ktn ]; - real f_TN = (D.f[DIR_0MM ])[kbs ]; - real f_TS = (D.f[DIR_0PM ])[kbn ]; - real f_BN = (D.f[DIR_0MP ])[kts ]; - real f_BSW = (D.f[DIR_PPP ])[ktne ]; - real f_BNE = (D.f[DIR_MMP ])[ktsw ]; - real f_BNW = (D.f[DIR_PMP ])[ktse ]; - real f_BSE = (D.f[DIR_MPP ])[ktnw ]; - real f_TSW = (D.f[DIR_PPM ])[kbne ]; - real f_TNE = (D.f[DIR_MMM ])[kbsw ]; - real f_TNW = (D.f[DIR_PMM ])[kbse 
]; - real f_TSE = (D.f[DIR_MPM ])[kbnw ]; + real f_W = (D.f[DIR_P00])[ke ]; + real f_E = (D.f[DIR_M00])[kw ]; + real f_S = (D.f[DIR_0P0])[kn ]; + real f_N = (D.f[DIR_0M0])[ks ]; + real f_B = (D.f[DIR_00P])[kt ]; + real f_T = (D.f[DIR_00M])[kb ]; + real f_SW = (D.f[DIR_PP0])[kne ]; + real f_NE = (D.f[DIR_MM0])[ksw ]; + real f_NW = (D.f[DIR_PM0])[kse ]; + real f_SE = (D.f[DIR_MP0])[knw ]; + real f_BW = (D.f[DIR_P0P])[kte ]; + real f_TE = (D.f[DIR_M0M])[kbw ]; + real f_TW = (D.f[DIR_P0M])[kbe ]; + real f_BE = (D.f[DIR_M0P])[ktw ]; + real f_BS = (D.f[DIR_0PP])[ktn ]; + real f_TN = (D.f[DIR_0MM])[kbs ]; + real f_TS = (D.f[DIR_0PM])[kbn ]; + real f_BN = (D.f[DIR_0MP])[kts ]; + real f_BSW = (D.f[DIR_PPP])[ktne ]; + real f_BNE = (D.f[DIR_MMP])[ktsw ]; + real f_BNW = (D.f[DIR_PMP])[ktse ]; + real f_BSE = (D.f[DIR_MPP])[ktnw ]; + real f_TSW = (D.f[DIR_PPM])[kbne ]; + real f_TNE = (D.f[DIR_MMM])[kbsw ]; + real f_TNW = (D.f[DIR_PMM])[kbse ]; + real f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP 
*size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = 
&DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * 
numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////// FlowDirection Y !!!!!!!!!! /////////////////////////////////// @@ -3868,24 +3882,24 @@ __global__ void QVelDevCouette27(real* vx, ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //set distributions real q; - q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M00 ])[kw ]=f_E + ms*c2o27 * VeloX; - q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_P00 ])[ke ]=f_W - ms*c2o27 * VeloX; - q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0M0 ])[ks ]=f_N + ms*c2o27 * VeloY; - q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0P0 ])[kn ]=f_S - ms*c2o27 * VeloY; - q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_00M ])[kb ]=f_T + ms*c2o27 * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on; - q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_00P ])[kt ]=f_B - ms*c2o27 * VeloZ; - q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MM0 ])[ksw ]=f_NE + ms*c1o54 * VeloX + ms*c1o54 * VeloY; - q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PP0 ])[kne ]=f_SW - ms*c1o54 * VeloX - ms*c1o54 * VeloY; - q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MP0 ])[knw ]=f_SE + ms*c1o54 * VeloX - ms*c1o54 * VeloY; - q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PM0 ])[kse ]=f_NW - ms*c1o54 * VeloX + ms*c1o54 * VeloY; - q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M0M ])[kbw ]=f_TE + ms*c1o54 * VeloX + ms*c1o54 * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ; - q = q_dirBW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_P0P ])[kte ]=f_BW - ms*c1o54 * VeloX - ms*c1o54 * VeloZ; - q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M0P ])[ktw ]=f_BE + ms*c1o54 * VeloX - ms*c1o54 * VeloZ; - q = 
q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_P0M ])[kbe ]=f_TW - ms*c1o54 * VeloX + ms*c1o54 * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ; - q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0MM ])[kbs ]=f_TN + ms*c1o54 * VeloY + ms*c1o54 * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ; - q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0PP ])[ktn ]=f_BS - ms*c1o54 * VeloY - ms*c1o54 * VeloZ; - q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0MP ])[kts ]=f_BN + ms*c1o54 * VeloY - ms*c1o54 * VeloZ; - q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0PM ])[kbn ]=f_TS - ms*c1o54 * VeloY + ms*c1o54 * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ; + q = q_dirE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M00])[kw ]=f_E + ms*c2o27 * VeloX; + q = q_dirW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_P00])[ke ]=f_W - ms*c2o27 * VeloX; + q = q_dirN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0M0])[ks ]=f_N + ms*c2o27 * VeloY; + q = q_dirS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0P0])[kn ]=f_S - ms*c2o27 * VeloY; + q = q_dirT[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_00M])[kb ]=f_T + ms*c2o27 * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on; + q = q_dirB[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_00P])[kt ]=f_B - ms*c2o27 * VeloZ; + q = q_dirNE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MM0])[ksw ]=f_NE + ms*c1o54 * VeloX + ms*c1o54 * VeloY; + q = q_dirSW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PP0])[kne ]=f_SW - ms*c1o54 * VeloX - ms*c1o54 * VeloY; + q = q_dirSE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MP0])[knw ]=f_SE + ms*c1o54 * VeloX - ms*c1o54 * VeloY; + q = q_dirNW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PM0])[kse ]=f_NW - ms*c1o54 * VeloX + ms*c1o54 * VeloY; + q = q_dirTE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M0M])[kbw ]=f_TE + ms*c1o54 * VeloX + ms*c1o54 * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ; + q = q_dirBW[k]; if (q>=c0o1 && 
q<=c1o1) (D.f[DIR_P0P])[kte ]=f_BW - ms*c1o54 * VeloX - ms*c1o54 * VeloZ; + q = q_dirBE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_M0P])[ktw ]=f_BE + ms*c1o54 * VeloX - ms*c1o54 * VeloZ; + q = q_dirTW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_P0M])[kbe ]=f_TW - ms*c1o54 * VeloX + ms*c1o54 * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ; + q = q_dirTN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0MM])[kbs ]=f_TN + ms*c1o54 * VeloY + ms*c1o54 * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ; + q = q_dirBS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0PP])[ktn ]=f_BS - ms*c1o54 * VeloY - ms*c1o54 * VeloZ; + q = q_dirBN[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0MP])[kts ]=f_BN + ms*c1o54 * VeloY - ms*c1o54 * VeloZ; + q = q_dirTS[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_0PM])[kbn ]=f_TS - ms*c1o54 * VeloY + ms*c1o54 * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ; q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on; q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PPP])[ktne]=f_BSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ; q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ; @@ -3894,24 +3908,24 @@ __global__ void QVelDevCouette27(real* vx, q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PMP])[ktse]=f_BNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ; q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_MPP])[ktnw]=f_BSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ; q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D.f[DIR_PMM])[kbse]=f_TNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on; - //q = q_dirE[k]; if (q>=zero && q<=one) (D.f[DIR_M00 ])[kw ]=f_E + 
ms*c2over27 * VeloX; - // q = q_dirW[k]; if (q>=zero && q<=one) (D.f[DIR_P00 ])[ke ]=f_W - ms*c2over27 * VeloX; - // q = q_dirN[k]; if (q>=zero && q<=one) (D.f[DIR_0M0 ])[ks ]=f_N + ms*c2over27 * VeloY; - // q = q_dirS[k]; if (q>=zero && q<=one) (D.f[DIR_0P0 ])[kn ]=f_S - ms*c2over27 * VeloY; - //q = q_dirT[k]; if (q>=zero && q<=one) (D.f[DIR_00M ])[kb ]=f_T + ms*c2over27 * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; - // q = q_dirB[k]; if (q>=zero && q<=one) (D.f[DIR_00P ])[kt ]=f_B - ms*c2over27 * VeloZ; - // q = q_dirNE[k]; if (q>=zero && q<=one) (D.f[DIR_MM0 ])[ksw ]=f_NE + ms*c1over54 * VeloX + ms*c1over54 * VeloY; - //q = q_dirSW[k]; if (q>=zero && q<=one) (D.f[DIR_PP0 ])[kne ]=f_SW - ms*c1over54 * VeloX - ms*c1over54 * VeloY; - //q = q_dirSE[k]; if (q>=zero && q<=one) (D.f[DIR_MP0 ])[knw ]=f_SE + ms*c1over54 * VeloX - ms*c1over54 * VeloY; - //q = q_dirNW[k]; if (q>=zero && q<=one) (D.f[DIR_PM0 ])[kse ]=f_NW - ms*c1over54 * VeloX + ms*c1over54 * VeloY; - //q = q_dirTE[k]; if (q>=zero && q<=one) (D.f[DIR_M0M ])[kbw ]=f_TE + ms*c1over54 * VeloX + ms*c1over54 * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; - //q = q_dirBW[k]; if (q>=zero && q<=one) (D.f[DIR_P0P ])[kte ]=f_BW - ms*c1over54 * VeloX - ms*c1over54 * VeloZ; - //q = q_dirBE[k]; if (q>=zero && q<=one) (D.f[DIR_M0P ])[ktw ]=f_BE + ms*c1over54 * VeloX - ms*c1over54 * VeloZ; - //q = q_dirTW[k]; if (q>=zero && q<=one) (D.f[DIR_P0M ])[kbe ]=f_TW - ms*c1over54 * VeloX + ms*c1over54 * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; - //q = q_dirTN[k]; if (q>=zero && q<=one) (D.f[DIR_0MM ])[kbs ]=f_TN + ms*c1over54 * VeloY + ms*c1over54 * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; - //q = q_dirBS[k]; if (q>=zero && q<=one) (D.f[DIR_0PP ])[ktn ]=f_BS - ms*c1over54 * VeloY - ms*c1over54 * VeloZ; - //q = q_dirBN[k]; if (q>=zero && q<=one) (D.f[DIR_0MP ])[kts ]=f_BN + ms*c1over54 * VeloY - ms*c1over54 * VeloZ; - //q = q_dirTS[k]; if (q>=zero && q<=one) 
(D.f[DIR_0PM ])[kbn ]=f_TS - ms*c1over54 * VeloY + ms*c1over54 * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; + //q = q_dirE[k]; if (q>=zero && q<=one) (D.f[DIR_M00])[kw ]=f_E + ms*c2over27 * VeloX; + // q = q_dirW[k]; if (q>=zero && q<=one) (D.f[DIR_P00])[ke ]=f_W - ms*c2over27 * VeloX; + // q = q_dirN[k]; if (q>=zero && q<=one) (D.f[DIR_0M0])[ks ]=f_N + ms*c2over27 * VeloY; + // q = q_dirS[k]; if (q>=zero && q<=one) (D.f[DIR_0P0])[kn ]=f_S - ms*c2over27 * VeloY; + //q = q_dirT[k]; if (q>=zero && q<=one) (D.f[DIR_00M])[kb ]=f_T + ms*c2over27 * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; + // q = q_dirB[k]; if (q>=zero && q<=one) (D.f[DIR_00P])[kt ]=f_B - ms*c2over27 * VeloZ; + // q = q_dirNE[k]; if (q>=zero && q<=one) (D.f[DIR_MM0])[ksw ]=f_NE + ms*c1over54 * VeloX + ms*c1over54 * VeloY; + //q = q_dirSW[k]; if (q>=zero && q<=one) (D.f[DIR_PP0])[kne ]=f_SW - ms*c1over54 * VeloX - ms*c1over54 * VeloY; + //q = q_dirSE[k]; if (q>=zero && q<=one) (D.f[DIR_MP0])[knw ]=f_SE + ms*c1over54 * VeloX - ms*c1over54 * VeloY; + //q = q_dirNW[k]; if (q>=zero && q<=one) (D.f[DIR_PM0])[kse ]=f_NW - ms*c1over54 * VeloX + ms*c1over54 * VeloY; + //q = q_dirTE[k]; if (q>=zero && q<=one) (D.f[DIR_M0M])[kbw ]=f_TE + ms*c1over54 * VeloX + ms*c1over54 * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; + //q = q_dirBW[k]; if (q>=zero && q<=one) (D.f[DIR_P0P])[kte ]=f_BW - ms*c1over54 * VeloX - ms*c1over54 * VeloZ; + //q = q_dirBE[k]; if (q>=zero && q<=one) (D.f[DIR_M0P])[ktw ]=f_BE + ms*c1over54 * VeloX - ms*c1over54 * VeloZ; + //q = q_dirTW[k]; if (q>=zero && q<=one) (D.f[DIR_P0M])[kbe ]=f_TW - ms*c1over54 * VeloX + ms*c1over54 * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; + //q = q_dirTN[k]; if (q>=zero && q<=one) (D.f[DIR_0MM])[kbs ]=f_TN + ms*c1over54 * VeloY + ms*c1over54 * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; + //q = q_dirBS[k]; if (q>=zero && q<=one) (D.f[DIR_0PP])[ktn ]=f_BS - ms*c1over54 * VeloY 
- ms*c1over54 * VeloZ; + //q = q_dirBN[k]; if (q>=zero && q<=one) (D.f[DIR_0MP])[kts ]=f_BN + ms*c1over54 * VeloY - ms*c1over54 * VeloZ; + //q = q_dirTS[k]; if (q>=zero && q<=one) (D.f[DIR_0PM])[kbn ]=f_TS - ms*c1over54 * VeloY + ms*c1over54 * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; // q = q_dirTNE[k]; if (q>=zero && q<=one) (D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on; // q = q_dirBSW[k]; if (q>=zero && q<=one) (D.f[DIR_PPP])[ktne]=f_BSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ; // q = q_dirBNE[k]; if (q>=zero && q<=one) (D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ; @@ -3964,87 +3978,88 @@ __global__ void QVelDevCouette27(real* vx, ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDev1h27( int inx, - int iny, - real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - real Phi, - real angularVelocity, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* coordX, - real* coordY, - real* coordZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QVelDev1h27( + int inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + real Phi, + real angularVelocity, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* coordX, + real* coordY, + real* coordZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M 
*size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP 
* numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + 
D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -4079,24 +4094,24 @@ __global__ void QVelDev1h27( int inx, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = 
&QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -4167,32 +4182,32 @@ __global__ void QVelDev1h27( int inx, //real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, // f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - //f_W = (D.f[DIR_P00 ])[ke ]; - //f_E = (D.f[DIR_M00 ])[kw ]; - //f_S = (D.f[DIR_0P0 ])[kn ]; - //f_N = (D.f[DIR_0M0 ])[ks ]; - //f_B = (D.f[DIR_00P ])[kt ]; - //f_T = (D.f[DIR_00M ])[kb ]; - //f_SW = (D.f[DIR_PP0 ])[kne ]; - //f_NE = (D.f[DIR_MM0 ])[ksw ]; - //f_NW = (D.f[DIR_PM0 ])[kse ]; - //f_SE = (D.f[DIR_MP0 ])[knw ]; - //f_BW = (D.f[DIR_P0P ])[kte ]; - //f_TE = (D.f[DIR_M0M ])[kbw ]; - //f_TW = (D.f[DIR_P0M ])[kbe ]; - //f_BE = (D.f[DIR_M0P ])[ktw ]; - //f_BS = (D.f[DIR_0PP ])[ktn ]; - //f_TN = (D.f[DIR_0MM ])[kbs ]; - //f_TS = (D.f[DIR_0PM ])[kbn ]; - //f_BN = (D.f[DIR_0MP ])[kts ]; - //f_BSW = (D.f[DIR_PPP ])[ktne ]; - //f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //f_BNW = (D.f[DIR_PMP ])[ktse ]; - //f_BSE = (D.f[DIR_MPP ])[ktnw ]; - //f_TSW = (D.f[DIR_PPM ])[kbne ]; - //f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //f_TNW = (D.f[DIR_PMM ])[kbse ]; - //f_TSE = (D.f[DIR_MPM ])[kbnw ]; + //f_W = (D.f[DIR_P00])[ke ]; + //f_E = (D.f[DIR_M00])[kw ]; + //f_S = (D.f[DIR_0P0])[kn ]; + //f_N = (D.f[DIR_0M0])[ks ]; + //f_B = 
(D.f[DIR_00P])[kt ]; + //f_T = (D.f[DIR_00M])[kb ]; + //f_SW = (D.f[DIR_PP0])[kne ]; + //f_NE = (D.f[DIR_MM0])[ksw ]; + //f_NW = (D.f[DIR_PM0])[kse ]; + //f_SE = (D.f[DIR_MP0])[knw ]; + //f_BW = (D.f[DIR_P0P])[kte ]; + //f_TE = (D.f[DIR_M0M])[kbw ]; + //f_TW = (D.f[DIR_P0M])[kbe ]; + //f_BE = (D.f[DIR_M0P])[ktw ]; + //f_BS = (D.f[DIR_0PP])[ktn ]; + //f_TN = (D.f[DIR_0MM])[kbs ]; + //f_TS = (D.f[DIR_0PM])[kbn ]; + //f_BN = (D.f[DIR_0MP])[kts ]; + //f_BSW = (D.f[DIR_PPP])[ktne ]; + //f_BNE = (D.f[DIR_MMP])[ktsw ]; + //f_BNW = (D.f[DIR_PMP])[ktse ]; + //f_BSE = (D.f[DIR_MPP])[ktnw ]; + //f_TSW = (D.f[DIR_PPM])[kbne ]; + //f_TNE = (D.f[DIR_MMM])[kbsw ]; + //f_TNW = (D.f[DIR_PMM])[kbse ]; + //f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real /*vx1, vx2,*/ vx3, drho, feq, q, cu_sq; //drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -4217,63 +4232,63 @@ __global__ void QVelDev1h27( int inx, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = 
&DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M 
] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = 
&DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -4748,39 +4763,32 @@ __global__ void QVelDev1h27( int inx, ////////////////////////////////////////////////////////////////////////////// __global__ void QVelDeviceComp27( - real* velocityX, - real* velocityY, - real* velocityZ, - real* distributions, - int* subgridDistanceIndices, - real* subgridDistances, - unsigned int numberOfBCnodes, - real omega, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int numberOfLBnodes, - bool isEvenTimestep) + real* velocityX, + real* velocityY, + real* velocityZ, + real* distributions, + int* subgridDistanceIndices, + real* subgridDistances, + unsigned int numberOfBCnodes, + real omega, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { ////////////////////////////////////////////////////////////////////////// //! The velocity boundary condition is executed in the following steps //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; // global x-index - const unsigned y = blockIdx.x; // global y-index - const unsigned z = blockIdx.y; // global z-index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// //! - Run for all indices in size of boundary condition (numberOfBCnodes) //! 
- if(k < numberOfBCnodes) + if(nodeIndex < numberOfBCnodes) { ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref @@ -4792,9 +4800,9 @@ __global__ void QVelDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local velocities //! - real VeloX = velocityX[k]; - real VeloY = velocityY[k]; - real VeloZ = velocityZ[k]; + real VeloX = velocityX[nodeIndex]; + real VeloY = velocityY[nodeIndex]; + real VeloZ = velocityZ[nodeIndex]; //////////////////////////////////////////////////////////////////////////////// //! - Set local subgrid distances (q's) @@ -4805,7 +4813,7 @@ __global__ void QVelDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) //! - unsigned int indexOfBCnode = subgridDistanceIndices[k]; + unsigned int indexOfBCnode = subgridDistanceIndices[nodeIndex]; unsigned int kzero= indexOfBCnode; unsigned int ke = indexOfBCnode; unsigned int kw = neighborX[indexOfBCnode]; @@ -4837,32 +4845,32 @@ __global__ void QVelDeviceComp27( //////////////////////////////////////////////////////////////////////////////// //! - Set local distributions //! 
- real f_W = (dist.f[DIR_P00 ])[ke ]; - real f_E = (dist.f[DIR_M00 ])[kw ]; - real f_S = (dist.f[DIR_0P0 ])[kn ]; - real f_N = (dist.f[DIR_0M0 ])[ks ]; - real f_B = (dist.f[DIR_00P ])[kt ]; - real f_T = (dist.f[DIR_00M ])[kb ]; - real f_SW = (dist.f[DIR_PP0 ])[kne ]; - real f_NE = (dist.f[DIR_MM0 ])[ksw ]; - real f_NW = (dist.f[DIR_PM0 ])[kse ]; - real f_SE = (dist.f[DIR_MP0 ])[knw ]; - real f_BW = (dist.f[DIR_P0P ])[kte ]; - real f_TE = (dist.f[DIR_M0M ])[kbw ]; - real f_TW = (dist.f[DIR_P0M ])[kbe ]; - real f_BE = (dist.f[DIR_M0P ])[ktw ]; - real f_BS = (dist.f[DIR_0PP ])[ktn ]; - real f_TN = (dist.f[DIR_0MM ])[kbs ]; - real f_TS = (dist.f[DIR_0PM ])[kbn ]; - real f_BN = (dist.f[DIR_0MP ])[kts ]; - real f_BSW = (dist.f[DIR_PPP ])[ktne ]; - real f_BNE = (dist.f[DIR_MMP ])[ktsw ]; - real f_BNW = (dist.f[DIR_PMP ])[ktse ]; - real f_BSE = (dist.f[DIR_MPP ])[ktnw ]; - real f_TSW = (dist.f[DIR_PPM ])[kbne ]; - real f_TNE = (dist.f[DIR_MMM ])[kbsw ]; - real f_TNW = (dist.f[DIR_PMM ])[kbse ]; - real f_TSE = (dist.f[DIR_MPM ])[kbnw ]; + real f_W = (dist.f[DIR_P00])[ke ]; + real f_E = (dist.f[DIR_M00])[kw ]; + real f_S = (dist.f[DIR_0P0])[kn ]; + real f_N = (dist.f[DIR_0M0])[ks ]; + real f_B = (dist.f[DIR_00P])[kt ]; + real f_T = (dist.f[DIR_00M])[kb ]; + real f_SW = (dist.f[DIR_PP0])[kne ]; + real f_NE = (dist.f[DIR_MM0])[ksw ]; + real f_NW = (dist.f[DIR_PM0])[kse ]; + real f_SE = (dist.f[DIR_MP0])[knw ]; + real f_BW = (dist.f[DIR_P0P])[kte ]; + real f_TE = (dist.f[DIR_M0M])[kbw ]; + real f_TW = (dist.f[DIR_P0M])[kbe ]; + real f_BE = (dist.f[DIR_M0P])[ktw ]; + real f_BS = (dist.f[DIR_0PP])[ktn ]; + real f_TN = (dist.f[DIR_0MM])[kbs ]; + real f_TS = (dist.f[DIR_0PM])[kbn ]; + real f_BN = (dist.f[DIR_0MP])[kts ]; + real f_BSW = (dist.f[DIR_PPP])[ktne ]; + real f_BNE = (dist.f[DIR_MMP])[ktsw ]; + real f_BNW = (dist.f[DIR_PMP])[ktse ]; + real f_BSE = (dist.f[DIR_MPP])[ktnw ]; + real f_TSW = (dist.f[DIR_PPM])[kbne ]; + real f_TNE = (dist.f[DIR_MMM])[kbsw ]; + real f_TNW = 
(dist.f[DIR_PMM])[kbse ]; + real f_TSE = (dist.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// //! - Calculate macroscopic quantities @@ -4894,7 +4902,7 @@ __global__ void QVelDeviceComp27( //! - Update distributions with subgrid distance (q) between zero and one //! real feq, q, velocityLB, velocityBC; - q = (subgridD.q[DIR_P00])[k]; + q = (subgridD.q[DIR_P00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one { velocityLB = vx1; @@ -4903,7 +4911,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_M00])[k]; + q = (subgridD.q[DIR_M00])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1; @@ -4912,7 +4920,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0P0])[k]; + q = (subgridD.q[DIR_0P0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2; @@ -4921,7 +4929,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_0M0])[k]; + q = (subgridD.q[DIR_0M0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2; @@ -4930,7 +4938,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00P])[k]; + q = (subgridD.q[DIR_00P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx3; @@ -4939,7 +4947,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_00M])[k]; + q = (subgridD.q[DIR_00M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx3; @@ -4948,7 +4956,7 @@ __global__ void QVelDeviceComp27( 
(dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27); } - q = (subgridD.q[DIR_PP0])[k]; + q = (subgridD.q[DIR_PP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2; @@ -4957,7 +4965,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MM0])[k]; + q = (subgridD.q[DIR_MM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2; @@ -4966,7 +4974,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PM0])[k]; + q = (subgridD.q[DIR_PM0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2; @@ -4975,7 +4983,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_MP0])[k]; + q = (subgridD.q[DIR_MP0])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2; @@ -4984,7 +4992,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0P])[k]; + q = (subgridD.q[DIR_P0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx3; @@ -4993,7 +5001,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0M])[k]; + q = (subgridD.q[DIR_M0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx3; @@ -5002,7 +5010,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_P0M])[k]; + q = (subgridD.q[DIR_P0M])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx3; @@ -5011,7 +5019,7 @@ __global__ 
void QVelDeviceComp27( (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_M0P])[k]; + q = (subgridD.q[DIR_M0P])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx3; @@ -5020,7 +5028,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PP])[k]; + q = (subgridD.q[DIR_0PP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 + vx3; @@ -5029,7 +5037,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MM])[k]; + q = (subgridD.q[DIR_0MM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 - vx3; @@ -5038,7 +5046,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0PM])[k]; + q = (subgridD.q[DIR_0PM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx2 - vx3; @@ -5047,7 +5055,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_0MP])[k]; + q = (subgridD.q[DIR_0MP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx2 + vx3; @@ -5056,7 +5064,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54); } - q = (subgridD.q[DIR_PPP])[k]; + q = (subgridD.q[DIR_PPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 + vx3; @@ -5065,7 +5073,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMM])[k]; + q = (subgridD.q[DIR_MMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - 
vx2 - vx3; @@ -5074,7 +5082,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PPM])[k]; + q = (subgridD.q[DIR_PPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 + vx2 - vx3; @@ -5083,7 +5091,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MMP])[k]; + q = (subgridD.q[DIR_MMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 - vx2 + vx3; @@ -5092,7 +5100,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMP])[k]; + q = (subgridD.q[DIR_PMP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 + vx3; @@ -5101,7 +5109,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPM])[k]; + q = (subgridD.q[DIR_MPM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 - vx3; @@ -5110,7 +5118,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_PMM])[k]; + q = (subgridD.q[DIR_PMM])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = vx1 - vx2 - vx3; @@ -5119,7 +5127,7 @@ __global__ void QVelDeviceComp27( (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216); } - q = (subgridD.q[DIR_MPP])[k]; + q = (subgridD.q[DIR_MPP])[nodeIndex]; if (q>=c0o1 && q<=c1o1) { velocityLB = -vx1 + vx2 + vx3; @@ -5170,82 +5178,83 @@ __global__ void QVelDeviceComp27( ////////////////////////////////////////////////////////////////////////////// -__global__ void QVelDevice27(int inx, - int iny, - 
real* vx, - real* vy, - real* vz, - real* DD, - int* k_Q, - real* QQ, - unsigned int numberOfBCnodes, - real om1, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - unsigned int size_Mat, - bool isEvenTimestep) +__global__ void QVelDevice27( + int inx, + int iny, + real* vx, + real* vy, + real* vz, + real* DD, + int* k_Q, + real* QQ, + unsigned int numberOfBCnodes, + real om1, + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + unsigned long long numberOfLBnodes, + bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = 
&DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - 
D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -5270,24 +5279,24 @@ __global__ void QVelDevice27(int inx, *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, *q_dirBSE, *q_dirBNW; - q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - q_dirN = &QQ[DIR_0P0 * 
numberOfBCnodes]; - q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; + q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -5358,32 +5367,32 @@ __global__ void QVelDevice27(int inx, real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - 
f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = (D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// real vx1, vx2, vx3, drho, feq, q; drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -5408,63 +5417,63 @@ __global__ void QVelDevice27(int inx, ////////////////////////////////////////////////////////////////////////// if (isEvenTimestep==false) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] 
= &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = 
&DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * 
numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //Test @@ -5723,19 +5732,20 @@ __global__ void QVelDevice27(int inx, //////////////////////////////////////////////////////////////////////////////// -__global__ void PropellerBC(unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* rho, - real* ux, - real* uy, - real* uz, - int* k_Q, - unsigned int size_Prop, - unsigned int size_Mat, - unsigned int* bcMatD, - real* DD, - bool EvenOrOdd) +__global__ void PropellerBC( + unsigned int* neighborX, + unsigned int* neighborY, + unsigned int* neighborZ, + real* rho, + real* ux, + real* uy, + real* uz, + int* k_Q, + unsigned int size_Prop, + unsigned long long numberOfLBnodes, + unsigned int* bcMatD, + real* DD, + bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -5754,63 +5764,63 @@ __global__ void PropellerBC(unsigned int* neighborX, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 
*size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + 
D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = 
&DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; } ////////////////////////////////////////////////////////////////////////// unsigned int KQK = k_Q[k]; @@ -5859,58 +5869,58 @@ __global__ void PropellerBC(unsigned int* neighborX, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO; f_ZERO= (D.f[DIR_000])[kzero]; - f_E = (D.f[DIR_P00 ])[ke ]; - f_W = (D.f[DIR_M00 ])[kw ]; - f_N = (D.f[DIR_0P0 ])[kn ]; - f_S = (D.f[DIR_0M0 ])[ks ]; - f_T = (D.f[DIR_00P ])[kt ]; - f_B = (D.f[DIR_00M ])[kb ]; - f_NE = (D.f[DIR_PP0 ])[kne ]; - f_SW = (D.f[DIR_MM0 ])[ksw ]; - f_SE = (D.f[DIR_PM0 ])[kse ]; - f_NW = (D.f[DIR_MP0 ])[knw ]; - f_TE = (D.f[DIR_P0P ])[kte ]; - f_BW = (D.f[DIR_M0M ])[kbw ]; - f_BE = (D.f[DIR_P0M ])[kbe ]; - f_TW = (D.f[DIR_M0P ])[ktw ]; - f_TN = (D.f[DIR_0PP ])[ktn ]; - f_BS = (D.f[DIR_0MM ])[kbs ]; - f_BN = (D.f[DIR_0PM ])[kbn ]; - f_TS = (D.f[DIR_0MP ])[kts ]; - f_TNE = (D.f[DIR_PPP ])[ktne ]; - f_BSW = (D.f[DIR_MMM ])[kbsw ]; - f_BNE = (D.f[DIR_PPM ])[kbne ]; - f_TSW = (D.f[DIR_MMP ])[ktsw ]; - f_TSE = (D.f[DIR_PMP ])[ktse ]; - f_BNW = (D.f[DIR_MPM ])[kbnw ]; - f_BSE = (D.f[DIR_PMM ])[kbse ]; - f_TNW = (D.f[DIR_MPP ])[ktnw ]; - //f_W = (D.f[DIR_P00 ])[ke ]; - //f_E = 
(D.f[DIR_M00 ])[kw ]; - //f_S = (D.f[DIR_0P0 ])[kn ]; - //f_N = (D.f[DIR_0M0 ])[ks ]; - //f_B = (D.f[DIR_00P ])[kt ]; - //f_T = (D.f[DIR_00M ])[kb ]; - //f_SW = (D.f[DIR_PP0 ])[kne ]; - //f_NE = (D.f[DIR_MM0 ])[ksw ]; - //f_NW = (D.f[DIR_PM0 ])[kse ]; - //f_SE = (D.f[DIR_MP0 ])[knw ]; - //f_BW = (D.f[DIR_P0P ])[kte ]; - //f_TE = (D.f[DIR_M0M ])[kbw ]; - //f_TW = (D.f[DIR_P0M ])[kbe ]; - //f_BE = (D.f[DIR_M0P ])[ktw ]; - //f_BS = (D.f[DIR_0PP ])[ktn ]; - //f_TN = (D.f[DIR_0MM ])[kbs ]; - //f_TS = (D.f[DIR_0PM ])[kbn ]; - //f_BN = (D.f[DIR_0MP ])[kts ]; - //f_BSW = (D.f[DIR_PPP ])[ktne ]; - //f_TNE = (D.f[DIR_MMM ])[kbsw ]; - //f_TSW = (D.f[DIR_PPM ])[kbne ]; - //f_BNE = (D.f[DIR_MMP ])[ktsw ]; - //f_BNW = (D.f[DIR_PMP ])[ktse ]; - //f_TSE = (D.f[DIR_MPM ])[kbnw ]; - //f_TNW = (D.f[DIR_PMM ])[kbse ]; - //f_BSE = (D.f[DIR_MPP ])[ktnw ]; + f_E = (D.f[DIR_P00])[ke ]; + f_W = (D.f[DIR_M00])[kw ]; + f_N = (D.f[DIR_0P0])[kn ]; + f_S = (D.f[DIR_0M0])[ks ]; + f_T = (D.f[DIR_00P])[kt ]; + f_B = (D.f[DIR_00M])[kb ]; + f_NE = (D.f[DIR_PP0])[kne ]; + f_SW = (D.f[DIR_MM0])[ksw ]; + f_SE = (D.f[DIR_PM0])[kse ]; + f_NW = (D.f[DIR_MP0])[knw ]; + f_TE = (D.f[DIR_P0P])[kte ]; + f_BW = (D.f[DIR_M0M])[kbw ]; + f_BE = (D.f[DIR_P0M])[kbe ]; + f_TW = (D.f[DIR_M0P])[ktw ]; + f_TN = (D.f[DIR_0PP])[ktn ]; + f_BS = (D.f[DIR_0MM])[kbs ]; + f_BN = (D.f[DIR_0PM])[kbn ]; + f_TS = (D.f[DIR_0MP])[kts ]; + f_TNE = (D.f[DIR_PPP])[ktne ]; + f_BSW = (D.f[DIR_MMM])[kbsw ]; + f_BNE = (D.f[DIR_PPM])[kbne ]; + f_TSW = (D.f[DIR_MMP])[ktsw ]; + f_TSE = (D.f[DIR_PMP])[ktse ]; + f_BNW = (D.f[DIR_MPM])[kbnw ]; + f_BSE = (D.f[DIR_PMM])[kbse ]; + f_TNW = (D.f[DIR_MPP])[ktnw ]; + //f_W = (D.f[DIR_P00])[ke ]; + //f_E = (D.f[DIR_M00])[kw ]; + //f_S = (D.f[DIR_0P0])[kn ]; + //f_N = (D.f[DIR_0M0])[ks ]; + //f_B = (D.f[DIR_00P])[kt ]; + //f_T = (D.f[DIR_00M])[kb ]; + //f_SW = (D.f[DIR_PP0])[kne ]; + //f_NE = (D.f[DIR_MM0])[ksw ]; + //f_NW = (D.f[DIR_PM0])[kse ]; + //f_SE = (D.f[DIR_MP0])[knw ]; + //f_BW = 
(D.f[DIR_P0P])[kte ]; + //f_TE = (D.f[DIR_M0M])[kbw ]; + //f_TW = (D.f[DIR_P0M])[kbe ]; + //f_BE = (D.f[DIR_M0P])[ktw ]; + //f_BS = (D.f[DIR_0PP])[ktn ]; + //f_TN = (D.f[DIR_0MM])[kbs ]; + //f_TS = (D.f[DIR_0PM])[kbn ]; + //f_BN = (D.f[DIR_0MP])[kts ]; + //f_BSW = (D.f[DIR_PPP])[ktne ]; + //f_TNE = (D.f[DIR_MMM])[kbsw ]; + //f_TSW = (D.f[DIR_PPM])[kbne ]; + //f_BNE = (D.f[DIR_MMP])[ktsw ]; + //f_BNW = (D.f[DIR_PMP])[ktse ]; + //f_TSE = (D.f[DIR_MPM])[kbnw ]; + //f_TNW = (D.f[DIR_PMM])[kbse ]; + //f_BSE = (D.f[DIR_MPP])[ktnw ]; ////////////////////////////////////////////////////////////////////////////////// real vxo1, vxo2, vxo3, drho; drho = /*zero;*/f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW + @@ -5992,88 +6002,88 @@ __global__ void PropellerBC(unsigned int* neighborX, f_TNW = f_TNW + ((c1o1+drho) * (- c1o216*(c3o1*(-vxo1+vxo2+vxo3)+c9o2*(-vxo1+vxo2+vxo3)*(-vxo1+vxo2+vxo3)-cusq) + c1o216*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq2))); (D.f[DIR_000])[kzero] = f_ZERO; - (D.f[DIR_P00 ])[ke ] = f_E ; // f_W ;// - (D.f[DIR_M00 ])[kw ] = f_W ; // f_E ;// - (D.f[DIR_0P0 ])[kn ] = f_N ; // f_S ;// - (D.f[DIR_0M0 ])[ks ] = f_S ; // f_N ;// - (D.f[DIR_00P ])[kt ] = f_T ; // f_B ;// - (D.f[DIR_00M ])[kb ] = f_B ; // f_T ;// - (D.f[DIR_PP0 ])[kne ] = f_NE ; // f_SW ;// - (D.f[DIR_MM0 ])[ksw ] = f_SW ; // f_NE ;// - (D.f[DIR_PM0 ])[kse ] = f_SE ; // f_NW ;// - (D.f[DIR_MP0 ])[knw ] = f_NW ; // f_SE ;// - (D.f[DIR_P0P ])[kte ] = f_TE ; // f_BW ;// - (D.f[DIR_M0M ])[kbw ] = f_BW ; // f_TE ;// - (D.f[DIR_P0M ])[kbe ] = f_BE ; // f_TW ;// - (D.f[DIR_M0P ])[ktw ] = f_TW ; // f_BE ;// - (D.f[DIR_0PP ])[ktn ] = f_TN ; // f_BS ;// - (D.f[DIR_0MM ])[kbs ] = f_BS ; // f_TN ;// - (D.f[DIR_0PM ])[kbn ] = f_BN ; // f_TS ;// - (D.f[DIR_0MP ])[kts ] = f_TS ; // f_BN ;// - (D.f[DIR_PPP ])[ktne ] = f_TNE ; // f_BSW ;// - (D.f[DIR_MMM ])[kbsw ] = f_BSW ; // f_BNE ;// - (D.f[DIR_PPM ])[kbne ] = f_BNE ; // f_BNW ;// - (D.f[DIR_MMP ])[ktsw ] = f_TSW ; // 
f_BSE ;// - (D.f[DIR_PMP ])[ktse ] = f_TSE ; // f_TSW ;// - (D.f[DIR_MPM ])[kbnw ] = f_BNW ; // f_TNE ;// - (D.f[DIR_PMM ])[kbse ] = f_BSE ; // f_TNW ;// - (D.f[DIR_MPP ])[ktnw ] = f_TNW ; // f_TSE ;// + (D.f[DIR_P00])[ke ] = f_E ; // f_W ;// + (D.f[DIR_M00])[kw ] = f_W ; // f_E ;// + (D.f[DIR_0P0])[kn ] = f_N ; // f_S ;// + (D.f[DIR_0M0])[ks ] = f_S ; // f_N ;// + (D.f[DIR_00P])[kt ] = f_T ; // f_B ;// + (D.f[DIR_00M])[kb ] = f_B ; // f_T ;// + (D.f[DIR_PP0])[kne ] = f_NE ; // f_SW ;// + (D.f[DIR_MM0])[ksw ] = f_SW ; // f_NE ;// + (D.f[DIR_PM0])[kse ] = f_SE ; // f_NW ;// + (D.f[DIR_MP0])[knw ] = f_NW ; // f_SE ;// + (D.f[DIR_P0P])[kte ] = f_TE ; // f_BW ;// + (D.f[DIR_M0M])[kbw ] = f_BW ; // f_TE ;// + (D.f[DIR_P0M])[kbe ] = f_BE ; // f_TW ;// + (D.f[DIR_M0P])[ktw ] = f_TW ; // f_BE ;// + (D.f[DIR_0PP])[ktn ] = f_TN ; // f_BS ;// + (D.f[DIR_0MM])[kbs ] = f_BS ; // f_TN ;// + (D.f[DIR_0PM])[kbn ] = f_BN ; // f_TS ;// + (D.f[DIR_0MP])[kts ] = f_TS ; // f_BN ;// + (D.f[DIR_PPP])[ktne ] = f_TNE ; // f_BSW ;// + (D.f[DIR_MMM])[kbsw ] = f_BSW ; // f_BNE ;// + (D.f[DIR_PPM])[kbne ] = f_BNE ; // f_BNW ;// + (D.f[DIR_MMP])[ktsw ] = f_TSW ; // f_BSE ;// + (D.f[DIR_PMP])[ktse ] = f_TSE ; // f_TSW ;// + (D.f[DIR_MPM])[kbnw ] = f_BNW ; // f_TNE ;// + (D.f[DIR_PMM])[kbse ] = f_BSE ; // f_TNW ;// + (D.f[DIR_MPP])[ktnw ] = f_TNW ; // f_TSE ;// ////////////////////////////////////////////////////////////////////////// ////(D.f[DIR_000])[kzero] = c8over27* (drho-cu_sq); - //(D.f[DIR_P00 ])[ke ] = three*c2over27* ( vx1 ); //six - //(D.f[DIR_M00 ])[kw ] = three*c2over27* (-vx1 ); //six - //(D.f[DIR_0P0 ])[kn ] = three*c2over27* ( vx2 ); //six - //(D.f[DIR_0M0 ])[ks ] = three*c2over27* ( -vx2 ); //six - //(D.f[DIR_00P ])[kt ] = three*c2over27* ( vx3); //six - //(D.f[DIR_00M ])[kb ] = three*c2over27* ( -vx3); //six - //(D.f[DIR_PP0 ])[kne ] = three*c1over54* ( vx1+vx2 ); //six - //(D.f[DIR_MM0 ])[ksw ] = three*c1over54* (-vx1-vx2 ); //six - //(D.f[DIR_PM0 ])[kse ] = three*c1over54* ( 
vx1-vx2 ); //six - //(D.f[DIR_MP0 ])[knw ] = three*c1over54* (-vx1+vx2 ); //six - //(D.f[DIR_P0P ])[kte ] = three*c1over54* ( vx1 +vx3); //six - //(D.f[DIR_M0M ])[kbw ] = three*c1over54* (-vx1 -vx3); //six - //(D.f[DIR_P0M ])[kbe ] = three*c1over54* ( vx1 -vx3); //six - //(D.f[DIR_M0P ])[ktw ] = three*c1over54* (-vx1 +vx3); //six - //(D.f[DIR_0PP ])[ktn ] = three*c1over54* ( vx2+vx3); //six - //(D.f[DIR_0MM ])[kbs ] = three*c1over54* ( -vx2-vx3); //six - //(D.f[DIR_0PM ])[kbn ] = three*c1over54* ( vx2-vx3); //six - //(D.f[DIR_0MP ])[kts ] = three*c1over54* ( -vx2+vx3); //six - //(D.f[DIR_PPP ])[ktne ] = three*c1over216*( vx1+vx2+vx3); //six - //(D.f[DIR_MMM ])[kbsw ] = three*c1over216*(-vx1-vx2-vx3); //six - //(D.f[DIR_PPM ])[kbne ] = three*c1over216*( vx1+vx2-vx3); //six - //(D.f[DIR_MMP ])[ktsw ] = three*c1over216*(-vx1-vx2+vx3); //six - //(D.f[DIR_PMP ])[ktse ] = three*c1over216*( vx1-vx2+vx3); //six - //(D.f[DIR_MPM ])[kbnw ] = three*c1over216*(-vx1+vx2-vx3); //six - //(D.f[DIR_PMM ])[kbse ] = three*c1over216*( vx1-vx2-vx3); //six - //(D.f[DIR_MPP ])[ktnw ] = three*c1over216*(-vx1+vx2+vx3); //six + //(D.f[DIR_P00])[ke ] = three*c2over27* ( vx1 ); //six + //(D.f[DIR_M00])[kw ] = three*c2over27* (-vx1 ); //six + //(D.f[DIR_0P0])[kn ] = three*c2over27* ( vx2 ); //six + //(D.f[DIR_0M0])[ks ] = three*c2over27* ( -vx2 ); //six + //(D.f[DIR_00P])[kt ] = three*c2over27* ( vx3); //six + //(D.f[DIR_00M])[kb ] = three*c2over27* ( -vx3); //six + //(D.f[DIR_PP0])[kne ] = three*c1over54* ( vx1+vx2 ); //six + //(D.f[DIR_MM0])[ksw ] = three*c1over54* (-vx1-vx2 ); //six + //(D.f[DIR_PM0])[kse ] = three*c1over54* ( vx1-vx2 ); //six + //(D.f[DIR_MP0])[knw ] = three*c1over54* (-vx1+vx2 ); //six + //(D.f[DIR_P0P])[kte ] = three*c1over54* ( vx1 +vx3); //six + //(D.f[DIR_M0M])[kbw ] = three*c1over54* (-vx1 -vx3); //six + //(D.f[DIR_P0M])[kbe ] = three*c1over54* ( vx1 -vx3); //six + //(D.f[DIR_M0P])[ktw ] = three*c1over54* (-vx1 +vx3); //six + //(D.f[DIR_0PP])[ktn ] = three*c1over54* 
( vx2+vx3); //six + //(D.f[DIR_0MM])[kbs ] = three*c1over54* ( -vx2-vx3); //six + //(D.f[DIR_0PM])[kbn ] = three*c1over54* ( vx2-vx3); //six + //(D.f[DIR_0MP])[kts ] = three*c1over54* ( -vx2+vx3); //six + //(D.f[DIR_PPP])[ktne ] = three*c1over216*( vx1+vx2+vx3); //six + //(D.f[DIR_MMM])[kbsw ] = three*c1over216*(-vx1-vx2-vx3); //six + //(D.f[DIR_PPM])[kbne ] = three*c1over216*( vx1+vx2-vx3); //six + //(D.f[DIR_MMP])[ktsw ] = three*c1over216*(-vx1-vx2+vx3); //six + //(D.f[DIR_PMP])[ktse ] = three*c1over216*( vx1-vx2+vx3); //six + //(D.f[DIR_MPM])[kbnw ] = three*c1over216*(-vx1+vx2-vx3); //six + //(D.f[DIR_PMM])[kbse ] = three*c1over216*( vx1-vx2-vx3); //six + //(D.f[DIR_MPP])[ktnw ] = three*c1over216*(-vx1+vx2+vx3); //six //(D.f[DIR_000])[kzero] = c8over27* (drho-cu_sq); - //(D.f[DIR_P00 ])[ke ] = c2over27* (drho+three*( vx1 )+c9over2*( vx1 )*( vx1 )-cu_sq); - //(D.f[DIR_M00 ])[kw ] = c2over27* (drho+three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cu_sq); - //(D.f[DIR_0P0 ])[kn ] = c2over27* (drho+three*( vx2 )+c9over2*( vx2 )*( vx2 )-cu_sq); - //(D.f[DIR_0M0 ])[ks ] = c2over27* (drho+three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cu_sq); - //(D.f[DIR_00P ])[kt ] = c2over27* (drho+three*( vx3)+c9over2*( vx3)*( vx3)-cu_sq); - //(D.f[DIR_00M ])[kb ] = c2over27* (drho+three*( -vx3)+c9over2*( -vx3)*( -vx3)-cu_sq); - //(D.f[DIR_PP0 ])[kne ] = c1over54* (drho+three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); - //(D.f[DIR_MM0 ])[ksw ] = c1over54* (drho+three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); - //(D.f[DIR_PM0 ])[kse ] = c1over54* (drho+three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); - //(D.f[DIR_MP0 ])[knw ] = c1over54* (drho+three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); - //(D.f[DIR_P0P ])[kte ] = c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); - //(D.f[DIR_M0M ])[kbw ] = c1over54* (drho+three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); - //(D.f[DIR_P0M ])[kbe ] = c1over54* (drho+three*( vx1 -vx3)+c9over2*( 
vx1 -vx3)*( vx1 -vx3)-cu_sq); - //(D.f[DIR_M0P ])[ktw ] = c1over54* (drho+three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); - //(D.f[DIR_0PP ])[ktn ] = c1over54* (drho+three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cu_sq); - //(D.f[DIR_0MM ])[kbs ] = c1over54* (drho+three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); - //(D.f[DIR_0PM ])[kbn ] = c1over54* (drho+three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cu_sq); - //(D.f[DIR_0MP ])[kts ] = c1over54* (drho+three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); - //(D.f[DIR_PPP ])[ktne ] = c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); - //(D.f[DIR_MMM ])[kbsw ] = c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); - //(D.f[DIR_PPM ])[kbne ] = c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); - //(D.f[DIR_MMP ])[ktsw ] = c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); - //(D.f[DIR_PMP ])[ktse ] = c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); - //(D.f[DIR_MPM ])[kbnw ] = c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); - //(D.f[DIR_PMM ])[kbse ] = c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); - //(D.f[DIR_MPP ])[ktnw ] = c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); + //(D.f[DIR_P00])[ke ] = c2over27* (drho+three*( vx1 )+c9over2*( vx1 )*( vx1 )-cu_sq); + //(D.f[DIR_M00])[kw ] = c2over27* (drho+three*(-vx1 )+c9over2*(-vx1 )*(-vx1 )-cu_sq); + //(D.f[DIR_0P0])[kn ] = c2over27* (drho+three*( vx2 )+c9over2*( vx2 )*( vx2 )-cu_sq); + //(D.f[DIR_0M0])[ks ] = c2over27* (drho+three*( -vx2 )+c9over2*( -vx2 )*( -vx2 )-cu_sq); + //(D.f[DIR_00P])[kt ] = c2over27* (drho+three*( vx3)+c9over2*( vx3)*( vx3)-cu_sq); + //(D.f[DIR_00M])[kb ] = c2over27* (drho+three*( -vx3)+c9over2*( -vx3)*( -vx3)-cu_sq); + //(D.f[DIR_PP0])[kne ] = c1over54* 
(drho+three*( vx1+vx2 )+c9over2*( vx1+vx2 )*( vx1+vx2 )-cu_sq); + //(D.f[DIR_MM0])[ksw ] = c1over54* (drho+three*(-vx1-vx2 )+c9over2*(-vx1-vx2 )*(-vx1-vx2 )-cu_sq); + //(D.f[DIR_PM0])[kse ] = c1over54* (drho+three*( vx1-vx2 )+c9over2*( vx1-vx2 )*( vx1-vx2 )-cu_sq); + //(D.f[DIR_MP0])[knw ] = c1over54* (drho+three*(-vx1+vx2 )+c9over2*(-vx1+vx2 )*(-vx1+vx2 )-cu_sq); + //(D.f[DIR_P0P])[kte ] = c1over54* (drho+three*( vx1 +vx3)+c9over2*( vx1 +vx3)*( vx1 +vx3)-cu_sq); + //(D.f[DIR_M0M])[kbw ] = c1over54* (drho+three*(-vx1 -vx3)+c9over2*(-vx1 -vx3)*(-vx1 -vx3)-cu_sq); + //(D.f[DIR_P0M])[kbe ] = c1over54* (drho+three*( vx1 -vx3)+c9over2*( vx1 -vx3)*( vx1 -vx3)-cu_sq); + //(D.f[DIR_M0P])[ktw ] = c1over54* (drho+three*(-vx1 +vx3)+c9over2*(-vx1 +vx3)*(-vx1 +vx3)-cu_sq); + //(D.f[DIR_0PP])[ktn ] = c1over54* (drho+three*( vx2+vx3)+c9over2*( vx2+vx3)*( vx2+vx3)-cu_sq); + //(D.f[DIR_0MM])[kbs ] = c1over54* (drho+three*( -vx2-vx3)+c9over2*( -vx2-vx3)*( -vx2-vx3)-cu_sq); + //(D.f[DIR_0PM])[kbn ] = c1over54* (drho+three*( vx2-vx3)+c9over2*( vx2-vx3)*( vx2-vx3)-cu_sq); + //(D.f[DIR_0MP])[kts ] = c1over54* (drho+three*( -vx2+vx3)+c9over2*( -vx2+vx3)*( -vx2+vx3)-cu_sq); + //(D.f[DIR_PPP])[ktne ] = c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); + //(D.f[DIR_MMM])[kbsw ] = c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); + //(D.f[DIR_PPM])[kbne ] = c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); + //(D.f[DIR_MMP])[ktsw ] = c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); + //(D.f[DIR_PMP])[ktse ] = c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); + //(D.f[DIR_MPM])[kbnw ] = c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); + //(D.f[DIR_PMM])[kbse ] = c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); + //(D.f[DIR_MPP])[ktnw ] = 
c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); } } } diff --git a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu index 16028e2f9f87716f43ed60f82ed513289e381b7c..cbb892296322bc164241ad18c8ab63201d34647e 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu @@ -23,7 +23,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27( real* veloZ, real* DDStart, real* turbulentViscosity, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, bool EvenOrOdd) @@ -39,7 +39,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27( const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -50,63 +50,63 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = 
&DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 
*size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + 
D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu index d48fa80fd14ce15f4a380ed46403654b43c805e8..d2fe5935af9b2d3ad78f492e3a9d182873d20808 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu +++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu @@ -20,69 +20,69 @@ __global__ void WallFunction27( unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ, - unsigned int size_Mat, + unsigned long long numberOfLBnodes, bool isEvenTimestep) { Distributions27 D; if (isEvenTimestep==true) { - D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] 
= &DD[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - 
D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DD[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes]; + D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MMM] = &DD[DIR_PPP 
* numberOfLBnodes]; + D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// const unsigned x = threadIdx.x; // Globaler x-Index @@ -107,24 +107,24 @@ __global__ void WallFunction27( // *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS, // *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW, // *q_dirBSE, *q_dirBNW; - //q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; - //q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; - //q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; - //q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; - //q_dirT = &QQ[DIR_00P * numberOfBCnodes]; - //q_dirB = &QQ[DIR_00M * numberOfBCnodes]; - //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; - //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; - //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; - //q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; - //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; - //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; - //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; - //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; - //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; - //q_dirBS = &QQ[DIR_0MM * numberOfBCnodes]; - //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; - //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; + //q_dirE = &QQ[DIR_P00 * numberOfBCnodes]; + //q_dirW = &QQ[DIR_M00 * numberOfBCnodes]; + //q_dirN = &QQ[DIR_0P0 * numberOfBCnodes]; + //q_dirS = &QQ[DIR_0M0 * numberOfBCnodes]; + //q_dirT = &QQ[DIR_00P * numberOfBCnodes]; + //q_dirB = &QQ[DIR_00M * numberOfBCnodes]; + //q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes]; + //q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes]; + //q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes]; + //q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes]; + //q_dirTE = &QQ[DIR_P0P * numberOfBCnodes]; + //q_dirBW = &QQ[DIR_M0M * numberOfBCnodes]; + //q_dirBE = &QQ[DIR_P0M * numberOfBCnodes]; + //q_dirTW = &QQ[DIR_M0P * numberOfBCnodes]; + //q_dirTN = &QQ[DIR_0PP * numberOfBCnodes]; + //q_dirBS = &QQ[DIR_0MM * 
numberOfBCnodes]; + //q_dirBN = &QQ[DIR_0PM * numberOfBCnodes]; + //q_dirTS = &QQ[DIR_0MP * numberOfBCnodes]; //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes]; //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes]; //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes]; @@ -167,32 +167,32 @@ __global__ void WallFunction27( real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE, f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW; - f_W = (D.f[DIR_P00 ])[ke ]; - f_E = (D.f[DIR_M00 ])[kw ]; - f_S = (D.f[DIR_0P0 ])[kn ]; - f_N = (D.f[DIR_0M0 ])[ks ]; - f_B = (D.f[DIR_00P ])[kt ]; - f_T = (D.f[DIR_00M ])[kb ]; - f_SW = (D.f[DIR_PP0 ])[kne ]; - f_NE = (D.f[DIR_MM0 ])[ksw ]; - f_NW = (D.f[DIR_PM0 ])[kse ]; - f_SE = (D.f[DIR_MP0 ])[knw ]; - f_BW = (D.f[DIR_P0P ])[kte ]; - f_TE = (D.f[DIR_M0M ])[kbw ]; - f_TW = (D.f[DIR_P0M ])[kbe ]; - f_BE = (D.f[DIR_M0P ])[ktw ]; - f_BS = (D.f[DIR_0PP ])[ktn ]; - f_TN = (D.f[DIR_0MM ])[kbs ]; - f_TS = (D.f[DIR_0PM ])[kbn ]; - f_BN = (D.f[DIR_0MP ])[kts ]; - f_BSW = (D.f[DIR_PPP ])[ktne ]; - f_BNE = (D.f[DIR_MMP ])[ktsw ]; - f_BNW = (D.f[DIR_PMP ])[ktse ]; - f_BSE = (D.f[DIR_MPP ])[ktnw ]; - f_TSW = (D.f[DIR_PPM ])[kbne ]; - f_TNE = (D.f[DIR_MMM ])[kbsw ]; - f_TNW = (D.f[DIR_PMM ])[kbse ]; - f_TSE = (D.f[DIR_MPM ])[kbnw ]; + f_W = (D.f[DIR_P00])[ke ]; + f_E = (D.f[DIR_M00])[kw ]; + f_S = (D.f[DIR_0P0])[kn ]; + f_N = (D.f[DIR_0M0])[ks ]; + f_B = (D.f[DIR_00P])[kt ]; + f_T = (D.f[DIR_00M])[kb ]; + f_SW = (D.f[DIR_PP0])[kne ]; + f_NE = (D.f[DIR_MM0])[ksw ]; + f_NW = (D.f[DIR_PM0])[kse ]; + f_SE = (D.f[DIR_MP0])[knw ]; + f_BW = (D.f[DIR_P0P])[kte ]; + f_TE = (D.f[DIR_M0M])[kbw ]; + f_TW = (D.f[DIR_P0M])[kbe ]; + f_BE = (D.f[DIR_M0P])[ktw ]; + f_BS = (D.f[DIR_0PP])[ktn ]; + f_TN = (D.f[DIR_0MM])[kbs ]; + f_TS = (D.f[DIR_0PM])[kbn ]; + f_BN = (D.f[DIR_0MP])[kts ]; + f_BSW = (D.f[DIR_PPP])[ktne ]; + f_BNE = (D.f[DIR_MMP])[ktsw ]; + f_BNW = (D.f[DIR_PMP])[ktse ]; + f_BSE = (D.f[DIR_MPP])[ktnw ]; + f_TSW = 
(D.f[DIR_PPM])[kbne ]; + f_TNE = (D.f[DIR_MMM])[kbsw ]; + f_TNW = (D.f[DIR_PMM])[kbse ]; + f_TSE = (D.f[DIR_MPM])[kbnw ]; //////////////////////////////////////////////////////////////////////////////// // real vx2, vx3, feq, q; real vx1, drho; @@ -234,63 +234,63 @@ __global__ void WallFunction27( // ////////////////////////////////////////////////////////////////////////// // if (isEvenTimestep==false) // { - // D.f[DIR_P00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_M00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat]; + // D.f[DIR_P00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_M00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat]; + // 
D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat]; // } // else // { - // D.f[DIR_M00 ] = &DD[DIR_P00 *size_Mat]; - // D.f[DIR_P00 ] = &DD[DIR_M00 *size_Mat]; - // D.f[DIR_0M0 ] = &DD[DIR_0P0 *size_Mat]; - // D.f[DIR_0P0 ] = &DD[DIR_0M0 *size_Mat]; - // D.f[DIR_00M ] = &DD[DIR_00P *size_Mat]; - // D.f[DIR_00P ] = &DD[DIR_00M *size_Mat]; - // D.f[DIR_MM0 ] = &DD[DIR_PP0 *size_Mat]; - // D.f[DIR_PP0 ] = &DD[DIR_MM0 *size_Mat]; - // D.f[DIR_MP0 ] = &DD[DIR_PM0 *size_Mat]; - // D.f[DIR_PM0 ] = &DD[DIR_MP0 *size_Mat]; - // D.f[DIR_M0M ] = &DD[DIR_P0P *size_Mat]; - // D.f[DIR_P0P ] = &DD[DIR_M0M *size_Mat]; - // D.f[DIR_M0P ] = &DD[DIR_P0M *size_Mat]; - // D.f[DIR_P0M ] = &DD[DIR_M0P *size_Mat]; - // D.f[DIR_0MM ] = &DD[DIR_0PP *size_Mat]; - // D.f[DIR_0PP ] = &DD[DIR_0MM *size_Mat]; - // D.f[DIR_0MP ] = &DD[DIR_0PM *size_Mat]; - // D.f[DIR_0PM ] = &DD[DIR_0MP *size_Mat]; - // D.f[DIR_000] = &DD[DIR_000*size_Mat]; - // D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat]; - // D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat]; - // D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat]; - // D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat]; - // D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat]; - // D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat]; - // 
D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat]; - // D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat]; + // D.f[DIR_M00] = &DD[DIR_P00 * size_Mat]; + // D.f[DIR_P00] = &DD[DIR_M00 * size_Mat]; + // D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat]; + // D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat]; + // D.f[DIR_00M] = &DD[DIR_00P * size_Mat]; + // D.f[DIR_00P] = &DD[DIR_00M * size_Mat]; + // D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat]; + // D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat]; + // D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat]; + // D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat]; + // D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat]; + // D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat]; + // D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat]; + // D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat]; + // D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat]; + // D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat]; + // D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat]; + // D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat]; + // D.f[DIR_000] = &DD[DIR_000 * size_Mat]; + // D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat]; + // D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat]; + // D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat]; + // D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat]; + // D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat]; + // D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat]; + // D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat]; + // D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat]; // } // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // //Test diff --git a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp index 2c85de9e3ec57d50a66fde2c49d3e703676fbf04..508e4498c36d352761c3ecaf24abaa52a5f84bbe 100644 --- a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp +++ b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp @@ -45,26 +45,44 @@ void initLattice(SPtr<Parameter> para, SPtr<PreProcessor> preProcessor, SPtr<Cud preProcessor->init(para, lev); CalcMacCompSP27( - para->getParD(lev)->velocityX, para->getParD(lev)->velocityY, 
para->getParD(lev)->velocityZ, para->getParD(lev)->rho, - para->getParD(lev)->pressure, para->getParD(lev)->typeOfGridNode, para->getParD(lev)->neighborX, - para->getParD(lev)->neighborY, para->getParD(lev)->neighborZ, para->getParD(lev)->numberOfNodes, - para->getParD(lev)->numberofthreads, para->getParD(lev)->distributions.f[0], para->getParD(lev)->isEvenTimestep); + para->getParD(lev)->velocityX, + para->getParD(lev)->velocityY, + para->getParD(lev)->velocityZ, + para->getParD(lev)->rho, + para->getParD(lev)->pressure, + para->getParD(lev)->typeOfGridNode, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + para->getParD(lev)->numberOfNodes, + para->getParD(lev)->numberofthreads, + para->getParD(lev)->distributions.f[0], + para->getParD(lev)->isEvenTimestep); if (para->getCalcMedian()) { constexpr uint tdiff = 1; - CalcMacMedSP27(para->getParD(lev)->vx_SP_Med, para->getParD(lev)->vy_SP_Med, para->getParD(lev)->vz_SP_Med, - para->getParD(lev)->rho_SP_Med, para->getParD(lev)->press_SP_Med, para->getParD(lev)->typeOfGridNode, - para->getParD(lev)->neighborX, para->getParD(lev)->neighborY, - para->getParD(lev)->neighborZ, tdiff, para->getParD(lev)->numberOfNodes, - para->getParD(lev)->numberofthreads, para->getParD(lev)->isEvenTimestep); + CalcMacMedSP27( + para->getParD(lev)->vx_SP_Med, + para->getParD(lev)->vy_SP_Med, + para->getParD(lev)->vz_SP_Med, + para->getParD(lev)->rho_SP_Med, + para->getParD(lev)->press_SP_Med, + para->getParD(lev)->typeOfGridNode, + para->getParD(lev)->neighborX, + para->getParD(lev)->neighborY, + para->getParD(lev)->neighborZ, + tdiff, + para->getParD(lev)->numberOfNodes, + para->getParD(lev)->numberofthreads, + para->getParD(lev)->isEvenTimestep); } // advection - diffusion if (para->getDiffOn()) { cudaMemoryManager->cudaAllocConcentration(lev); - for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++) { - para->getParH(lev)->Conc[i] = para->getTemperatureInit(); + for 
(size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) { + para->getParH(lev)->Conc[index] = para->getTemperatureInit(); } initTemperatur(para.get(), cudaMemoryManager.get(), lev); } diff --git a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp index 4e5a862d3fd1ed19109073aae0fe4c731f7f3e91..6eaa0b17653aaf5257c00e674c87e2844c26cf5d 100644 --- a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp +++ b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp @@ -3,6 +3,7 @@ #include "Parameter/Parameter.h" #include <basics/utilities/UbFileInputASCII.h> + using namespace vf::lbm::dir; ////////////////////////////////////////////////////////////////////////// @@ -169,7 +170,7 @@ void PositionReader::definePropellerQs(Parameter* para) ////////////////////////////////////////////////////////////////// for(uint u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++) { - for (int dir = DIR_P00; dir<=DIR_MMM; dir++) + for (size_t dir = DIR_P00; dir<=DIR_MMM; dir++) { if ((dir==DIR_P00) || (dir==DIR_PP0) || (dir==DIR_PM0) || (dir==DIR_P0P) || (dir==DIR_P0M) || diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index 9f9f7539bc5a1e28612d956ca32234c5a3589f8a..50b4460d774010ea7d7b98cfa6fa505cdfeb88c2 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -3,8 +3,11 @@ #include <vector> +#include "LBM/LB.h" + #include "Kernel/Utilities/KernelGroup.h" #include "PreProcessor/PreProcessorType.h" +#include "Parameter/CudaStreamManager.h" #include <helper_cuda.h> @@ -13,7 +16,7 @@ class Kernel public: virtual ~Kernel() = default; virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) = 0; //if stream == -1: run on default stream + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate 
collisionTemplate, CudaStreamIndex streamIdx=CudaStreamIndex::Legacy) = 0; virtual bool checkParameter() = 0; virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp index 630aaf7339afc2907ab6bfbf65bd5fc55f75e215..9bd3945aa81147d03be2b1eac3ddec7c24d71532 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp @@ -1,9 +1,11 @@ #include "KernelImp.h" +#include "LBM/LB.h" + #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" -void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream) +void KernelImp::runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex) { printf("Method not implemented for this Kernel \n"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h index 0141ddda7e9579cc84148d26727ed81c084ea0c5..a96c2c123472ca33f635273e06a5bf36a745654d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h @@ -1,6 +1,8 @@ #ifndef KERNEL_IMP_H #define KERNEL_IMP_H +#include "LBM/LB.h" + #include "Kernel.h" #include <memory> @@ -9,12 +11,12 @@ class CheckParameterStrategy; class Parameter; - +class CudaStreamManager; class KernelImp : public Kernel { public: virtual void run() = 0; - virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1); + virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex=CudaStreamIndex::Legacy); bool checkParameter(); std::vector<PreProcessorType> getPreProcessorTypes(); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu index 51b9e4537fa0857e9302aa638ae7729fa9adcdbe..d4d6307f688da4c8fa37c54fb4958681d5ec4941 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu @@ -2,6 +2,7 @@ #include "ADComp27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,34 +11,19 @@ std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> pa void ADComp27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_KERNEL_AD_COMP_27 << < grid, threads >> >( para->getParD(level)->diffusivity, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->distributionsAD27.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_ThS27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_KERNEL_AD_COMP_27<<< grid.grid, grid.threads >>>( + para->getParD(level)->diffusivity, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + 
para->getParD(level)->distributions.f[0], + para->getParD(level)->distributionsAD27.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_KERNEL_AD_COMP_27 execution failed"); } ADComp27::ADComp27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu index b4c1236300bbb49fe2df1b3f458f506e989e142b..40adfff91713b7d6db1e861be9282d1f38516c22 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu @@ -38,125 +38,125 @@ __global__ void LB_KERNEL_AD_COMP_27(real diffusivity, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] 
= &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; 
- D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * 
size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } Distributions27 D27; if (EvenOrOdd == true) { - D27.f[DIR_P00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat]; + D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat]; + D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * 
size_Mat]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat]; + D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat]; } else { - D27.f[DIR_M00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat]; + D27.f[DIR_00M] = 
&DD27[DIR_00P * size_Mat]; + D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat]; + D27.f[DIR_000] = &DD27[DIR_000 * size_Mat]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat]; + D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu index ab9b0c444513455e0498d79614575e87c2afb6a0..3ee06a1e9ea77c8443d94f44ea54d11ffe7304ac 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu @@ -2,6 +2,7 @@ #include "ADComp7_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,34 +11,19 @@ std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para void 
ADComp7::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_AD_Comp_7 << < grid, threads >> >( para->getParD(level)->diffusivity, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->distributionsAD7.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_ThS7 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_AD_Comp_7<<< grid.grid, grid.threads >>>( + para->getParD(level)->diffusivity, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->distributionsAD7.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_AD_Comp_7 execution failed"); } ADComp7::ADComp7(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu index 52ab9ba6e968ec2293f0a1c4959323c43f328206..ddaed84703640cd9c7d12d142ccc1bf8f9ea7efc 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu +++ 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu @@ -39,63 +39,63 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + 
D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; 
+ D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } Distributions7 D7; @@ -157,33 +157,33 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity, real fTNE = (D.f[DIR_MMM])[kbsw]; real fTNW = (D.f[DIR_PMM])[kbs];//kbse real fTSE = (D.f[DIR_MPM])[kbw];//kbnw - //real fE = (D.f[DIR_P00 ])[k ];//ke - //real fW = (D.f[DIR_M00 ])[kw ]; - //real fN = (D.f[DIR_0P0 ])[k ];//kn - //real fS = (D.f[DIR_0M0 ])[ks ]; - //real fT = (D.f[DIR_00P ])[k ];//kt - //real fB = (D.f[DIR_00M ])[kb ]; - //real fNE = (D.f[DIR_PP0 ])[k ];//kne - //real fSW = (D.f[DIR_MM0 ])[ksw]; - //real fSE = (D.f[DIR_PM0 ])[ks ];//kse - //real fNW = (D.f[DIR_MP0 ])[kw ];//knw - //real fTE = (D.f[DIR_P0P ])[k ];//kte - //real fBW = (D.f[DIR_M0M ])[kbw]; - //real fBE = (D.f[DIR_P0M ])[kb ];//kbe - //real fTW = 
(D.f[DIR_M0P ])[kw ];//ktw - //real fTN = (D.f[DIR_0PP ])[k ];//ktn - //real fBS = (D.f[DIR_0MM ])[kbs]; - //real fBN = (D.f[DIR_0PM ])[kb ];//kbn - //real fTS = (D.f[DIR_0MP ])[ks ];//kts + //real fE = (D.f[DIR_P00])[k ];//ke + //real fW = (D.f[DIR_M00])[kw ]; + //real fN = (D.f[DIR_0P0])[k ];//kn + //real fS = (D.f[DIR_0M0])[ks ]; + //real fT = (D.f[DIR_00P])[k ];//kt + //real fB = (D.f[DIR_00M])[kb ]; + //real fNE = (D.f[DIR_PP0])[k ];//kne + //real fSW = (D.f[DIR_MM0])[ksw]; + //real fSE = (D.f[DIR_PM0])[ks ];//kse + //real fNW = (D.f[DIR_MP0])[kw ];//knw + //real fTE = (D.f[DIR_P0P])[k ];//kte + //real fBW = (D.f[DIR_M0M])[kbw]; + //real fBE = (D.f[DIR_P0M])[kb ];//kbe + //real fTW = (D.f[DIR_M0P])[kw ];//ktw + //real fTN = (D.f[DIR_0PP])[k ];//ktn + //real fBS = (D.f[DIR_0MM])[kbs]; + //real fBN = (D.f[DIR_0PM])[kb ];//kbn + //real fTS = (D.f[DIR_0MP])[ks ];//kts //real fZERO = (D.f[DIR_000])[k ];//kzero - //real fTNE = (D.f[DIR_PPP ])[k ];//ktne - //real fTSW = (D.f[DIR_MMP ])[ksw];//ktsw - //real fTSE = (D.f[DIR_PMP ])[ks ];//ktse - //real fTNW = (D.f[DIR_MPP ])[kw ];//ktnw - //real fBNE = (D.f[DIR_PPM ])[kb ];//kbne - //real fBSW = (D.f[DIR_MMM ])[kbsw]; - //real fBSE = (D.f[DIR_PMM ])[kbs];//kbse - //real fBNW = (D.f[DIR_MPM ])[kbw];//kbnw + //real fTNE = (D.f[DIR_PPP])[k ];//ktne + //real fTSW = (D.f[DIR_MMP])[ksw];//ktsw + //real fTSE = (D.f[DIR_PMP])[ks ];//ktse + //real fTNW = (D.f[DIR_MPP])[kw ];//ktnw + //real fBNE = (D.f[DIR_PPM])[kb ];//kbne + //real fBSW = (D.f[DIR_MMM])[kbsw]; + //real fBSE = (D.f[DIR_PMM])[kbs];//kbse + //real fBNW = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// real f7ZERO = (D7.f[0])[k]; real f7E = (D7.f[1])[k]; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu 
index 4ad8a4678ae2e4025a90f639ae366311a247e4b3..f2a9feaa998b628fb782844d1a7d946317e5af5f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu @@ -2,6 +2,7 @@ #include "ADIncomp27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter void ADIncomp27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_AD_Incomp_27 << < grid, threads >> >( para->getParD(level)->diffusivity, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->distributionsAD27.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( + para->getParD(level)->diffusivity, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->distributionsAD27.f[0], + 
para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); } ADIncomp27::ADIncomp27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu index e686825ed100417110b02360876dec076553d7de..f9fdcee0f34106b05da0edc16e3fdd89f859752e 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu @@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity, unsigned int* neighborZ, real* DDStart, real* DD27, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -38,125 +38,125 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] 
= &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; 
+ D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * 
numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } Distributions27 D27; if (EvenOrOdd == true) { - D27.f[DIR_P00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_M00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_PP0 *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_PMP] 
= &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes]; } else { - D27.f[DIR_M00] = &DD27[DIR_P00 *size_Mat]; - D27.f[DIR_P00] = &DD27[DIR_M00 *size_Mat]; - D27.f[DIR_0M0] = &DD27[DIR_0P0 *size_Mat]; - D27.f[DIR_0P0] = &DD27[DIR_0M0 *size_Mat]; - D27.f[DIR_00M] = &DD27[DIR_00P *size_Mat]; - D27.f[DIR_00P] = &DD27[DIR_00M *size_Mat]; - D27.f[DIR_MM0] = &DD27[DIR_PP0 
*size_Mat]; - D27.f[DIR_PP0] = &DD27[DIR_MM0 *size_Mat]; - D27.f[DIR_MP0] = &DD27[DIR_PM0 *size_Mat]; - D27.f[DIR_PM0] = &DD27[DIR_MP0 *size_Mat]; - D27.f[DIR_M0M] = &DD27[DIR_P0P *size_Mat]; - D27.f[DIR_P0P] = &DD27[DIR_M0M *size_Mat]; - D27.f[DIR_M0P] = &DD27[DIR_P0M *size_Mat]; - D27.f[DIR_P0M] = &DD27[DIR_M0P *size_Mat]; - D27.f[DIR_0MM] = &DD27[DIR_0PP *size_Mat]; - D27.f[DIR_0PP] = &DD27[DIR_0MM *size_Mat]; - D27.f[DIR_0MP] = &DD27[DIR_0PM *size_Mat]; - D27.f[DIR_0PM] = &DD27[DIR_0MP *size_Mat]; - D27.f[DIR_000] = &DD27[DIR_000*size_Mat]; - D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat]; - D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat]; - D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat]; - D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat]; - D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat]; - D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat]; - D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat]; - D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat]; + D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes]; + D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes]; + D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes]; + D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes]; + D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes]; + D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes]; + D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes]; + D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes]; + D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes]; + D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes]; + D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes]; + D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes]; + D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes]; + D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes]; + D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes]; + D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes]; + D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes]; + D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes]; + D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes]; + D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes]; + D27.f[DIR_PPM] = 
&DD27[DIR_MMP * numberOfLBnodes]; + D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes]; + D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes]; + D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes]; + D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes]; + D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes]; + D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -197,33 +197,33 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity, real fTNW = (D.f[DIR_PMM])[kbs];//kbse real fTSE = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// - //real f27E = (D27.f[DIR_P00 ])[k ];//ke - //real f27W = (D27.f[DIR_M00 ])[kw ]; - //real f27N = (D27.f[DIR_0P0 ])[k ];//kn - //real f27S = (D27.f[DIR_0M0 ])[ks ]; - //real f27T = (D27.f[DIR_00P ])[k ];//kt - //real f27B = (D27.f[DIR_00M ])[kb ]; - //real f27NE = (D27.f[DIR_PP0 ])[k ];//kne - //real f27SW = (D27.f[DIR_MM0 ])[ksw]; - //real f27SE = (D27.f[DIR_PM0 ])[ks ];//kse - //real f27NW = (D27.f[DIR_MP0 ])[kw ];//knw - //real f27TE = (D27.f[DIR_P0P ])[k ];//kte - //real f27BW = (D27.f[DIR_M0M ])[kbw]; - //real f27BE = (D27.f[DIR_P0M ])[kb ];//kbe - //real f27TW = (D27.f[DIR_M0P ])[kw ];//ktw - //real f27TN = (D27.f[DIR_0PP ])[k ];//ktn - //real f27BS = (D27.f[DIR_0MM ])[kbs]; - //real f27BN = (D27.f[DIR_0PM ])[kb ];//kbn - //real f27TS = (D27.f[DIR_0MP ])[ks ];//kts + //real f27E = (D27.f[DIR_P00])[k ];//ke + //real f27W = (D27.f[DIR_M00])[kw ]; + //real f27N = (D27.f[DIR_0P0])[k ];//kn + //real f27S = (D27.f[DIR_0M0])[ks ]; + //real f27T = (D27.f[DIR_00P])[k ];//kt + //real f27B = (D27.f[DIR_00M])[kb ]; + //real f27NE = (D27.f[DIR_PP0])[k ];//kne + //real f27SW = (D27.f[DIR_MM0])[ksw]; + //real f27SE = (D27.f[DIR_PM0])[ks ];//kse + //real f27NW = (D27.f[DIR_MP0])[kw ];//knw + //real f27TE = (D27.f[DIR_P0P])[k ];//kte + //real f27BW = (D27.f[DIR_M0M])[kbw]; + //real f27BE = (D27.f[DIR_P0M])[kb 
];//kbe + //real f27TW = (D27.f[DIR_M0P])[kw ];//ktw + //real f27TN = (D27.f[DIR_0PP])[k ];//ktn + //real f27BS = (D27.f[DIR_0MM])[kbs]; + //real f27BN = (D27.f[DIR_0PM])[kb ];//kbn + //real f27TS = (D27.f[DIR_0MP])[ks ];//kts //real f27ZERO = (D27.f[DIR_000])[k ];//kzero - //real f27TNE = (D27.f[DIR_PPP ])[k ];//ktne - //real f27TSW = (D27.f[DIR_MMP ])[ksw];//ktsw - //real f27TSE = (D27.f[DIR_PMP ])[ks ];//ktse - //real f27TNW = (D27.f[DIR_MPP ])[kw ];//ktnw - //real f27BNE = (D27.f[DIR_PPM ])[kb ];//kbne - //real f27BSW = (D27.f[DIR_MMM ])[kbsw]; - //real f27BSE = (D27.f[DIR_PMM ])[kbs];//kbse - //real f27BNW = (D27.f[DIR_MPM ])[kbw];//kbnw + //real f27TNE = (D27.f[DIR_PPP])[k ];//ktne + //real f27TSW = (D27.f[DIR_MMP])[ksw];//ktsw + //real f27TSE = (D27.f[DIR_PMP])[ks ];//ktse + //real f27TNW = (D27.f[DIR_MPP])[kw ];//ktnw + //real f27BNE = (D27.f[DIR_PPM])[kb ];//kbne + //real f27BSW = (D27.f[DIR_MMM])[kbsw]; + //real f27BSE = (D27.f[DIR_PMM])[kbs];//kbse + //real f27BNW = (D27.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// //real vx1 = ((fTNE-fBSW)+(fBNE-fTSW)+(fTSE-fBNW)+(fBSE-fTNW) +(fNE-fSW)+(fSE-fNW)+(fTE-fBW)+(fBE-fTW)+(fE-fW)); //real vx2 = ((fTNE-fBSW)+(fBNE-fTSW)+(fBNW-fTSE)+(fTNW-fBSE) +(fNE-fSW)+(fNW-fSE)+(fTN-fBS)+(fBN-fTS)+(fN-fS)); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh index a6d94de4fadb9a93a9e5fed63d87731b12ec2a07..3abee563f676910f422bba0930060c2a0b0c0e21 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh @@ -11,7 +11,7 @@ __global__ void 
LB_Kernel_AD_Incomp_27(real diffusivity, unsigned int* neighborZ, real* DDStart, real* DD27, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); #endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu index 27da776eb7612307fa4f9af2886594fc0c75d90b..d0c6a6a24ab4d0ebebee9324bdafa1f9e3db51b9 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu @@ -2,6 +2,7 @@ #include "ADIncomp7_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter> void ADIncomp7::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_AD_Incomp_7 << < grid, threads >> >( para->getParD(level)->diffusivity, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->distributionsAD7.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); + vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( + para->getParD(level)->diffusivity, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->distributionsAD7.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); } ADIncomp7::ADIncomp7(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu index d49b0b48d20d976076a52f804d485b68da55348e..e0bcc4e515b1b2ccf71f1050e2d572b60a40d94b 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu @@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity, unsigned int* neighborZ, real* DDStart, real* DD7, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd) { //////////////////////////////////////////////////////////////////////////////// @@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity, const unsigned k = nx*(ny*z + y) + x; ////////////////////////////////////////////////////////////////////////// - if (k<size_Mat) + if (k<numberOfLBnodes) { //////////////////////////////////////////////////////////////////////////////// unsigned int BC; @@ -38,85 +38,85 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = 
&DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = 
&DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + 
D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } Distributions7 D7; if (EvenOrOdd == true) { - D7.f[0] = &DD7[0 * size_Mat]; - D7.f[1] = &DD7[1 * size_Mat]; - D7.f[2] = &DD7[2 * size_Mat]; - D7.f[3] = &DD7[3 * size_Mat]; - D7.f[4] = &DD7[4 * size_Mat]; - D7.f[5] = &DD7[5 * size_Mat]; - D7.f[6] = &DD7[6 * size_Mat]; + D7.f[0] = &DD7[0 * numberOfLBnodes]; + D7.f[1] = &DD7[1 * numberOfLBnodes]; + D7.f[2] = &DD7[2 * numberOfLBnodes]; + D7.f[3] = &DD7[3 * numberOfLBnodes]; + D7.f[4] = &DD7[4 * numberOfLBnodes]; + D7.f[5] = &DD7[5 * numberOfLBnodes]; + D7.f[6] = &DD7[6 * numberOfLBnodes]; } else { - D7.f[0] = &DD7[0 * size_Mat]; - D7.f[2] = 
&DD7[1 * size_Mat]; - D7.f[1] = &DD7[2 * size_Mat]; - D7.f[4] = &DD7[3 * size_Mat]; - D7.f[3] = &DD7[4 * size_Mat]; - D7.f[6] = &DD7[5 * size_Mat]; - D7.f[5] = &DD7[6 * size_Mat]; + D7.f[0] = &DD7[0 * numberOfLBnodes]; + D7.f[2] = &DD7[1 * numberOfLBnodes]; + D7.f[1] = &DD7[2 * numberOfLBnodes]; + D7.f[4] = &DD7[3 * numberOfLBnodes]; + D7.f[3] = &DD7[4 * numberOfLBnodes]; + D7.f[6] = &DD7[5 * numberOfLBnodes]; + D7.f[5] = &DD7[6 * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// @@ -156,33 +156,33 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity, real fTNE = (D.f[DIR_MMM])[kbsw]; real fTNW = (D.f[DIR_PMM])[kbs];//kbse real fTSE = (D.f[DIR_MPM])[kbw];//kbnw - //real fE = (D.f[DIR_P00 ])[k ];//ke - //real fW = (D.f[DIR_M00 ])[kw ]; - //real fN = (D.f[DIR_0P0 ])[k ];//kn - //real fS = (D.f[DIR_0M0 ])[ks ]; - //real fT = (D.f[DIR_00P ])[k ];//kt - //real fB = (D.f[DIR_00M ])[kb ]; - //real fNE = (D.f[DIR_PP0 ])[k ];//kne - //real fSW = (D.f[DIR_MM0 ])[ksw]; - //real fSE = (D.f[DIR_PM0 ])[ks ];//kse - //real fNW = (D.f[DIR_MP0 ])[kw ];//knw - //real fTE = (D.f[DIR_P0P ])[k ];//kte - //real fBW = (D.f[DIR_M0M ])[kbw]; - //real fBE = (D.f[DIR_P0M ])[kb ];//kbe - //real fTW = (D.f[DIR_M0P ])[kw ];//ktw - //real fTN = (D.f[DIR_0PP ])[k ];//ktn - //real fBS = (D.f[DIR_0MM ])[kbs]; - //real fBN = (D.f[DIR_0PM ])[kb ];//kbn - //real fTS = (D.f[DIR_0MP ])[ks ];//kts + //real fE = (D.f[DIR_P00])[k ];//ke + //real fW = (D.f[DIR_M00])[kw ]; + //real fN = (D.f[DIR_0P0])[k ];//kn + //real fS = (D.f[DIR_0M0])[ks ]; + //real fT = (D.f[DIR_00P])[k ];//kt + //real fB = (D.f[DIR_00M])[kb ]; + //real fNE = (D.f[DIR_PP0])[k ];//kne + //real fSW = (D.f[DIR_MM0])[ksw]; + //real fSE = (D.f[DIR_PM0])[ks ];//kse + //real fNW = (D.f[DIR_MP0])[kw ];//knw + //real fTE = (D.f[DIR_P0P])[k ];//kte + //real fBW = (D.f[DIR_M0M])[kbw]; + //real fBE = (D.f[DIR_P0M])[kb ];//kbe + //real fTW = (D.f[DIR_M0P])[kw ];//ktw + //real fTN = 
(D.f[DIR_0PP])[k ];//ktn + //real fBS = (D.f[DIR_0MM])[kbs]; + //real fBN = (D.f[DIR_0PM])[kb ];//kbn + //real fTS = (D.f[DIR_0MP])[ks ];//kts //real fZERO = (D.f[DIR_000])[k ];//kzero - //real fTNE = (D.f[DIR_PPP ])[k ];//ktne - //real fTSW = (D.f[DIR_MMP ])[ksw];//ktsw - //real fTSE = (D.f[DIR_PMP ])[ks ];//ktse - //real fTNW = (D.f[DIR_MPP ])[kw ];//ktnw - //real fBNE = (D.f[DIR_PPM ])[kb ];//kbne - //real fBSW = (D.f[DIR_MMM ])[kbsw]; - //real fBSE = (D.f[DIR_PMM ])[kbs];//kbse - //real fBNW = (D.f[DIR_MPM ])[kbw];//kbnw + //real fTNE = (D.f[DIR_PPP])[k ];//ktne + //real fTSW = (D.f[DIR_MMP])[ksw];//ktsw + //real fTSE = (D.f[DIR_PMP])[ks ];//ktse + //real fTNW = (D.f[DIR_MPP])[kw ];//ktnw + //real fBNE = (D.f[DIR_PPM])[kb ];//kbne + //real fBSW = (D.f[DIR_MMM])[kbsw]; + //real fBSE = (D.f[DIR_PMM])[kbs];//kbse + //real fBNW = (D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////// real f7ZERO = (D7.f[0])[k]; real f7E = (D7.f[1])[k]; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh index 25a17ddbd7038635a2beb2c39212822cbf762034..845ecda946a4e45678082b72b5c74dc96e5810c5 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh @@ -11,7 +11,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity, unsigned int* neighborZ, real* DDStart, real* DD7, - int size_Mat, + unsigned long long numberOfLBnodes, bool EvenOrOdd); #endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu index d2f9f60890379d07ecc3d04f4a54d59a0754907a..8c99f3b030984aef6215d5479be4b321145ee54f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu @@ -2,6 +2,7 @@ #include "BGKCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Paramet void BGKCompSP27::run() { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_BGK_Comp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_BGK_Comp_SP_27<<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + 
para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed"); } BGKCompSP27::BGKCompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu index 09196d13e94a2404ba280e8a8e9394f0a79e8211..3bdb65c455bd67d66e8b35961f2fa7e1de45f763 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu @@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Comp_SP_27( real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = 
&DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - 
D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu index beebda2437ca4e7385ab812b9106edabe213227e..a4b136d1c21b1e4c68432eef5e21ff8c968bdfec 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu @@ -2,6 +2,7 @@ #include "BGKPlusCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr void BGKPlusCompSP27::run() { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_BGK_Plus_Comp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - size_Mat, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_BGK_Plus_Comp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + 
para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed"); } BGKPlusCompSP27::BGKPlusCompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu index 325f65ece9baddf88adc91baa753bdfc4bd0eced..1f44fee9ea8b20241f87bea6310c96db2b82d1c4 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu @@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - 
D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P 
*size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = 
&DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -127,33 +127,33 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27( //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + 
c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + 
c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu index 3d7f6fb9a8980454ebc83c51c7dd8865688fa166..1107d343801f8ac3626b03a93ca92415217732ac 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu @@ -6,6 +6,7 @@ #include "../RunLBMKernel.cuh" #include <lbm/BGK.h> +#include <lbm/KernelParameter.h> namespace vf @@ -31,15 +32,16 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) void BGKUnified::run() { - GPUKernelParameter kernelParameter{ para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - 
para->getParD(level)->distributions.f[0], - (int)para->getParD(level)->numberOfNodes, - nullptr, /* forces not used in bgk kernel */ - para->getParD(level)->isEvenTimestep }; + GPUKernelParameter kernelParameter{ + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + (int)para->getParD(level)->numberOfNodes, + nullptr, /* forces not used in bgk kernel */ + para->getParD(level)->isEvenTimestep }; auto lambda = [] __device__(lbm::KernelParameter parameter) { return lbm::bgk(parameter); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu index eca3a9953024e44fd91e7f9f98956e4329574d09..dcfda06db462fd83120751a32a40365445d659ba 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu @@ -2,6 +2,7 @@ #include "CascadeCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr void CascadeCompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cascade_Comp_SP_27 << < grid, threads >> >( 
para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cascade_Comp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed"); } CascadeCompSP27::CascadeCompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu index 3f69fa47288343fbdd91e77dbb7f154501349098..af0a7c118191243c80c420856a70711a1fc17d2b 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega, Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = 
&DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * 
size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + 
D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -126,33 +126,33 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ];//[ke ]; - real mfabb = (D.f[DIR_M00 ])[kw ];//[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ];//[kn ]; - real mfbab = (D.f[DIR_0M0 ])[ks ];//[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ];//[kt ]; - real mfbba = (D.f[DIR_00M ])[kb ];//[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ];//[kne ]; - real mfaab = (D.f[DIR_MM0 ])[ksw];//[ksw ]; - real mfcab = (D.f[DIR_PM0 ])[ks ];//[kse ]; - real mfacb = (D.f[DIR_MP0 ])[kw ];//[knw ]; - real mfcbc = (D.f[DIR_P0P ])[k ];//[kte ]; - real mfaba = (D.f[DIR_M0M ])[kbw];//[kbw ]; - real mfcba = (D.f[DIR_P0M ])[kb ];//[kbe ]; - real mfabc = (D.f[DIR_M0P ])[kw ];//[ktw ]; - real mfbcc = (D.f[DIR_0PP ])[k ];//[ktn ]; - real mfbaa = (D.f[DIR_0MM ])[kbs];//[kbs ]; - real mfbca = (D.f[DIR_0PM ])[kb ];//[kbn ]; - real mfbac = (D.f[DIR_0MP ])[ks ];//[kts ]; + real mfcbb = 
(D.f[DIR_P00])[k ];//[ke ]; + real mfabb = (D.f[DIR_M00])[kw ];//[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ];//[kn ]; + real mfbab = (D.f[DIR_0M0])[ks ];//[ks ]; + real mfbbc = (D.f[DIR_00P])[k ];//[kt ]; + real mfbba = (D.f[DIR_00M])[kb ];//[kb ]; + real mfccb = (D.f[DIR_PP0])[k ];//[kne ]; + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ]; + real mfcab = (D.f[DIR_PM0])[ks ];//[kse ]; + real mfacb = (D.f[DIR_MP0])[kw ];//[knw ]; + real mfcbc = (D.f[DIR_P0P])[k ];//[kte ]; + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ]; + real mfcba = (D.f[DIR_P0M])[kb ];//[kbe ]; + real mfabc = (D.f[DIR_M0P])[kw ];//[ktw ]; + real mfbcc = (D.f[DIR_0PP])[k ];//[ktn ]; + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ]; + real mfbca = (D.f[DIR_0PM])[kb ];//[kbn ]; + real mfbac = (D.f[DIR_0MP])[ks ];//[kts ]; real mfbbb = (D.f[DIR_000])[k ];//[kzero]; - real mfccc = (D.f[DIR_PPP ])[k ];//[ktne ]; - real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ]; - real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ]; - real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ]; - real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ]; - real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ] - real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ]; - real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ]; + real mfccc = (D.f[DIR_PPP])[k ];//[ktne ]; + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; + real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ]; + real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ]; + real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ]; + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ] + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; //////////////////////////////////////////////////////////////////////////////////// real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu index 3f45c7ea71c385f948eac2e052a8d970010c413d..7817c398285dda131401bd14c3ccdd8c119c5680 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu @@ -2,6 +2,7 @@ #include "CumulantCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_p void CumulantCompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cum_Comp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cum_Comp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + 
para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_Cum_Comp_SP_27 execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu index ad2ffdf4170d98125e6758c0e2f548122093cea6..1dfab5846795e61509cdba28478fe6ce623983b5 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cum_Comp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - 
D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P 
*size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu index 9a84df86e41b3fdff75c2ebf580813afc5ee3feb..1518dcc209de1edf8a88dae72c1f10c3d4666610 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu @@ -1,8 +1,8 @@ #include "CumulantAll4CompSP27.h" #include "CumulantAll4CompSP27_Device.cuh" - #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -11,36 +11,21 @@ std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std:: void CumulantAll4CompSP27::run() { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cumulant_D3Q27All4 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - size_Mat, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cumulant_D3Q27All4 <<< grid.grid, grid.threads >>>( 
+ para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed"); } CumulantAll4CompSP27::CumulantAll4CompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu index 681dbff2ba37a1e0de56341b39cc2dec791f656b..3593b41c4c62c8a8b19719e22e9d65d6b5fd987d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu @@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4( real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - 
D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P 
*size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * 
size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -160,33 +160,33 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4( real omega, //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP 
])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = 
(D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu index 1b6ba1a2278b68f085a4b7df699b7ca230811f39..5a480e5d9c97126e491655b4bbe2aeefef3e7161 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu +++ 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu @@ -12,7 +12,7 @@ std::shared_ptr<CumulantK15Comp> CumulantK15Comp::getNewInstance(std::shared_ptr void CumulantK15Comp::run() { int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int Grid = (size_Mat / numberOfThreads) + 1; int Grid1, Grid2; @@ -29,16 +29,17 @@ void CumulantK15Comp::run() dim3 grid(Grid1, Grid2, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_CumulantK15Comp <<< grid, threads >>>(para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - size_Mat, - level, - para->getForcesDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_CumulantK15Comp <<< grid, threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->isEvenTimestep); getLastCudaError("LB_Kernel_CumulantK15Comp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu index 93d57d6c9871d66537f25b9188467d46e3b3d05c..f7fb1f0a6441cfc6f38ad9684fd5bc8dd1be7135 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu @@ -39,63 +39,63 @@ 
__global__ void LB_Kernel_CumulantK15Comp(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + 
D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; 
+ D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15Comp(real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = 
(D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// 
+ c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// 
real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu index 188984d001f89d72c967dd6390ca10ae5d2eab32..51876f30b8c8e37d8cb3355edde5dcf2b04675d0 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu @@ -2,6 +2,7 @@ #include "CumulantK15BulkComp_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,35 +11,20 @@ std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::sh void CumulantK15BulkComp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_CumulantK15BulkComp <<< grid, threads >>>(para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed"); + 
vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_CumulantK15BulkComp <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed"); } CumulantK15BulkComp::CumulantK15BulkComp(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu index d2a2f61df902cfd7c5ef52b09f8e7738a108615e..085775d324bf65d783afdd745c06429d697c3788 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu @@ -39,63 +39,63 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = 
&DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - 
D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + 
D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = 
(D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// 
+ c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu index d28c077031ff9125d1cbc1187def1d1d8fe4d6e8..613464125bafc572fe7951b8c372e3455ea5b21d 100644 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu @@ -2,6 +2,7 @@ #include "CumulantK15SpongeComp_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,36 +11,21 @@ std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std void CumulantK15SpongeComp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_CumulantK15SpongeComp <<< grid, threads >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->coordinateX, - para->getParD(level)->coordinateY, - para->getParD(level)->coordinateZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_CumulantK15SpongeComp <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->coordinateX, + para->getParD(level)->coordinateY, + 
para->getParD(level)->coordinateZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed"); } CumulantK15SpongeComp::CumulantK15SpongeComp(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu index c2144d324aa3378e8fc9fc5b511bbed385b48a84..13788e65e70eb30803111a39a70d39682648a006 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu @@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - 
D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 
*size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = 
&DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu index 0b72b46cf25f331172be4abb8dded6d8e5e2b9c5..24b0bbc6f43a63093da6b6dcb3ce401b8a614f75 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu @@ -30,15 +30,16 @@ CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int leve void CumulantK15Unified::run() { - GPUKernelParameter kernelParameter{ para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - (int)para->getParD(level)->numberOfNodes, - para->getParD(level)->forcing, - para->getParD(level)->isEvenTimestep }; + GPUKernelParameter kernelParameter{ + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + (int)para->getParD(level)->numberOfNodes, + para->getParD(level)->forcing, + para->getParD(level)->isEvenTimestep }; auto lambda = [] __device__(lbm::KernelParameter parameter) { return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu new file mode 100644 index 0000000000000000000000000000000000000000..ea3442fecca63fdcb45878d742a547ce492ab5c8 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu @@ -0,0 +1,140 @@ +#include "CumulantK17.h" +#include <logger/Logger.h> +#include "Parameter/Parameter.h" +#include "Parameter/CudaStreamManager.h" +#include "CumulantK17_Device.cuh" + +#include <cuda.h> + +template<TurbulenceModel turbulenceModel> +std::shared_ptr< CumulantK17<turbulenceModel> > CumulantK17<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level) +{ + return std::shared_ptr<CumulantK17<turbulenceModel> >(new CumulantK17<turbulenceModel>(para,level)); +} + +template<TurbulenceModel turbulenceModel> +void CumulantK17<turbulenceModel>::run() +{ + LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default], + para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]); + + getLastCudaError("LB_Kernel_CumulantK17 execution failed"); +} + +template<TurbulenceModel turbulenceModel> +void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int 
size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex ) +{ + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); + + switch (collisionTemplate) + { + case CollisionTemplate::Default: + LB_Kernel_CumulantK17 < turbulenceModel, false, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; + + case CollisionTemplate::WriteMacroVars: + LB_Kernel_CumulantK17 < turbulenceModel, true, false > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; + + case CollisionTemplate::SubDomainBorder: + case CollisionTemplate::AllFeatures: + LB_Kernel_CumulantK17 < turbulenceModel, true, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( 
para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; case CollisionTemplate::ApplyBodyForce: + LB_Kernel_CumulantK17 < turbulenceModel, false, true > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega, + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->rho, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getParD(level)->turbViscosity, + para->getSGSConstant(), + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP, + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep, + indices, + size_indices); + break; default: + throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()"); + break; + } + + getLastCudaError("LB_Kernel_CumulantK17 execution failed"); +} + +template<TurbulenceModel turbulenceModel> +CumulantK17<turbulenceModel>::CumulantK17(std::shared_ptr<Parameter> para, int level) +{ + this->para = para; + this->level = level; + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, 
para->getParD(level)->numberOfNodes); + this->kernelUsesFluidNodeIndices = true; + + VF_LOG_INFO("Using turbulence model: {}", turbulenceModel); +} + +template class CumulantK17<TurbulenceModel::AMD>; +template class CumulantK17<TurbulenceModel::Smagorinsky>; +template class CumulantK17<TurbulenceModel::QR>; +template class CumulantK17<TurbulenceModel::None>; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h new file mode 100644 index 0000000000000000000000000000000000000000..00c79a30c9ccf9a89901165d020fc85d5a479c1d --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h @@ -0,0 +1,20 @@ +#ifndef CUMULANT_K17_H +#define CUMULANT_K17_H + +#include "Kernel/KernelImp.h" +#include "Parameter/Parameter.h" + +template<TurbulenceModel turbulenceModel> +class CumulantK17 : public KernelImp +{ +public: + static std::shared_ptr< CumulantK17<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level); + void run() override; + void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex) override; + +private: + CumulantK17(); + CumulantK17(std::shared_ptr<Parameter> para, int level); +}; + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu deleted file mode 100644 index b176b94d07e7f280d738a797d5bd853095e3caed..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu +++ /dev/null @@ -1,33 +0,0 @@ -#include "CumulantK17Comp.h" - -#include "Parameter/Parameter.h" 
-#include "CumulantK17Comp_Device.cuh" -#include "cuda/CudaGrid.h" - -std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr<Parameter> para, int level) -{ - return std::shared_ptr<CumulantK17Comp>(new CumulantK17Comp(para,level)); -} - -void CumulantK17Comp::run() -{ - LB_Kernel_CumulantK17Comp <<< cudaGrid.grid, cudaGrid.threads >>>(para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK17Comp execution failed"); -} - -CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level): KernelImp(para, level) -{ - myPreProcessorTypes.push_back(InitCompSP27); - myKernelGroup = BasicKernel; - this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); -} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h deleted file mode 100644 index 22a95a688e5d078d7b710f494bfea360c9af0d6b..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CUMULANT_K17_COMP_H -#define CUMULANT_K17_COMP_H - -#include "Kernel/KernelImp.h" - -class CumulantK17Comp : public KernelImp -{ -public: - static std::shared_ptr<CumulantK17Comp> getNewInstance(std::shared_ptr< Parameter> para, int level); - void run(); - -private: - CumulantK17Comp(); - CumulantK17Comp(std::shared_ptr< Parameter> para, int level); -}; - -#endif diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu deleted file mode 100644 index 7cf27aa883cbfd3a0e4a0a36fa61649a62d06eeb..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu +++ /dev/null @@ -1,1040 +0,0 @@ -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; -#include "math.h" - - -__global__ void LB_Kernel_CumulantK17Comp(real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - real* quadricLimiters, - bool EvenOrOdd) -{ - //////////////////////////////////////////////////////////////////////////////// - const unsigned x = threadIdx.x; // Globaler x-Index - const unsigned y = blockIdx.x; // Globaler y-Index - const unsigned z = blockIdx.y; // Globaler z-Index - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k = nx*(ny*z + y) + x; - ////////////////////////////////////////////////////////////////////////// - - if (k<size_Mat) - { - //////////////////////////////////////////////////////////////////////////////// - unsigned int BC; - BC = bcMatD[k]; - - if ((BC != GEO_SOLID) && (BC != GEO_VOID)) - { - Distributions27 D; - if (EvenOrOdd == true) - { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 
*size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; - } - else - { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM 
*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; - } - - //////////////////////////////////////////////////////////////////////////////// - //index - //unsigned int kzero= k; - //unsigned int ke = k; - unsigned int kw = neighborX[k]; - //unsigned int kn = k; - unsigned int ks = neighborY[k]; - //unsigned int kt = k; - unsigned int kb = neighborZ[k]; - unsigned int ksw = neighborY[kw]; - //unsigned int kne = k; - //unsigned int kse = ks; - //unsigned int knw = kw; - unsigned int kbw = neighborZ[kw]; - //unsigned int kte = k; - //unsigned int kbe = kb; - //unsigned int ktw = kw; - unsigned int kbs = neighborZ[ks]; - //unsigned int ktn = k; - //unsigned int kbn = kb; - //unsigned int kts = ks; - //unsigned int ktse = ks; - //unsigned int kbnw = kbw; - //unsigned int ktnw = kw; - //unsigned int kbse = kbs; - //unsigned int ktsw = ksw; - //unsigned int kbne = kb; - //unsigned int ktne = k; - unsigned int kbsw = neighborZ[ksw]; - - //unsigned int kzero= k; - //unsigned int ke = k; - //unsigned int kw = neighborX[k]; - //unsigned int kn = k; - //unsigned int ks = neighborY[k]; - //unsigned int kt = k; - //unsigned int kb = neighborZ[k]; - //unsigned int ksw = neighborY[kw]; - //unsigned int kne = k; - //unsigned int kse = ks; - //unsigned int knw = kw; - //unsigned int kbw = neighborZ[kw]; - //unsigned int kte = k; - //unsigned int kbe = kb; - //unsigned int ktw = kw; - //unsigned int kbs = neighborZ[ks]; - //unsigned int ktn = k; - //unsigned int kbn = kb; - //unsigned int kts = ks; - //unsigned int ktse = ks; - //unsigned int kbnw = kbw; - //unsigned int ktnw = kw; - //unsigned int kbse = kbs; - //unsigned int ktsw = ksw; - //unsigned int kbne = kb; - //unsigned int ktne = k; - //unsigned int kbsw = neighborZ[ksw]; - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb 
= (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts - real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real 
mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw - //////////////////////////////////////////////////////////////////////////////////// - real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; - - real rho = c1o1 + drho; - //////////////////////////////////////////////////////////////////////////////////// - //slow - //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + - // (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + - // ((mfabb+mfcbb) + (mfbab+mfbcb) + (mfbba+mfbbc)));//fehlt mfbbb - real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho; - real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) / rho; - real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho; - //////////////////////////////////////////////////////////////////////////////////// - //the force be with you - real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; // - real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero; - real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero; - 
vvx += fx*c1o2; - vvy += fy*c1o2; - vvz += fz*c1o2; - //////////////////////////////////////////////////////////////////////////////////// - //real omega = omega_in; - //////////////////////////////////////////////////////////////////////////////////// - //fast - real oMdrho = c1o1; // comp special - //real oMdrho = one - (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + - // mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb + - // mfabb+mfcbb + mfbab+mfbcb + mfbba+mfbbc + mfbbb);//fehlt mfbbb nicht mehr - //real vvx =mfccc-mfaaa + mfcac-mfaca + mfcaa-mfacc + mfcca-mfaac + - // mfcba-mfabc + mfcbc-mfaba + mfcab-mfacb + mfccb-mfaab + - // mfcbb-mfabb; - //real vvy =mfccc-mfaaa + mfaca-mfcac + mfacc-mfcaa + mfcca-mfaac + - // mfbca-mfbac + mfbcc-mfbaa + mfacb-mfcab + mfccb-mfaab + - // mfbcb-mfbab; - //real vvz =mfccc-mfaaa + mfcac-mfaca + mfacc-mfcaa + mfaac-mfcca + - // mfbac-mfbca + mfbcc-mfbaa + mfabc-mfcba + mfcbc-mfaba + - // mfbbc-mfbba; - //////////////////////////////////////////////////////////////////////////////////// - // oMdrho assembler style -------> faaaaaastaaaa - // or much sloooowaaaa ... 
it dep�ndssssss on sadaku - real m0, m1, m2; - //real oMdrho; - //{ - // oMdrho=mfccc+mfaaa; - // m0=mfaca+mfcac; - // m1=mfacc+mfcaa; - // m2=mfaac+mfcca; - // oMdrho+=m0; - // m1+=m2; - // oMdrho+=m1; - // m0=mfbac+mfbca; - // m1=mfbaa+mfbcc; - // m0+=m1; - // m1=mfabc+mfcba; - // m2=mfaba+mfcbc; - // m1+=m2; - // m0+=m1; - // m1=mfacb+mfcab; - // m2=mfaab+mfccb; - // m1+=m2; - // m0+=m1; - // oMdrho+=m0; - // m0=mfabb+mfcbb; - // m1=mfbab+mfbcb; - // m2=mfbba+mfbbc; - // m0+=m1+m2; - // m0+=mfbbb; //hat gefehlt - // oMdrho = one - (oMdrho + m0); - //} - //real vvx; - real vx2; - //{ - // vvx = mfccc-mfaaa; - // m0 = mfcac-mfaca; - // m1 = mfcaa-mfacc; - // m2 = mfcca-mfaac; - // vvx+= m0; - // m1 += m2; - // vvx+= m1; - // vx2 = mfcba-mfabc; - // m0 = mfcbc-mfaba; - // m1 = mfcab-mfacb; - // m2 = mfccb-mfaab; - // vx2+= m0; - // m1 += m2; - // vx2+= m1; - // vvx+= vx2; - // vx2 = mfcbb-mfabb; - // vvx+= vx2; - //} - //real vvy; - real vy2; - //{ - // vvy = mfccc-mfaaa; - // m0 = mfaca-mfcac; - // m1 = mfacc-mfcaa; - // m2 = mfcca-mfaac; - // vvy+= m0; - // m1 += m2; - // vvy+= m1; - // vy2 = mfbca-mfbac; - // m0 = mfbcc-mfbaa; - // m1 = mfacb-mfcab; - // m2 = mfccb-mfaab; - // vy2+= m0; - // m1 += m2; - // vy2+= m1; - // vvy+= vy2; - // vy2 = mfbcb-mfbab; - // vvy+= vy2; - //} - //real vvz; - real vz2; - //{ - // vvz = mfccc-mfaaa; - // m0 = mfcac-mfaca; - // m1 = mfacc-mfcaa; - // m2 = mfaac-mfcca; - // vvz+= m0; - // m1 += m2; - // vvz+= m1; - // vz2 = mfbac-mfbca; - // m0 = mfbcc-mfbaa; - // m1 = mfabc-mfcba; - // m2 = mfcbc-mfaba; - // vz2+= m0; - // m1 += m2; - // vz2+= m1; - // vvz+= vz2; - // vz2 = mfbbc-mfbba; - // vvz+= vz2; - //} - vx2 = vvx*vvx; - vy2 = vvy*vvy; - vz2 = vvz*vvz; - //////////////////////////////////////////////////////////////////////////////////// - real wadjust; - real qudricLimitP = quadricLimiters[0]; //0.01f; // * 0.0001f; // 1000000.0f; // 1000000.0f; // - real qudricLimitM = quadricLimiters[1]; //0.01f; // * 0.0001f; // 
1000000.0f; // 1000000.0f; // - real qudricLimitD = quadricLimiters[2]; //0.01f; // * 0.001f; // 1000000.0f; // 1000000.0f; // - //////////////////////////////////////////////////////////////////////////////////// - //Hin - //////////////////////////////////////////////////////////////////////////////////// - // mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - m2 = mfaaa + mfaac; - m1 = mfaac - mfaaa; - m0 = m2 + mfaab; - mfaaa = m0; - m0 += c1o36 * oMdrho; - mfaab = m1 - m0 * vvz; - mfaac = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaba + mfabc; - m1 = mfabc - mfaba; - m0 = m2 + mfabb; - mfaba = m0; - m0 += c1o9 * oMdrho; - mfabb = m1 - m0 * vvz; - mfabc = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaca + mfacc; - m1 = mfacc - mfaca; - m0 = m2 + mfacb; - mfaca = m0; - m0 += c1o36 * oMdrho; - mfacb = m1 - m0 * vvz; - mfacc = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfbaa + mfbac; - m1 = mfbac - mfbaa; - m0 = m2 + mfbab; - mfbaa = m0; - m0 += c1o9 * oMdrho; - mfbab = m1 - m0 * vvz; - mfbac = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfbba + mfbbc; - m1 = mfbbc - mfbba; - m0 = m2 + mfbbb; - mfbba = m0; - m0 += c4o9 * oMdrho; - mfbbb = m1 - m0 * vvz; - mfbbc = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfbca + mfbcc; - m1 = mfbcc - mfbca; - m0 = m2 + mfbcb; - mfbca = m0; - m0 += c1o9 * oMdrho; - mfbcb = m1 - m0 * vvz; - mfbcc = m2 - c2o1* m1 * vvz + 
vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcaa + mfcac; - m1 = mfcac - mfcaa; - m0 = m2 + mfcab; - mfcaa = m0; - m0 += c1o36 * oMdrho; - mfcab = m1 - m0 * vvz; - mfcac = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcba + mfcbc; - m1 = mfcbc - mfcba; - m0 = m2 + mfcbb; - mfcba = m0; - m0 += c1o9 * oMdrho; - mfcbb = m1 - m0 * vvz; - mfcbc = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcca + mfccc; - m1 = mfccc - mfcca; - m0 = m2 + mfccb; - mfcca = m0; - m0 += c1o36 * oMdrho; - mfccb = m1 - m0 * vvz; - mfccc = m2 - c2o1* m1 * vvz + vz2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - // mit 1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - m2 = mfaaa + mfaca; - m1 = mfaca - mfaaa; - m0 = m2 + mfaba; - mfaaa = m0; - m0 += c1o6 * oMdrho; - mfaba = m1 - m0 * vvy; - mfaca = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaab + mfacb; - m1 = mfacb - mfaab; - m0 = m2 + mfabb; - mfaab = m0; - mfabb = m1 - m0 * vvy; - mfacb = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaac + mfacc; - m1 = mfacc - mfaac; - m0 = m2 + mfabc; - mfaac = m0; - m0 += c1o18 * oMdrho; - mfabc = m1 - m0 * vvy; - mfacc = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - 
//////////////////////////////////////////////////////////////////////////////////// - m2 = mfbaa + mfbca; - m1 = mfbca - mfbaa; - m0 = m2 + mfbba; - mfbaa = m0; - m0 += c2o3 * oMdrho; - mfbba = m1 - m0 * vvy; - mfbca = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfbab + mfbcb; - m1 = mfbcb - mfbab; - m0 = m2 + mfbbb; - mfbab = m0; - mfbbb = m1 - m0 * vvy; - mfbcb = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfbac + mfbcc; - m1 = mfbcc - mfbac; - m0 = m2 + mfbbc; - mfbac = m0; - m0 += c2o9 * oMdrho; - mfbbc = m1 - m0 * vvy; - mfbcc = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcaa + mfcca; - m1 = mfcca - mfcaa; - m0 = m2 + mfcba; - mfcaa = m0; - m0 += c1o6 * oMdrho; - mfcba = m1 - m0 * vvy; - mfcca = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcab + mfccb; - m1 = mfccb - mfcab; - m0 = m2 + mfcbb; - mfcab = m0; - mfcbb = m1 - m0 * vvy; - mfccb = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfcac + mfccc; - m1 = mfccc - mfcac; - m0 = m2 + mfcbc; - mfcac = m0; - m0 += c1o18 * oMdrho; - mfcbc = m1 - m0 * vvy; - mfccc = m2 - c2o1* m1 * vvy + vy2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - // mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - m2 = mfaaa + mfcaa; - m1 = mfcaa - mfaaa; - m0 = m2 + mfbaa; - mfaaa = m0; - m0 += c1o1* oMdrho; - 
mfbaa = m1 - m0 * vvx; - mfcaa = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaba + mfcba; - m1 = mfcba - mfaba; - m0 = m2 + mfbba; - mfaba = m0; - mfbba = m1 - m0 * vvx; - mfcba = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaca + mfcca; - m1 = mfcca - mfaca; - m0 = m2 + mfbca; - mfaca = m0; - m0 += c1o3 * oMdrho; - mfbca = m1 - m0 * vvx; - mfcca = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaab + mfcab; - m1 = mfcab - mfaab; - m0 = m2 + mfbab; - mfaab = m0; - mfbab = m1 - m0 * vvx; - mfcab = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfabb + mfcbb; - m1 = mfcbb - mfabb; - m0 = m2 + mfbbb; - mfabb = m0; - mfbbb = m1 - m0 * vvx; - mfcbb = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfacb + mfccb; - m1 = mfccb - mfacb; - m0 = m2 + mfbcb; - mfacb = m0; - mfbcb = m1 - m0 * vvx; - mfccb = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfaac + mfcac; - m1 = mfcac - mfaac; - m0 = m2 + mfbac; - mfaac = m0; - m0 += c1o3 * oMdrho; - mfbac = m1 - m0 * vvx; - mfcac = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = mfabc + mfcbc; - m1 = mfcbc - mfabc; - m0 = m2 + mfbbc; - mfabc = m0; - mfbbc = m1 - m0 * vvx; - mfcbc = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - m2 = 
mfacc + mfccc; - m1 = mfccc - mfacc; - m0 = m2 + mfbcc; - mfacc = m0; - m0 += c1o9 * oMdrho; - mfbcc = m1 - m0 * vvx; - mfccc = m2 - c2o1* m1 * vvx + vx2 * m0; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////////////// - // Cumulants - //////////////////////////////////////////////////////////////////////////////////// - real OxxPyyPzz = c1o1; //set the bulk viscosity one is high / two is very low and zero is (too) high ... (also called omega 2) - - //////////////////////////////////////////////////////////// - //3. - ////////////////////////////// - real OxyyPxzz = c8o1*(-c2o1 + omega)*(c1o1 + c2o1*omega) / (-c8o1 - c14o1*omega + c7o1*omega*omega);//one; - real OxyyMxzz = c8o1*(-c2o1 + omega)*(-c7o1 + c4o1*omega) / (c56o1 - c50o1*omega + c9o1*omega*omega);//one; - real Oxyz = c24o1*(-c2o1 + omega)*(-c2o1 - c7o1*omega + c3o1*omega*omega) / (c48o1 + c152o1*omega - c130o1*omega*omega + c29o1*omega*omega*omega);//one; - //////////////////////////////////////////////////////////// - //4. - ////////////////////////////// - real O4 = c1o1; - ////////////////////////////// - //real O4 = omega;//TRT - //////////////////////////////////////////////////////////// - //5. - ////////////////////////////// - real O5 = c1o1; - //////////////////////////////////////////////////////////// - //6. - ////////////////////////////// - real O6 = c1o1; - //////////////////////////////////////////////////////////// - - - //central moments to cumulants - //4. 
- real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho; - real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho; - real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho; - - real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho)); - real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho)); - real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho)); - - //5. - real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho; - real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho; - real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho; - - //6. - - real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) / (rho * rho) - - c1o3 * (mfacc + mfcac + mfcca) / rho - - c1o9 * (mfcaa + mfaca + mfaac) / rho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3 - + c1o27*((drho * drho - drho) / (rho*rho))); - - //2. 
- // linear combinations - real mxxPyyPzz = mfcaa + mfaca + mfaac; - real mxxMyy = mfcaa - mfaca; - real mxxMzz = mfcaa - mfaac; - - //////////////////////////////////////////////////////////////////////////// - real Dxy = -c3o1*omega*mfbba; - real Dxz = -c3o1*omega*mfbab; - real Dyz = -c3o1*omega*mfabb; - - //3. - // linear combinations - - real mxxyPyzz = mfcba + mfabc; - real mxxyMyzz = mfcba - mfabc; - - real mxxzPyyz = mfcab + mfacb; - real mxxzMyyz = mfcab - mfacb; - - real mxyyPxzz = mfbca + mfbac; - real mxyyMxzz = mfbca - mfbac; - - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //incl. correction (hat noch nicht so gut funktioniert...Optimierungsbedarf??) - - real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; - - //relax - mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz; - mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - ////no correction - //mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz);//-magicBulk*OxxPyyPzz; - //mxxMyy += -(-omega) * (-mxxMyy); - //mxxMzz += -(-omega) * (-mxxMzz); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - mfabb += omega * (-mfabb); - mfbab += omega * (-mfbab); - mfbba += omega * (-mfbba); - 
////////////////////////////////////////////////////////////////////////// - - // linear combinations back - mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - mfaca = c1o3 * (-c2o1* mxxMyy + mxxMzz + mxxPyyPzz); - mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); - - - //relax - ////////////////////////////////////////////////////////////////////////// - //das ist der limiter - wadjust = Oxyz + (c1o1 - Oxyz)*abs(mfbbb) / (abs(mfbbb) + qudricLimitD); - mfbbb += wadjust * (-mfbbb); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz)*abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - //ohne limiter - //mfbbb += OxyyMxzz * (-mfbbb); - //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - - // linear combinations back - mfcba = (mxxyMyzz + mxxyPyzz) * c1o2; - mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; - mfcab = (mxxzMyyz + mxxzPyyz) * c1o2; - mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; - mfbca = (mxyyMxzz + mxyyPxzz) * c1o2; - mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; - - //4. 
- ////////////////////////////////////////////////////////////////////////// - //mit limiter - // wadjust = O4+(one-O4)*abs(CUMacc)/(abs(CUMacc)+qudricLimit); - //CUMacc += wadjust * (-CUMacc); - // wadjust = O4+(one-O4)*abs(CUMcac)/(abs(CUMcac)+qudricLimit); - //CUMcac += wadjust * (-CUMcac); - // wadjust = O4+(one-O4)*abs(CUMcca)/(abs(CUMcca)+qudricLimit); - //CUMcca += wadjust * (-CUMcca); - - // wadjust = O4+(one-O4)*abs(CUMbbc)/(abs(CUMbbc)+qudricLimit); - //CUMbbc += wadjust * (-CUMbbc); - // wadjust = O4+(one-O4)*abs(CUMbcb)/(abs(CUMbcb)+qudricLimit); - //CUMbcb += wadjust * (-CUMbcb); - // wadjust = O4+(one-O4)*abs(CUMcbb)/(abs(CUMcbb)+qudricLimit); - //CUMcbb += wadjust * (-CUMcbb); - ////////////////////////////////////////////////////////////////////////// - real factorA = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); - real factorB = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); - ////////////////////////////////////////////////////////////////////////// - //ohne limiter - //CUMacc += O4 * (-CUMacc); - //CUMcac += O4 * (-CUMcac); - //CUMcca += O4 * (-CUMcca); - //CUMbbc += O4 * (-CUMbbc); - //CUMbcb += O4 * (-CUMbcb); - //CUMcbb += O4 * (-CUMcbb); - CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMacc); - CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMcac); - CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (CUMcca); - CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy * c1o3 * factorB + (c1o1 - O4) * (CUMbbc); - CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (CUMbcb); - CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (CUMcbb); - ////////////////////////////////////////////////////////////////////////// - - - //5. - CUMbcc += O5 * (-CUMbcc); - CUMcbc += O5 * (-CUMcbc); - CUMccb += O5 * (-CUMccb); - - //6. 
- CUMccc += O6 * (-CUMccc); - - - - //back cumulants to central moments - //4. - mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho; - mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho; - mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho; - - mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho)); - mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho)); - mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho)); - - //5. - mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho; - mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho; - mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho; - - //6. 
- - mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho - + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2o1 * (mfcaa * mfaca * mfaac) - + c16o1 * mfbba * mfbab * mfabb) / (rho * rho) - - c1o3 * (mfacc + mfcac + mfcca) / rho - - c1o9 * (mfcaa + mfaca + mfaac) / rho - + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3 - + c1o27*((drho * drho - drho) / (rho*rho))); - //////////////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////////////// - //the force be with you - mfbaa = -mfbaa; - mfaba = -mfaba; - mfaab = -mfaab; - //////////////////////////////////////////////////////////////////////////////////// - - - //////////////////////////////////////////////////////////////////////////////////// - //back - //////////////////////////////////////////////////////////////////////////////////// - //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + c1o1* oMdrho) * (vz2 - vvz) * c1o2; - m1 = -mfaac - c2o1* mfaab * vvz + mfaaa * (c1o1 - vz2) - c1o1* oMdrho * vz2; - m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + c1o1* oMdrho) * (vz2 + vvz) * c1o2; - mfaaa = m0; - mfaab = m1; - mfaac = m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfabc * c1o2 + mfabb * (vvz - c1o2) + mfaba * (vz2 - vvz) * c1o2; - m1 = -mfabc - c2o1* mfabb * vvz + mfaba * (c1o1 - vz2); - m2 = mfabc * c1o2 + mfabb * (vvz + c1o2) + mfaba * (vz2 + vvz) * c1o2; - mfaba = m0; - mfabb = m1; - mfabc = 
m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2; - m1 = -mfacc - c2o1* mfacb * vvz + mfaca * (c1o1 - vz2) - c1o3 * oMdrho * vz2; - m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2; - mfaca = m0; - mfacb = m1; - mfacc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfbac * c1o2 + mfbab * (vvz - c1o2) + mfbaa * (vz2 - vvz) * c1o2; - m1 = -mfbac - c2o1* mfbab * vvz + mfbaa * (c1o1 - vz2); - m2 = mfbac * c1o2 + mfbab * (vvz + c1o2) + mfbaa * (vz2 + vvz) * c1o2; - mfbaa = m0; - mfbab = m1; - mfbac = m2; - /////////b////////////////////////////////////////////////////////////////////////// - m0 = mfbbc * c1o2 + mfbbb * (vvz - c1o2) + mfbba * (vz2 - vvz) * c1o2; - m1 = -mfbbc - c2o1* mfbbb * vvz + mfbba * (c1o1 - vz2); - m2 = mfbbc * c1o2 + mfbbb * (vvz + c1o2) + mfbba * (vz2 + vvz) * c1o2; - mfbba = m0; - mfbbb = m1; - mfbbc = m2; - /////////b////////////////////////////////////////////////////////////////////////// - m0 = mfbcc * c1o2 + mfbcb * (vvz - c1o2) + mfbca * (vz2 - vvz) * c1o2; - m1 = -mfbcc - c2o1* mfbcb * vvz + mfbca * (c1o1 - vz2); - m2 = mfbcc * c1o2 + mfbcb * (vvz + c1o2) + mfbca * (vz2 + vvz) * c1o2; - mfbca = m0; - mfbcb = m1; - mfbcc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2; - m1 = -mfcac - c2o1* mfcab * vvz + mfcaa * (c1o1 - vz2) - c1o3 * oMdrho * vz2; - m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2; - mfcaa = m0; - mfcab = m1; - mfcac = m2; - 
/////////c////////////////////////////////////////////////////////////////////////// - m0 = mfcbc * c1o2 + mfcbb * (vvz - c1o2) + mfcba * (vz2 - vvz) * c1o2; - m1 = -mfcbc - c2o1* mfcbb * vvz + mfcba * (c1o1 - vz2); - m2 = mfcbc * c1o2 + mfcbb * (vvz + c1o2) + mfcba * (vz2 + vvz) * c1o2; - mfcba = m0; - mfcbb = m1; - mfcbc = m2; - /////////c////////////////////////////////////////////////////////////////////////// - m0 = mfccc * c1o2 + mfccb * (vvz - c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 - vvz) * c1o2; - m1 = -mfccc - c2o1* mfccb * vvz + mfcca * (c1o1 - vz2) - c1o9 * oMdrho * vz2; - m2 = mfccc * c1o2 + mfccb * (vvz + c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 + vvz) * c1o2; - mfcca = m0; - mfccb = m1; - mfccc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - //mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - m0 = mfaca * c1o2 + mfaba * (vvy - c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfaca - c2o1* mfaba * vvy + mfaaa * (c1o1 - vy2) - c1o6 * oMdrho * vy2; - m2 = mfaca * c1o2 + mfaba * (vvy + c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 + vvy) * c1o2; - mfaaa = m0; - mfaba = m1; - mfaca = m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfacb * c1o2 + mfabb * (vvy - c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfacb - c2o1* mfabb * vvy + mfaab * (c1o1 - vy2) - c2o3 * oMdrho * vy2; - m2 = mfacb * c1o2 + mfabb * (vvy + c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 + vvy) * c1o2; - mfaab = m0; - mfabb = m1; - mfacb = m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfacc * c1o2 + mfabc * (vvy - c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfacc - c2o1* mfabc * vvy + mfaac * (c1o1 - vy2) - c1o6 * 
oMdrho * vy2; - m2 = mfacc * c1o2 + mfabc * (vvy + c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 + vvy) * c1o2; - mfaac = m0; - mfabc = m1; - mfacc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfbca * c1o2 + mfbba * (vvy - c1o2) + mfbaa * (vy2 - vvy) * c1o2; - m1 = -mfbca - c2o1* mfbba * vvy + mfbaa * (c1o1 - vy2); - m2 = mfbca * c1o2 + mfbba * (vvy + c1o2) + mfbaa * (vy2 + vvy) * c1o2; - mfbaa = m0; - mfbba = m1; - mfbca = m2; - /////////b////////////////////////////////////////////////////////////////////////// - m0 = mfbcb * c1o2 + mfbbb * (vvy - c1o2) + mfbab * (vy2 - vvy) * c1o2; - m1 = -mfbcb - c2o1* mfbbb * vvy + mfbab * (c1o1 - vy2); - m2 = mfbcb * c1o2 + mfbbb * (vvy + c1o2) + mfbab * (vy2 + vvy) * c1o2; - mfbab = m0; - mfbbb = m1; - mfbcb = m2; - /////////b////////////////////////////////////////////////////////////////////////// - m0 = mfbcc * c1o2 + mfbbc * (vvy - c1o2) + mfbac * (vy2 - vvy) * c1o2; - m1 = -mfbcc - c2o1* mfbbc * vvy + mfbac * (c1o1 - vy2); - m2 = mfbcc * c1o2 + mfbbc * (vvy + c1o2) + mfbac * (vy2 + vvy) * c1o2; - mfbac = m0; - mfbbc = m1; - mfbcc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcca * c1o2 + mfcba * (vvy - c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfcca - c2o1* mfcba * vvy + mfcaa * (c1o1 - vy2) - c1o18 * oMdrho * vy2; - m2 = mfcca * c1o2 + mfcba * (vvy + c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 + vvy) * c1o2; - mfcaa = m0; - mfcba = m1; - mfcca = m2; - /////////c////////////////////////////////////////////////////////////////////////// - m0 = mfccb * c1o2 + mfcbb * (vvy - c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfccb - c2o1* mfcbb * vvy + mfcab * (c1o1 - vy2) - c2o9 * oMdrho * vy2; - m2 = mfccb * c1o2 + mfcbb 
* (vvy + c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 + vvy) * c1o2; - mfcab = m0; - mfcbb = m1; - mfccb = m2; - /////////c////////////////////////////////////////////////////////////////////////// - m0 = mfccc * c1o2 + mfcbc * (vvy - c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 - vvy) * c1o2; - m1 = -mfccc - c2o1* mfcbc * vvy + mfcac * (c1o1 - vy2) - c1o18 * oMdrho * vy2; - m2 = mfccc * c1o2 + mfcbc * (vvy + c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 + vvy) * c1o2; - mfcac = m0; - mfcbc = m1; - mfccc = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - //mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - m0 = mfcaa * c1o2 + mfbaa * (vvx - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcaa - c2o1* mfbaa * vvx + mfaaa * (c1o1 - vx2) - c1o36 * oMdrho * vx2; - m2 = mfcaa * c1o2 + mfbaa * (vvx + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 + vvx) * c1o2; - mfaaa = m0; - mfbaa = m1; - mfcaa = m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcba * c1o2 + mfbba * (vvx - c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcba - c2o1* mfbba * vvx + mfaba * (c1o1 - vx2) - c1o9 * oMdrho * vx2; - m2 = mfcba * c1o2 + mfbba * (vvx + c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 + vvx) * c1o2; - mfaba = m0; - mfbba = m1; - mfcba = m2; - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcca * c1o2 + mfbca * (vvx - c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcca - c2o1* mfbca * vvx + mfaca * (c1o1 - vx2) - c1o36 * oMdrho * vx2; - m2 = mfcca * c1o2 + mfbca * (vvx + c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 + vvx) * c1o2; - mfaca = m0; - mfbca = m1; - mfcca = m2; - 
//////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcab * c1o2 + mfbab * (vvx - c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcab - c2o1* mfbab * vvx + mfaab * (c1o1 - vx2) - c1o9 * oMdrho * vx2; - m2 = mfcab * c1o2 + mfbab * (vvx + c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 + vvx) * c1o2; - mfaab = m0; - mfbab = m1; - mfcab = m2; - ///////////b//////////////////////////////////////////////////////////////////////// - m0 = mfcbb * c1o2 + mfbbb * (vvx - c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcbb - c2o1* mfbbb * vvx + mfabb * (c1o1 - vx2) - c4o9 * oMdrho * vx2; - m2 = mfcbb * c1o2 + mfbbb * (vvx + c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 + vvx) * c1o2; - mfabb = m0; - mfbbb = m1; - mfcbb = m2; - ///////////b//////////////////////////////////////////////////////////////////////// - m0 = mfccb * c1o2 + mfbcb * (vvx - c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfccb - c2o1* mfbcb * vvx + mfacb * (c1o1 - vx2) - c1o9 * oMdrho * vx2; - m2 = mfccb * c1o2 + mfbcb * (vvx + c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 + vvx) * c1o2; - mfacb = m0; - mfbcb = m1; - mfccb = m2; - //////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////// - m0 = mfcac * c1o2 + mfbac * (vvx - c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcac - c2o1* mfbac * vvx + mfaac * (c1o1 - vx2) - c1o36 * oMdrho * vx2; - m2 = mfcac * c1o2 + mfbac * (vvx + c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 + vvx) * c1o2; - mfaac = m0; - mfbac = m1; - mfcac = m2; - ///////////c//////////////////////////////////////////////////////////////////////// - m0 = mfcbc * c1o2 + mfbbc * (vvx - c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfcbc - c2o1* mfbbc * vvx + mfabc * (c1o1 - vx2) - c1o9 * oMdrho * vx2; - 
m2 = mfcbc * c1o2 + mfbbc * (vvx + c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 + vvx) * c1o2; - mfabc = m0; - mfbbc = m1; - mfcbc = m2; - ///////////c//////////////////////////////////////////////////////////////////////// - m0 = mfccc * c1o2 + mfbcc * (vvx - c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 - vvx) * c1o2; - m1 = -mfccc - c2o1* mfbcc * vvx + mfacc * (c1o1 - vx2) - c1o36 * oMdrho * vx2; - m2 = mfccc * c1o2 + mfbcc * (vvx + c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 + vvx) * c1o2; - mfacc = m0; - mfbcc = m1; - mfccc = m2; - //////////////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////////////// - (D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00 ])[ke ] = mfabb;// - c2over27 ; (D.f[ DIR_P00 ])[k ] - (D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00 ])[kw ] = mfcbb;// - c2over27 ; (D.f[ DIR_M00 ])[kw ] - (D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0 ])[kn ] = mfbab;// - c2over27 ; (D.f[ DIR_0P0 ])[k ] - (D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0 ])[ks ] = mfbcb;// - c2over27 ; (D.f[ DIR_0M0 ])[ks ] - (D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P ])[kt ] = mfbba;// - c2over27 ; (D.f[ DIR_00P ])[k ] - (D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M ])[kb ] = mfbbc;// - c2over27 ; (D.f[ DIR_00M ])[kb ] - (D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0 ])[kne ] = mfaab;// - c1over54 ; (D.f[ DIR_PP0 ])[k ] - (D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0 ])[ksw ] = mfccb;// - c1over54 ; (D.f[ DIR_MM0 ])[ksw ] - (D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0 ])[kse ] = mfacb;// - c1over54 ; (D.f[ DIR_PM0 ])[ks ] - (D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0 ])[knw ] = mfcab;// - c1over54 ; (D.f[ DIR_MP0 ])[kw ] - (D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P ])[kte ] = mfaba;// - c1over54 ; (D.f[ DIR_P0P ])[k ] - (D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M ])[kbw ] = mfcbc;// - c1over54 ; (D.f[ DIR_M0M ])[kbw ] - (D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M ])[kbe ] = mfabc;// - c1over54 ; (D.f[ DIR_P0M ])[kb ] - 
(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P ])[ktw ] = mfcba;// - c1over54 ; (D.f[ DIR_M0P ])[kw ] - (D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP ])[ktn ] = mfbaa;// - c1over54 ; (D.f[ DIR_0PP ])[k ] - (D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM ])[kbs ] = mfbcc;// - c1over54 ; (D.f[ DIR_0MM ])[kbs ] - (D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM ])[kbn ] = mfbac;// - c1over54 ; (D.f[ DIR_0PM ])[kb ] - (D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP ])[kts ] = mfbca;// - c1over54 ; (D.f[ DIR_0MP ])[ks ] - (D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// - c8over27 ; (D.f[ DIR_000])[k ] - (D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// - c1over216; (D.f[ DIR_PPP ])[k ] - (D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// - c1over216; (D.f[ DIR_PMP ])[ks ] - (D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// - c1over216; (D.f[ DIR_PPM ])[kb ] - (D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// - c1over216; (D.f[ DIR_PMM ])[kbs ] - (D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// - c1over216; (D.f[ DIR_MPP ])[kw ] - (D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// - c1over216; (D.f[ DIR_MMP ])[ksw ] - (D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// - c1over216; (D.f[ DIR_MPM ])[kbw ] - (D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// - c1over216; (D.f[ DIR_MMM ])[kbsw] - //////////////////////////////////////////////////////////////////////////////////// - } - } -} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh deleted file mode 100644 index f44842057d554498b0b5d4c733e2425e524a3b75..0000000000000000000000000000000000000000 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef LB_Kernel_CUMULANT_K17_COMP_H -#define LB_Kernel_CUMULANT_K17_COMP_H - -#include <DataTypes.h> -#include <curand.h> - -__global__ void LB_Kernel_CumulantK17Comp( real omega, - unsigned int* bcMatD, - unsigned int* neighborX, - unsigned int* neighborY, - unsigned int* neighborZ, - real* DDStart, - int size_Mat, - int level, - real* forces, - real* quadricLimiters, - bool EvenOrOdd); -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu similarity index 63% rename from src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu rename to src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu index db8caf1b23c2087a4c5c76886fb4530bc6272a1d..1ffec96c255b7923f3ee39c01f756abd8cad8862 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu @@ -1,55 +1,78 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | 
______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ + +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ // \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ // -// This file is part of VirtualFluids. VirtualFluids is free software: you can +// This file is part of VirtualFluids. VirtualFluids is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. 
-// +// // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // -//! \file Cumulant27chimStream.cu -//! \ingroup GPU -//! \author Martin Schoenherr, Anna Wellmann +//! \file CumulantK17_Device.cu +//! \author Anna Wellmann, Martin Schönherr, Henry Korb, Henrik Asmuth +//! \date 05/12/2022 +//! \brief Kernel for CumulantK17 including different turbulence models and options for local body forces and writing macroscopic variables +//! +//! CumulantK17 kernel using chimera transformations and quartic limiters as presented in Geier et al. (2017). Additional options are three different +//! eddy-viscosity turbulence models (Smagorinsky, AMD, QR) that can be set via the template parameter turbulenceModel (with default +//! TurbulenceModel::None). +//! The kernel is executed separately for each subset of fluid node indices with a different tag CollisionTemplate. For each subset, only the locally +//! required options are switched on ( \param writeMacroscopicVariables and/or \param applyBodyForce) in order to minimize memory accesses. The default +//! refers to the plain cumulant kernel (CollisionTemplate::Default). +//! Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets +//! are provided by the utilized PostCollisionInteractors depending on their specific requirements (e.g. writeMacroscopicVariables for probes). 
+ //======================================================================================= -/* Device code */ -#include "LBM/LB.h" +#include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> -#include "Kernel/Utilities/DistributionHelper.cuh" +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" + +#include "GPU/TurbulentViscosityInlines.cuh" using namespace vf::lbm::constant; using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" +using namespace vf::gpu; //////////////////////////////////////////////////////////////////////////////// -__global__ void LB_Kernel_CumulantK17CompChimRedesigned( - real omega, +template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce> +__global__ void LB_Kernel_CumulantK17( + real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, - unsigned long numberOfLBnodes, + real* rho, + real* vx, + real* vy, + real* vz, + real* turbulentViscosity, + real SGSconstant, + unsigned long long numberOfLBnodes, int level, real* forces, + real* bodyForceX, + real* bodyForceY, + real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, @@ -64,19 +87,18 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( //! The cumulant kernel is executed in the following steps //! //////////////////////////////////////////////////////////////////////////////// - //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. //! - const unsigned kThread = vf::gpu::getNodeIndex(); + const unsigned nodeIndex = getNodeIndex(); ////////////////////////////////////////////////////////////////////////// - //! 
- Return for non-fluid nodes - if (kThread >= numberOfFluidNodes) + // run for all indices in size_Mat and fluid nodes + if (nodeIndex >= numberOfFluidNodes) return; - //////////////////////////////////////////////////////////////////////////////// //! - Get the node index from the array containing all indices of fluid nodes //! - const unsigned k_000 = fluidNodeIndices[kThread]; + const unsigned k_000 = fluidNodeIndices[nodeIndex]; ////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on @@ -84,11 +106,11 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep); - + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// //! - Set neighbor indices (necessary for indirect addressing) - //! uint k_M00 = neighborX[k_000]; uint k_0M0 = neighborY[k_000]; uint k_00M = neighborZ[k_000]; @@ -96,9 +118,8 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( uint k_M0M = neighborZ[k_M00]; uint k_0MM = neighborZ[k_0M0]; uint k_MMM = neighborZ[k_MM0]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions (f's): + //! - Set local distributions //! 
real f_000 = (dist.f[DIR_000])[k_000]; real f_P00 = (dist.f[DIR_P00])[k_000]; @@ -159,28 +180,28 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( real& m_200 = f_PMM; real& m_000 = f_MMM; - //////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////(unsigned long)////////////////////////////// //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> //! real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + - (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + - ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + - ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + + (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + + ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + + ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + f_000; real oneOverRho = c1o1 / (c1o1 + drho); real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * - oneOverRho; + oneOverRho; real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * - oneOverRho; + oneOverRho; real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * - oneOverRho; + oneOverRho; //////////////////////////////////////////////////////////////////////////////////// //! - Add half of the acceleration (body force) to the velocity as in Eq. 
(42) \ref @@ -188,16 +209,55 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> //! real factor = c1o1; - // The factor has to be scaled for each level to get the correct acceleration. for (size_t i = 1; i <= level; i++) { factor *= c2o1; } - real fx = forces[0] / factor; - real fy = forces[1] / factor; - real fz = forces[2] / factor; - vvx += fx * c1o2; - vvy += fy * c1o2; - vvz += fz * c1o2; + + real fx = forces[0]; + real fy = forces[1]; + real fz = forces[2]; + + if( applyBodyForce ){ + fx += bodyForceX[k_000]; + fy += bodyForceY[k_000]; + fz += bodyForceZ[k_000]; + + // real vx = vvx; + // real vy = vvy; + // real vz = vvz; + real acc_x = fx * c1o2 / factor; + real acc_y = fy * c1o2 / factor; + real acc_z = fz * c1o2 / factor; + + vvx += acc_x; + vvy += acc_y; + vvz += acc_z; + + // Reset body force. To be used when not using round-off correction. + bodyForceX[k_000] = 0.0f; + bodyForceY[k_000] = 0.0f; + bodyForceZ[k_000] = 0.0f; + + //////////////////////////////////////////////////////////////////////////////////// + //!> Round-off correction + //! + //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) + //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation. + //!> Seems to be necessary at very high Re boundary layers, where the forcing and velocity can + //!> differ by several orders of magnitude. + //!> \note 16/05/2022: Testing, still ongoing! + //! 
+ // bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1; + // bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1; + // bodyForceZ[k_000] = (acc_z-(vvz-vz))*factor*c2o1; + } + else{ + vvx += fx * c1o2 / factor; + vvy += fy * c1o2 / factor; + vvz += fz * c1o2 / factor; + } + + //////////////////////////////////////////////////////////////////////////////////// // calculate the square of velocities for this lattice node real vx2 = vvx * vvx; @@ -272,15 +332,21 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. //! + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate modified omega with turbulent viscosity + //! + real omega = omega_in; + if(turbulenceModel != TurbulenceModel::None){ omega /= (c1o1 + c3o1*omega_in*turbulentViscosity[k_000]); } //////////////////////////////////////////////////////////// // 2. real OxxPyyPzz = c1o1; //////////////////////////////////////////////////////////// // 3. - real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega); - real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega); - real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) / - (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega); + real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega); + real OxyyMxzz = + c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega); + real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) / + (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega); //////////////////////////////////////////////////////////// // 4. 
real O4 = c1o1; @@ -292,16 +358,16 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( real O6 = c1o1; //////////////////////////////////////////////////////////////////////////////////// - //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116) + //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116) //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for //! different bulk viscosity). //! - real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega); + real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) /  (c2o1 - c7o1 * omega + c5o1 * omega * omega); real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega); //////////////////////////////////////////////////////////////////////////////////// - //! - Compute cumulants (c's) from central moments according to Eq. (20)-(23) in + //! - Compute cumulants from central moments according to Eq. (20)-(23) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> //! @@ -318,27 +384,27 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( // 5. 
real c_122 = m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) + - c1o3 * (m_120 + m_102)) * - oneOverRho; + c1o3 * (m_120 + m_102)) * + oneOverRho; real c_212 = m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) + - c1o3 * (m_210 + m_012)) * - oneOverRho; + c1o3 * (m_210 + m_012)) * + oneOverRho; real c_221 = m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) + - c1o3 * (m_021 + m_201)) * - oneOverRho; + c1o3 * (m_021 + m_201)) * + oneOverRho; //////////////////////////////////////////////////////////// // 6. real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) - c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) - c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) * oneOverRho + - (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + + (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) * oneOverRho * oneOverRho - c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho + - (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + + (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) * oneOverRho * oneOverRho * c2o3 + c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho)); @@ -378,6 +444,22 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz); real dyuy = dxux + omega * c3o2 * mxxMyy; real dzuz = dxux + omega * c3o2 * mxxMzz; + + //////////////////////////////////////////////////////////////////////////////////// + switch (turbulenceModel) + { + case TurbulenceModel::None: + case TurbulenceModel::AMD: //AMD is 
computed in separate kernel + break; + case TurbulenceModel::Smagorinsky: + turbulentViscosity[k_000] = calcTurbulentViscositySmagorinsky(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz); + break; + case TurbulenceModel::QR: + turbulentViscosity[k_000] = calcTurbulentViscosityQR(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz); + break; + default: + break; + } //////////////////////////////////////////////////////////// //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), @@ -386,7 +468,6 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - ////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////// ////no correction @@ -394,18 +475,18 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( // mxxMyy += -(-omega) * (-mxxMyy); // mxxMzz += -(-omega) * (-mxxMzz); ////////////////////////////////////////////////////////////////////////// - m_011 += omega * (-m_011); m_101 += omega * (-m_101); m_110 += omega * (-m_110); + //////////////////////////////////////////////////////////////////////////////////// + // relax ////////////////////////////////////////////////////////////////////////// + // incl. limiter //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> //! - ////////////////////////////////////////////////////////////////////////// - // incl. 
limiter real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD); m_111 += wadjust * (-m_111); wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP); @@ -459,6 +540,7 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (c_121); c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (c_211); + ////////////////////////////////////////////////////////////////////////// // 5. c_122 += O5 * (-c_122); @@ -503,17 +585,17 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( ////////////////////////////////////////////////////////////////////////// // 6. m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) - - c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) - - c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) * - oneOverRho + - (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + - c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) * - oneOverRho * oneOverRho - - c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho + - (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + - (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) * - oneOverRho * oneOverRho * c2o3 + - c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho)); + c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) - + c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) * + oneOverRho + + (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + + c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) * + oneOverRho * oneOverRho - + c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho + + (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + + (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + 
m_020 + m_200)) * + oneOverRho * oneOverRho * c2o3 + + c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho)); //////////////////////////////////////////////////////////////////////////////////// //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in @@ -524,6 +606,15 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( m_010 = -m_010; m_001 = -m_001; + //Write to array here to distribute read/write + if(writeMacroscopicVariables) + { + rho[k_000] = drho; + vx[k_000] = vvx; + vy[k_000] = vvy; + vz[k_000] = vvz; + } + //////////////////////////////////////////////////////////////////////////////////// //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), @@ -573,31 +664,63 @@ __global__ void LB_Kernel_CumulantK17CompChimRedesigned( //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), //! DOI:10.3390/computation5020019 ]</b></a> //! 
- (dist.f[DIR_P00])[k_000] = f_M00; - (dist.f[DIR_M00])[k_M00] = f_P00; - (dist.f[DIR_0P0])[k_000] = f_0M0; - (dist.f[DIR_0M0])[k_0M0] = f_0P0; - (dist.f[DIR_00P])[k_000] = f_00M; - (dist.f[DIR_00M])[k_00M] = f_00P; - (dist.f[DIR_PP0])[k_000] = f_MM0; - (dist.f[DIR_MM0])[k_MM0] = f_PP0; - (dist.f[DIR_PM0])[k_0M0] = f_MP0; - (dist.f[DIR_MP0])[k_M00] = f_PM0; - (dist.f[DIR_P0P])[k_000] = f_M0M; - (dist.f[DIR_M0M])[k_M0M] = f_P0P; - (dist.f[DIR_P0M])[k_00M] = f_M0P; - (dist.f[DIR_M0P])[k_M00] = f_P0M; - (dist.f[DIR_0PP])[k_000] = f_0MM; - (dist.f[DIR_0MM])[k_0MM] = f_0PP; - (dist.f[DIR_0PM])[k_00M] = f_0MP; - (dist.f[DIR_0MP])[k_0M0] = f_0PM; + (dist.f[DIR_P00])[k_000] = f_M00; + (dist.f[DIR_M00])[k_M00] = f_P00; + (dist.f[DIR_0P0])[k_000] = f_0M0; + (dist.f[DIR_0M0])[k_0M0] = f_0P0; + (dist.f[DIR_00P])[k_000] = f_00M; + (dist.f[DIR_00M])[k_00M] = f_00P; + (dist.f[DIR_PP0])[k_000] = f_MM0; + (dist.f[DIR_MM0])[k_MM0] = f_PP0; + (dist.f[DIR_PM0])[k_0M0] = f_MP0; + (dist.f[DIR_MP0])[k_M00] = f_PM0; + (dist.f[DIR_P0P])[k_000] = f_M0M; + (dist.f[DIR_M0M])[k_M0M] = f_P0P; + (dist.f[DIR_P0M])[k_00M] = f_M0P; + (dist.f[DIR_M0P])[k_M00] = f_P0M; + (dist.f[DIR_0PP])[k_000] = f_0MM; + (dist.f[DIR_0MM])[k_0MM] = f_0PP; + (dist.f[DIR_0PM])[k_00M] = f_0MP; + (dist.f[DIR_0MP])[k_0M0] = f_0PM; (dist.f[DIR_000])[k_000] = f_000; - (dist.f[DIR_PPP])[k_000] = f_MMM; - (dist.f[DIR_PMP])[k_0M0] = f_MPM; - (dist.f[DIR_PPM])[k_00M] = f_MMP; - (dist.f[DIR_PMM])[k_0MM] = f_MPP; - (dist.f[DIR_MPP])[k_M00] = f_PMM; - (dist.f[DIR_MMP])[k_MM0] = f_PPM; - (dist.f[DIR_MPM])[k_M0M] = f_PMP; - (dist.f[DIR_MMM])[k_MMM] = f_PPP; -} \ No newline at end of file + (dist.f[DIR_PPP])[k_000] = f_MMM; + (dist.f[DIR_PMP])[k_0M0] = f_MPM; + (dist.f[DIR_PPM])[k_00M] = f_MMP; + (dist.f[DIR_PMM])[k_0MM] = f_MPP; + (dist.f[DIR_MPP])[k_M00] = f_PMM; + (dist.f[DIR_MMP])[k_MM0] = f_PPM; + (dist.f[DIR_MPM])[k_M0M] = f_PMP; + (dist.f[DIR_MMM])[k_MMM] = f_PPP; +} + +template __global__ void LB_Kernel_CumulantK17 < 
TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, 
uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, 
real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, 
int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); + +template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh new file mode 100644 index 0000000000000000000000000000000000000000..da576618d1b08b55629c3c65fc115ceb822c8f7e --- /dev/null +++ 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh @@ -0,0 +1,29 @@ +#ifndef LB_Kernel_CUMULANT_K17_H +#define LB_Kernel_CUMULANT_K17_H + +#include <DataTypes.h> +#include <curand.h> + +template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce > __global__ void LB_Kernel_CumulantK17( + real omega_in, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + real* distributions, + real* rho, + real* vx, + real* vy, + real* vz, + real* turbulentViscosity, + real SGSconstant, + unsigned long long numberOfLBnodes, + int level, + real* forces, + real* bodyForceX, + real* bodyForceY, + real* bodyForceZ, + real* quadricLimiters, + bool isEvenTimestep, + const uint *fluidNodeIndices, + uint numberOfFluidNodes); +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu index 72d13282fc604dddcfa84682425a7a1829855ea0..b9e25494490507bde5a6aa7d6dd588ac1a1f6c87 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu @@ -17,17 +17,18 @@ void CumulantK17BulkComp::run() dim3 grid(Grid, 1, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_CumulantK17BulkComp << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_CumulantK17BulkComp << < grid, threads >> >( 
+ para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); getLastCudaError("LB_Kernel_CumulantK17BulkComp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu index cec04116ae4b411b1b3816ff4a8cab606c92491e..b33a3c251b5fb0cde8b1da0fcce097f955353d69 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu @@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP 
*size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = 
&DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = 
&DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -129,33 +129,33 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = 
(D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = 
(D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu index 6ef6b40d3b7079579f54ca68734deb274d0c1c3a..295804887f9c451120d463c7fcdd968bd2f24d12 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu @@ -31,15 +31,16 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, 
int leve void CumulantK17Unified::run() { - GPUKernelParameter kernelParameter{ para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - (int)para->getParD(level)->numberOfNodes, - para->getParD(level)->forcing, - para->getParD(level)->isEvenTimestep }; + GPUKernelParameter kernelParameter{ + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + (int)para->getParD(level)->numberOfNodes, + para->getParD(level)->forcing, + para->getParD(level)->isEvenTimestep }; auto lambda = [] __device__(lbm::KernelParameter parameter) { return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu index 3eea267e55fee45111fb11cf1258559e2c3c63f2..a0db78d27b00372feab8490111183481abbec8b9 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu @@ -33,11 +33,12 @@ /* Device code */ #include "LBM/LB.h" #include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/ChimeraTransformation.h" using namespace vf::lbm::constant; using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" +using namespace vf::gpu; 
//////////////////////////////////////////////////////////////////////////////// __global__ void LB_Kernel_CumulantK17CompChim( diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu deleted file mode 100644 index 8c06b7117c8b1ef62b932a76bf5de0be2ae99b1c..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu +++ /dev/null @@ -1,61 +0,0 @@ -#include "CumulantK17CompChimRedesigned.h" - -#include "Parameter/Parameter.h" -#include "Parameter/CudaStreamManager.h" -#include "CumulantK17CompChimRedesigned_Device.cuh" - -#include <cuda.h> - -std::shared_ptr<CumulantK17CompChimRedesigned> CumulantK17CompChimRedesigned::getNewInstance(std::shared_ptr<Parameter> para, - int level) -{ - return std::shared_ptr<CumulantK17CompChimRedesigned>(new CumulantK17CompChimRedesigned(para, level)); -} - -void CumulantK17CompChimRedesigned::run() -{ - LB_Kernel_CumulantK17CompChimRedesigned <<< cudaGrid.grid, cudaGrid.threads >>>( - para->getParD(level)->omega, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes); - getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); -} - -void CumulantK17CompChimRedesigned::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex) -{ - cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - - LB_Kernel_CumulantK17CompChimRedesigned<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>( - para->getParD(level)->omega, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); - getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); - -} - -CumulantK17CompChimRedesigned::CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level): KernelImp(para, level) -{ - myPreProcessorTypes.push_back(InitCompSP27); - myKernelGroup = BasicKernel; - this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); - this->kernelUsesFluidNodeIndices = true; -} - diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h deleted file mode 100644 index 4658075de330665fdba88a5ec8149a9b476d5ac7..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef CUMULANT_K17_COMP_CHIM_REDESIGN_H -#define CUMULANT_K17_COMP_CHIM_REDESIGN_H - -#include "Kernel/KernelImp.h" - -class CumulantK17CompChimRedesigned : public KernelImp -{ -public: - static std::shared_ptr<CumulantK17CompChimRedesigned> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run() override; - void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override; - -private: - 
CumulantK17CompChimRedesigned(); - CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level); -}; - -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh deleted file mode 100644 index 00628efc76447a09504d2fd32a26a63a4d611c66..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H -#define LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H - -#include <DataTypes.h> -#include <curand.h> - -__global__ void LB_Kernel_CumulantK17CompChimRedesigned( - real omega, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - unsigned long numberOfLBnodes, - int level, - real* forces, - real* quadricLimiters, - bool isEvenTimestep, - const uint* fluidNodeIndices, - uint numberOfFluidNodes); -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu deleted file mode 100644 index 6fae9f6d4845019afd363790eea0ee17c69a060f..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu +++ /dev/null @@ -1,61 +0,0 @@ -#include "CumulantK17CompChimStream.h" - -#include "Parameter/Parameter.h" -#include "Parameter/CudaStreamManager.h" -#include "CumulantK17CompChimStream_Device.cuh" - -#include <cuda.h> - -std::shared_ptr<CumulantK17CompChimStream> 
CumulantK17CompChimStream::getNewInstance(std::shared_ptr<Parameter> para, - int level) -{ - return std::shared_ptr<CumulantK17CompChimStream>(new CumulantK17CompChimStream(para, level)); -} - -void CumulantK17CompChimStream::run() -{ - LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>( - para->getParD(level)->omega, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes); - getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); -} - -void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex) -{ - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); - - LB_Kernel_CumulantK17CompChimStream<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>( - para->getParD(level)->omega, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep, - indices, - size_indices); - getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed"); - -} - -CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level): KernelImp(para, level) -{ - myPreProcessorTypes.push_back(InitCompSP27); - myKernelGroup = BasicKernel; - this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); - this->kernelUsesFluidNodeIndices = true; -} - diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h deleted file mode 100644 index 325826e04c893b7c56b7f00bb2503a4eb1fda441..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef CUMULANT_K17_COMP_CHIM_SPARSE_H -#define CUMULANT_K17_COMP_CHIM_SPARSE_H - -#include "Kernel/KernelImp.h" - -class CumulantK17CompChimStream : public KernelImp -{ -public: - static std::shared_ptr<CumulantK17CompChimStream> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run() override; - void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override; - -private: - CumulantK17CompChimStream(); - CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level); -}; - -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu deleted file mode 100644 index 830fcc6c328f2ecd0f626539040868696065065f..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu +++ /dev/null @@ -1,640 +0,0 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| 
|__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ -// \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ -// -// This file is part of VirtualFluids. VirtualFluids is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \file Cumulant27chimStream.cu -//! \ingroup GPU -//! 
\author Martin Schoenherr, Anna Wellmann -//======================================================================================= -/* Device code */ -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" - -//////////////////////////////////////////////////////////////////////////////// -__global__ void LB_Kernel_CumulantK17CompChimStream( - real omega, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - unsigned long size_Mat, - int level, - real* forces, - real* quadricLimiters, - bool isEvenTimestep, - const uint *fluidNodeIndices, - uint numberOfFluidNodes) -{ - ////////////////////////////////////////////////////////////////////////// - //! Cumulant K17 Kernel is based on \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a> - //! - //! The cumulant kernel is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - const unsigned k_thread = nx * (ny * z + y) + x; - - ////////////////////////////////////////////////////////////////////////// - // run for all indices in fluidNodeIndices - if (k_thread < numberOfFluidNodes) { - ////////////////////////////////////////////////////////////////////////// - //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on - //! 
timestep is based on the esoteric twist algorithm \ref <a - //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), - //! DOI:10.3390/computation5020019 ]</b></a> - - const unsigned k = fluidNodeIndices[k_thread]; - - Distributions27 dist; - if (isEvenTimestep) { - dist.f[DIR_P00] = &distributions[DIR_P00 * size_Mat]; - dist.f[DIR_M00] = &distributions[DIR_M00 * size_Mat]; - dist.f[DIR_0P0] = &distributions[DIR_0P0 * size_Mat]; - dist.f[DIR_0M0] = &distributions[DIR_0M0 * size_Mat]; - dist.f[DIR_00P] = &distributions[DIR_00P * size_Mat]; - dist.f[DIR_00M] = &distributions[DIR_00M * size_Mat]; - dist.f[DIR_PP0] = &distributions[DIR_PP0 * size_Mat]; - dist.f[DIR_MM0] = &distributions[DIR_MM0 * size_Mat]; - dist.f[DIR_PM0] = &distributions[DIR_PM0 * size_Mat]; - dist.f[DIR_MP0] = &distributions[DIR_MP0 * size_Mat]; - dist.f[DIR_P0P] = &distributions[DIR_P0P * size_Mat]; - dist.f[DIR_M0M] = &distributions[DIR_M0M * size_Mat]; - dist.f[DIR_P0M] = &distributions[DIR_P0M * size_Mat]; - dist.f[DIR_M0P] = &distributions[DIR_M0P * size_Mat]; - dist.f[DIR_0PP] = &distributions[DIR_0PP * size_Mat]; - dist.f[DIR_0MM] = &distributions[DIR_0MM * size_Mat]; - dist.f[DIR_0PM] = &distributions[DIR_0PM * size_Mat]; - dist.f[DIR_0MP] = &distributions[DIR_0MP * size_Mat]; - dist.f[DIR_000] = &distributions[DIR_000 * size_Mat]; - dist.f[DIR_PPP] = &distributions[DIR_PPP * size_Mat]; - dist.f[DIR_MMP] = &distributions[DIR_MMP * size_Mat]; - dist.f[DIR_PMP] = &distributions[DIR_PMP * size_Mat]; - dist.f[DIR_MPP] = &distributions[DIR_MPP * size_Mat]; - dist.f[DIR_PPM] = &distributions[DIR_PPM * size_Mat]; - dist.f[DIR_MMM] = &distributions[DIR_MMM * size_Mat]; - dist.f[DIR_PMM] = &distributions[DIR_PMM * size_Mat]; - dist.f[DIR_MPM] = &distributions[DIR_MPM * size_Mat]; - } else { - dist.f[DIR_M00] = &distributions[DIR_P00 * size_Mat]; - dist.f[DIR_P00] = &distributions[DIR_M00 * size_Mat]; - dist.f[DIR_0M0] = &distributions[DIR_0P0 * size_Mat]; - dist.f[DIR_0P0] 
= &distributions[DIR_0M0 * size_Mat]; - dist.f[DIR_00M] = &distributions[DIR_00P * size_Mat]; - dist.f[DIR_00P] = &distributions[DIR_00M * size_Mat]; - dist.f[DIR_MM0] = &distributions[DIR_PP0 * size_Mat]; - dist.f[DIR_PP0] = &distributions[DIR_MM0 * size_Mat]; - dist.f[DIR_MP0] = &distributions[DIR_PM0 * size_Mat]; - dist.f[DIR_PM0] = &distributions[DIR_MP0 * size_Mat]; - dist.f[DIR_M0M] = &distributions[DIR_P0P * size_Mat]; - dist.f[DIR_P0P] = &distributions[DIR_M0M * size_Mat]; - dist.f[DIR_M0P] = &distributions[DIR_P0M * size_Mat]; - dist.f[DIR_P0M] = &distributions[DIR_M0P * size_Mat]; - dist.f[DIR_0MM] = &distributions[DIR_0PP * size_Mat]; - dist.f[DIR_0PP] = &distributions[DIR_0MM * size_Mat]; - dist.f[DIR_0MP] = &distributions[DIR_0PM * size_Mat]; - dist.f[DIR_0PM] = &distributions[DIR_0MP * size_Mat]; - dist.f[DIR_000] = &distributions[DIR_000 * size_Mat]; - dist.f[DIR_MMM] = &distributions[DIR_PPP * size_Mat]; - dist.f[DIR_PPM] = &distributions[DIR_MMP * size_Mat]; - dist.f[DIR_MPM] = &distributions[DIR_PMP * size_Mat]; - dist.f[DIR_PMM] = &distributions[DIR_MPP * size_Mat]; - dist.f[DIR_MMP] = &distributions[DIR_PPM * size_Mat]; - dist.f[DIR_PPP] = &distributions[DIR_MMM * size_Mat]; - dist.f[DIR_MPP] = &distributions[DIR_PMM * size_Mat]; - dist.f[DIR_PMP] = &distributions[DIR_MPM * size_Mat]; - } - //////////////////////////////////////////////////////////////////////////////// - //! - Set neighbor indices (necessary for indirect addressing) - uint kw = neighborX[k]; - uint ks = neighborY[k]; - uint kb = neighborZ[k]; - uint ksw = neighborY[kw]; - uint kbw = neighborZ[kw]; - uint kbs = neighborZ[ks]; - uint kbsw = neighborZ[ksw]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions - //! 
- real mfcbb = (dist.f[DIR_P00])[k]; - real mfabb = (dist.f[DIR_M00])[kw]; - real mfbcb = (dist.f[DIR_0P0])[k]; - real mfbab = (dist.f[DIR_0M0])[ks]; - real mfbbc = (dist.f[DIR_00P])[k]; - real mfbba = (dist.f[DIR_00M])[kb]; - real mfccb = (dist.f[DIR_PP0])[k]; - real mfaab = (dist.f[DIR_MM0])[ksw]; - real mfcab = (dist.f[DIR_PM0])[ks]; - real mfacb = (dist.f[DIR_MP0])[kw]; - real mfcbc = (dist.f[DIR_P0P])[k]; - real mfaba = (dist.f[DIR_M0M])[kbw]; - real mfcba = (dist.f[DIR_P0M])[kb]; - real mfabc = (dist.f[DIR_M0P])[kw]; - real mfbcc = (dist.f[DIR_0PP])[k]; - real mfbaa = (dist.f[DIR_0MM])[kbs]; - real mfbca = (dist.f[DIR_0PM])[kb]; - real mfbac = (dist.f[DIR_0MP])[ks]; - real mfbbb = (dist.f[DIR_000])[k]; - real mfccc = (dist.f[DIR_PPP])[k]; - real mfaac = (dist.f[DIR_MMP])[ksw]; - real mfcac = (dist.f[DIR_PMP])[ks]; - real mfacc = (dist.f[DIR_MPP])[kw]; - real mfcca = (dist.f[DIR_PPM])[kb]; - real mfaaa = (dist.f[DIR_MMM])[kbsw]; - real mfcaa = (dist.f[DIR_PMM])[kbs]; - real mfaca = (dist.f[DIR_MPM])[kbw]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! 
- real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + - ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + - mfbbb; - - real rho = c1o1 + drho; - real OOrho = c1o1 / rho; - - real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb)) * - OOrho; - real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab)) * - OOrho; - real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba)) * - OOrho; - //////////////////////////////////////////////////////////////////////////////////// - //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - real factor = c1o1; - for (size_t i = 1; i <= level; i++) { - factor *= c2o1; - } - real fx = forces[0] / factor; - real fy = forces[1] / factor; - real fz = forces[2] / factor; - vvx += fx * c1o2; - vvy += fy * c1o2; - vvz += fz * c1o2; - //////////////////////////////////////////////////////////////////////////////////// - // calculate the square of velocities for this lattice node - real vx2 = vvx * vvx; - real vy2 = vvy * vvy; - real vz2 = vvz * vvz; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to - //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. 
Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - real wadjust; - real qudricLimitP = quadricLimiters[0]; - real qudricLimitM = quadricLimiters[1]; - real qudricLimitD = quadricLimiters[2]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a - //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - forwardChimera(mfaab, mfabb, mfacb, vvy, vy2); - forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); - forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - 
forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); - forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - forwardChimera(mfaba, mfbba, mfcba, vvx, vx2); - forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - forwardChimera(mfaab, mfbab, mfcab, vvx, vx2); - forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); - forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); - forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); - forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations - //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. - //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk - //! viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. - //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz - //! \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. - //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz - //! \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. - //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with - //! simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! 
- Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification - //! all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. - //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. - //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. - //! - //////////////////////////////////////////////////////////// - // 2. - real OxxPyyPzz = c1o1; - //////////////////////////////////////////////////////////// - // 3. - real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega); - real OxyyMxzz = - c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega); - real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) / - (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega); - //////////////////////////////////////////////////////////// - // 4. - real O4 = c1o1; - //////////////////////////////////////////////////////////// - // 5. - real O5 = c1o1; - //////////////////////////////////////////////////////////// - // 6. - real O6 = c1o1; - - //////////////////////////////////////////////////////////////////////////////////// - //! - A and DIR_00M: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for - //! different bulk viscosity). - //! - real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega); - real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega); - - //////////////////////////////////////////////////////////////////////////////////// - //! 
- Compute cumulants from central moments according to Eq. (20)-(23) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - //////////////////////////////////////////////////////////// - // 4. - real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho; - real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho; - real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho; - - real CUMcca = - mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho)); - real CUMcac = - mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho)); - real CUMacc = - mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho)); - //////////////////////////////////////////////////////////// - // 5. - real CUMbcc = - mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + - c1o3 * (mfbca + mfbac)) * - OOrho; - real CUMcbc = - mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + - c1o3 * (mfcba + mfabc)) * - OOrho; - real CUMccb = - mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + - c1o3 * (mfacb + mfcab)) * - OOrho; - //////////////////////////////////////////////////////////// - // 6. 
- real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * - OOrho + - (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + - c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) * - OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + - (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + - (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * - OOrho * OOrho * c2o3 + - c1o27 * ((drho * drho - drho) * OOrho * OOrho)); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute linear combinations of second and third order cumulants - //! - //////////////////////////////////////////////////////////// - // 2. - real mxxPyyPzz = mfcaa + mfaca + mfaac; - real mxxMyy = mfcaa - mfaca; - real mxxMzz = mfcaa - mfaac; - //////////////////////////////////////////////////////////// - // 3. - real mxxyPyzz = mfcba + mfabc; - real mxxyMyzz = mfcba - mfabc; - - real mxxzPyyz = mfcab + mfacb; - real mxxzMyyz = mfcab - mfacb; - - real mxyyPxzz = mfbca + mfbac; - real mxyyMxzz = mfbca - mfbac; - - //////////////////////////////////////////////////////////////////////////////////// - // incl. correction - //////////////////////////////////////////////////////////// - //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times - //! 
the gradients later. - //! - real Dxy = -c3o1 * omega * mfbba; - real Dxz = -c3o1 * omega * mfbab; - real Dyz = -c3o1 * omega * mfabb; - real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; - //////////////////////////////////////////////////////////// - //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - mxxPyyPzz += - OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); - mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - - //////////////////////////////////////////////////////////////////////////////////// - ////no correction - // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); - // mxxMyy += -(-omega) * (-mxxMyy); - // mxxMzz += -(-omega) * (-mxxMzz); - ////////////////////////////////////////////////////////////////////////// - mfabb += omega * (-mfabb); - mfbab += omega * (-mfbab); - mfbba += omega * (-mfbba); - - //////////////////////////////////////////////////////////////////////////////////// - // relax - ////////////////////////////////////////////////////////////////////////// - // incl. limiter - //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD); - mfbbb += wadjust * (-mfbbb); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - // no limiter - // mfbbb += OxyyMxzz * (-mfbbb); - // mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - // mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - // mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - // mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - // mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - // mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute inverse linear combinations of second and third order cumulants - //! - mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz); - mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz); - - mfcba = (mxxyMyzz + mxxyPyzz) * c1o2; - mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; - mfcab = (mxxzMyyz + mxxzPyyz) * c1o2; - mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; - mfbca = (mxyyMxzz + mxyyPxzz) * c1o2; - mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; - ////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - // 4. 
- // no limiter - //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according - //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMacc); - CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMcac); - CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (CUMcca); - CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * factorB + (c1o1 - O4) * (CUMbbc); - CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (CUMbcb); - CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (CUMcbb); - - ////////////////////////////////////////////////////////////////////////// - // 5. - CUMbcc += O5 * (-CUMbcc); - CUMcbc += O5 * (-CUMcbc); - CUMccb += O5 * (-CUMccb); - - ////////////////////////////////////////////////////////////////////////// - // 6. - CUMccc += O6 * (-CUMccc); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - - ////////////////////////////////////////////////////////////////////////// - // 4. 
- mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; - mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho; - mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; - - mfcca = - CUMcca + - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9; - mfcac = - CUMcac + - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9; - mfacc = - CUMacc + - (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9; - - ////////////////////////////////////////////////////////////////////////// - // 5. - mfbcc = CUMbcc + c1o3 * - (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + - c2o1 * (mfbab * mfacb + mfbba * mfabc)) + - (mfbca + mfbac)) * - OOrho; - mfcbc = CUMcbc + c1o3 * - (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + - c2o1 * (mfabb * mfcab + mfbba * mfbac)) + - (mfcba + mfabc)) * - OOrho; - mfccb = CUMccb + c1o3 * - (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + - c2o1 * (mfbab * mfbca + mfabb * mfcba)) + - (mfacb + mfcab)) * - OOrho; - - ////////////////////////////////////////////////////////////////////////// - // 6. 
- mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * - OOrho + - (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + - c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) * - OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + - (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + - (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * - OOrho * OOrho * c2o3 + - c1o27 * ((drho * drho - drho) * OOrho * OOrho)); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - mfbaa = -mfbaa; - mfaba = -mfaba; - mfaab = -mfaab; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a - //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> - //! 
- //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1); - backwardChimera(mfaba, mfbba, mfcba, vvx, vx2); - backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3); - backwardChimera(mfaab, mfbab, mfcab, vvx, vx2); - backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); - backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); - backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3); - backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); - backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6); - backwardChimera(mfaab, mfabb, mfacb, vvy, vy2); - backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18); - backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); - backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6); - backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); - backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36); - backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36); - backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36); - 
backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Write distributions: style of reading and writing the distributions from/to - //! stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), - //! DOI:10.3390/computation5020019 ]</b></a> - //! - (dist.f[DIR_P00])[k] = mfabb; - (dist.f[DIR_M00])[kw] = mfcbb; - (dist.f[DIR_0P0])[k] = mfbab; - (dist.f[DIR_0M0])[ks] = mfbcb; - (dist.f[DIR_00P])[k] = mfbba; - (dist.f[DIR_00M])[kb] = mfbbc; - (dist.f[DIR_PP0])[k] = mfaab; - (dist.f[DIR_MM0])[ksw] = mfccb; - (dist.f[DIR_PM0])[ks] = mfacb; - (dist.f[DIR_MP0])[kw] = mfcab; - (dist.f[DIR_P0P])[k] = mfaba; - (dist.f[DIR_M0M])[kbw] = mfcbc; - (dist.f[DIR_P0M])[kb] = mfabc; - (dist.f[DIR_M0P])[kw] = mfcba; - (dist.f[DIR_0PP])[k] = mfbaa; - (dist.f[DIR_0MM])[kbs] = mfbcc; - (dist.f[DIR_0PM])[kb] = mfbac; - (dist.f[DIR_0MP])[ks] = mfbca; - (dist.f[DIR_000])[k] = mfbbb; - (dist.f[DIR_PPP])[k] = mfaaa; - (dist.f[DIR_PMP])[ks] = mfaca; - (dist.f[DIR_PPM])[kb] = mfaac; - (dist.f[DIR_PMM])[kbs] = mfacc; - (dist.f[DIR_MPP])[kw] = mfcaa; - (dist.f[DIR_MMP])[ksw] = mfcca; - (dist.f[DIR_MPM])[kbw] = mfcac; - (dist.f[DIR_MMM])[kbsw] = mfccc; - } -} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh deleted file mode 100644 index f74192c0423ba9dc96820d7f46eecb9d49a39ed4..0000000000000000000000000000000000000000 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef LB_Kernel_CUMULANT_K17_COMP_CHIM_SPARSE_H -#define LB_Kernel_CUMULANT_K17_COMP_CHIM_SPARSE_H - -#include <DataTypes.h> -#include <curand.h> - -__global__ void LB_Kernel_CumulantK17CompChimStream( - real omega, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - unsigned long size_Mat, - int level, - real* forces, - real* quadricLimiters, - bool isEvenTimestep, - const uint* fluidNodeIndices, - uint numberOfFluidNodes); -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu index 54af306039585f3beb39b05f2f2e0a96ae784e12..2e0af0bdb85d3f008768f9f430e8b4e5d9719b0f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu @@ -1,8 +1,8 @@ #include "CumulantK18Comp.h" #include "CumulantK18Comp_Device.cuh" - #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -11,37 +11,22 @@ std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr void CumulantK18Comp::run() { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_CumulantK18Comp << < grid, threads >> 
>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->g6.g[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK18Comp execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_CumulantK18Comp <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->g6.g[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_CumulantK18Comp execution failed"); } CumulantK18Comp::CumulantK18Comp(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu index bb42d113e47ce28f153ac295f2d9a934dd1b213a..0e4ae5caebb9bd4b1c889a78bfadb62487742c98 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu @@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK18Comp( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = 
&DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = 
&DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] 
= &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } Distributions6 G; if (EvenOrOdd == true) { - G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; + G.g[DIR_P00] = &G6[DIR_P00 * size_Mat]; + G.g[DIR_M00] = &G6[DIR_M00 * size_Mat]; + G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat]; + G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat]; + G.g[DIR_00P] = &G6[DIR_00P * size_Mat]; + G.g[DIR_00M] = &G6[DIR_00M * size_Mat]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; + G.g[DIR_M00] = &G6[DIR_P00 * size_Mat]; + G.g[DIR_P00] = &G6[DIR_M00 * size_Mat]; + G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat]; + G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat]; + G.g[DIR_00M] = &G6[DIR_00P * size_Mat]; + G.g[DIR_00P] = &G6[DIR_00M * size_Mat]; } 
//////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu index 0c1778dc39496c6564dedcbe1f6e818bee147191..d0d81eaac711d4d80284b66a1040e0e8404f5d4d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu @@ -1,8 +1,8 @@ #include "CumulantK20Comp.h" #include "CumulantK20Comp_Device.cuh" - #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -11,37 +11,22 @@ std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr void CumulantK20Comp::run() { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_CumulantK20Comp << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->g6.g[0], - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK20Comp execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, 
para->getParD(level)->numberOfNodes); + + LB_Kernel_CumulantK20Comp <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->g6.g[0], + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_CumulantK20Comp execution failed"); } CumulantK20Comp::CumulantK20Comp(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu index c805fc293aeb8b182bb0e01df82b584da69d0175..2dbe0bb62412f9363fdd0e714f5da296f81ae5b3 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu @@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK20Comp( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = 
&DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - 
D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + 
D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } Distributions6 G; if (EvenOrOdd == true) { - G.g[DIR_P00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_M00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00M *size_Mat]; + G.g[DIR_P00] = &G6[DIR_P00 * size_Mat]; + G.g[DIR_M00] = &G6[DIR_M00 * size_Mat]; + G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat]; + G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat]; + G.g[DIR_00P] = &G6[DIR_00P * size_Mat]; + G.g[DIR_00M] = &G6[DIR_00M * size_Mat]; } else { - G.g[DIR_M00] = &G6[DIR_P00 *size_Mat]; - G.g[DIR_P00] = &G6[DIR_M00 *size_Mat]; - G.g[DIR_0M0] = &G6[DIR_0P0 *size_Mat]; - G.g[DIR_0P0] = &G6[DIR_0M0 *size_Mat]; - G.g[DIR_00M] = &G6[DIR_00P *size_Mat]; - G.g[DIR_00P] = &G6[DIR_00M *size_Mat]; + G.g[DIR_M00] = &G6[DIR_P00 * size_Mat]; + G.g[DIR_P00] = &G6[DIR_M00 * size_Mat]; + G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat]; + G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat]; + G.g[DIR_00M] = &G6[DIR_00P * size_Mat]; + G.g[DIR_00P] = &G6[DIR_00M * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu index be94791572f739fb2eef7c049702caeedb6641fc..b576333f50304f5628e073d2eee16cf5b82c9d34 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu +++ 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu @@ -2,6 +2,7 @@ #include "MRTCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Paramet void MRTCompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_MRT_Comp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_MRT_Comp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed"); } MRTCompSP27::MRTCompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu index a9aefa2d62a962766470c93a62adeefa4f19570e..c3eb51a114e5c4a3be7605765d0889a7bae25cf0 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P 
* size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = 
&DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 
;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = 
(D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + 
c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca + mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh index 558b4f333e7c92b372a5097aa4917dd6d1230a34..3be594e3e39a57cd71741cd060e9dddda15d6035 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh @@ -5,7 +5,7 @@ #include <DataTypes.h> #include <cuda_runtime.h> -#include <lbm/KernelParameter.h> +#include "lbm/KernelParameter.h" #include "Kernel/Utilities/DistributionHelper.cuh" @@ -23,7 +23,7 @@ struct GPUKernelParameter unsigned int* neighborY; unsigned int* neighborZ; real* distributions; - int size_Mat; + int numberOfLBnodes; real* forces; bool isEvenTimestep; }; @@ -31,19 +31,22 @@ struct GPUKernelParameter template<typename KernelFunctor> __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter) { - const uint k = getNodeIndex(); + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! 
+ const unsigned nodeIndex = getNodeIndex(); - if(k >= kernelParameter.size_Mat) + if(nodeIndex >= kernelParameter.numberOfLBnodes) return; - if (!isValidFluidNode(kernelParameter.typeOfGridNode[k])) + if (!isValidFluidNode(kernelParameter.typeOfGridNode[nodeIndex])) return; DistributionWrapper distributionWrapper { kernelParameter.distributions, - (unsigned int)kernelParameter.size_Mat, + (unsigned int)kernelParameter.numberOfLBnodes, kernelParameter.isEvenTimestep, - k, + nodeIndex, kernelParameter.neighborX, kernelParameter.neighborY, kernelParameter.neighborZ diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu index 81655fac9cfd0b562ba60a5ee289fb64da5c1fba..3fb9be28654f83a7a98bb7d6b3a8a46e9170e7a8 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu @@ -2,6 +2,7 @@ #include "BGKIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Par void BGKIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_BGK_Incomp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - 
para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_BGK_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_BGK_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_BGK_Incomp_SP_27 execution failed"); } BGKIncompSP27::BGKIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu index 9a94006b8a1be745fc2bcfdd80e454152347139d..233595656720f5c84cf5be9e555565af0e9c95d0 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 
*size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = 
&DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] 
= &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu index 86b513f1252f2787abee637819e64606d111c4fa..f274f576a14fc193bcabd44d2c9078a2c98055bc 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu @@ -2,6 +2,7 @@ #include "BGKPlusIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared void BGKPlusIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_BGK_Plus_Incomp_SP_27 
<< < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_BGK_Plus_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_BGK_Plus_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_BGK_Plus_Incomp_SP_27 execution failed"); } BGKPlusIncompSP27::BGKPlusIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu index 9355e42aa5b05190f063f5247d8d6c0dea787a02..b49b76c6224be4b3543c01647a6553e6fc64b74e 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = 
&DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = 
&DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] 
= &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -126,33 +126,33 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = 
(D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + 
c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu 
index 05f374096c9c5da2460b32cf5ae8cb59cfa78382..3a6760b619d2ca1a7eb19771478eb9e5989ead0c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu @@ -2,6 +2,7 @@ #include "CascadeIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared void CascadeIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cascade_Incomp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cascade_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + 
getLastCudaError("LB_Kernel_Cascade_Incomp_SP_27 execution failed"); } CascadeIncompSP27::CascadeIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu index 92cc749b135739d5f38c9916c4ee0da7497e5f2d..8e607cabb4cc40bbb22c5ad3ec6db2c63154add6 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM 
*size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = 
&DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -126,33 +126,33 @@ 
__global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = 
(D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real 
mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu index 62768ef9948b6c259c5ad4005237081f4d255e73..44beb8507d5664f01283130dd3087a788e4491ed 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu @@ -2,6 +2,7 @@ #include "Cumulant1hIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,38 +11,23 @@ std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std:: void Cumulant1hIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 
1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cum_1h_Incomp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->deltaPhi, - para->getAngularVelocity(), - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->coordinateX, - para->getParD(level)->coordinateY, - para->getParD(level)->coordinateZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Cum_1h_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cum_1h_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->deltaPhi, + para->getAngularVelocity(), + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->coordinateX, + para->getParD(level)->coordinateY, + para->getParD(level)->coordinateZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_Cum_1h_Incomp_SP_27 execution failed"); } Cumulant1hIncompSP27::Cumulant1hIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu index 0243046082ce1853011c6632d5a2f80364ebe0db..5130017acc642c92b064a500e79ff685ec2f6d97 100644 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu @@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + 
D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM 
*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -159,33 +159,33 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + 
c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + 
real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + 
c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //Ship real coord0X = 281.125f;//7.5f; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu index 6551e1bde300e3a4d2a4f50cefdfff258edfacee..3a740bef6d7fbaa2883b3d36930d49bf9bf0bb3e 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu @@ -2,6 +2,7 @@ #include "CumulantIsoIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,36 +11,21 @@ std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std void CumulantIsoIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_Cum_IsoTest_Incomp_SP_27 << < grid, threads >> >(para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->dxxUx, - 
para->getParD(level)->dyyUy, - para->getParD(level)->dzzUz, - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_Cum_IsoTest_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->dxxUx, + para->getParD(level)->dyyUy, + para->getParD(level)->dzzUz, + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_Cum_IsoTest_Incomp_SP_27 execution failed"); } CumulantIsoIncompSP27::CumulantIsoIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu index 64d697f2b0953cee75f4397e399a0e6128e486a2..1f0ef2ec84c8d4b9b4be57548bde396c3316a80d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu @@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - 
D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + 
D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; 
+ D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 
;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real 
mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu index 
40cde56b007f70f98db13d5962f3e746b97637ef..7ae17b97170b4d8474acd6777f7c27411a962681 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu @@ -2,6 +2,7 @@ #include "CumulantK15Incomp_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared void CumulantK15Incomp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_CumulantK15Incomp <<< grid, threads >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_CumulantK15Incomp <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + 
getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed"); } CumulantK15Incomp::CumulantK15Incomp(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu index fc108ef1ef109a40735e250bd9a0f21491e4f977..01b60b3bf8067a81f99b912c4c0c700963f5448c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = 
&DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - 
D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ 
-154,33 +154,33 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega, //unsigned int ktne = k; //unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k 
];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM 
])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu index c4311309e4653f2862e303dacb3e2d07646a5061..7645703e0d40176b136762d6b48633f4a9c0d950 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu @@ -2,6 +2,7 @@ #include "MRTIncompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level) { @@ -10,33 +11,18 @@ std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Par void MRTIncompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; - int numberOfThreads = para->getParD(level)->numberofthreads; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / 
Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Kernel_MRT_Incomp_SP_27 << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->numberOfNodes, - para->getParD(level)->isEvenTimestep); - getLastCudaError("LB_Kernel_MRT_SP_27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Kernel_MRT_Incomp_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->numberOfNodes, + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Kernel_MRT_Incomp_SP_27 execution failed"); } MRTIncompSP27::MRTIncompSP27(std::shared_ptr<Parameter> para, int level) diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu index f6a283c2f9ba3c15729061ebeabcf34edd0abe97..a6663cc3c72696fda2ce9819203cd19195088730 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu @@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = 
&DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = 
&DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] 
= &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega, //unsigned int ktne = k; unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00 ])[k ];//ke - real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0 ])[k ];//kn - real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P ])[k ];//kt - real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0 ])[k ];//kne - real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0 ])[ks ];//kse - real mfacb = 
(D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0 ])[kw ];//knw - real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P ])[k ];//kte - real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M ])[kb ];//kbe - real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P ])[kw ];//ktw - real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP ])[k ];//ktn - real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs ];// + c1over54 ;(D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM ])[kb ];//kbn - real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP ])[ks ];//kts + real mfcbb = (D.f[DIR_P00])[k];//[ke ];// + c2over27 ;(D.f[DIR_P00])[k ];//ke + real mfabb = (D.f[DIR_M00])[kw];//[kw ];// + c2over27 ;(D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k];//[kn ];// + c2over27 ;(D.f[DIR_0P0])[k ];//kn + real mfbab = (D.f[DIR_0M0])[ks];//[ks ];// + c2over27 ;(D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k];//[kt ];// + c2over27 ;(D.f[DIR_00P])[k ];//kt + real mfbba = (D.f[DIR_00M])[kb];//[kb ];// + c2over27 ;(D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k];//[kne ];// + c1over54 ;(D.f[DIR_PP0])[k ];//kne + real mfaab = (D.f[DIR_MM0])[ksw];//[ksw ];// + c1over54 ;(D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks];//[kse ];// + c1over54 ;(D.f[DIR_PM0])[ks ];//kse + real mfacb = (D.f[DIR_MP0])[kw];//[knw ];// + c1over54 ;(D.f[DIR_MP0])[kw ];//knw + real mfcbc = (D.f[DIR_P0P])[k];//[kte ];// + c1over54 ;(D.f[DIR_P0P])[k ];//kte + real mfaba = (D.f[DIR_M0M])[kbw];//[kbw ];// + c1over54 ;(D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb];//[kbe ];// + c1over54 ;(D.f[DIR_P0M])[kb ];//kbe + real mfabc = (D.f[DIR_M0P])[kw];//[ktw ];// + c1over54 ;(D.f[DIR_M0P])[kw ];//ktw + real mfbcc = (D.f[DIR_0PP])[k];//[ktn ];// + c1over54 ;(D.f[DIR_0PP])[k ];//ktn + real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs 
];// + c1over54 ;(D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb];//[kbn ];// + c1over54 ;(D.f[DIR_0PM])[kb ];//kbn + real mfbac = (D.f[DIR_0MP])[ks];//[kts ];// + c1over54 ;(D.f[DIR_0MP])[ks ];//kts real mfbbb = (D.f[DIR_000])[k];//[kzero];// + c8over27 ;(D.f[DIR_000])[k ];//kzero - real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP ])[k ];//ktne - real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP ])[ksw];//ktsw - real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP ])[ks ];//ktse - real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP ])[kw ];//ktnw - real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM ])[kb ];//kbne - real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM ])[kbsw]; - real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM ])[kbs];//kbse - real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM ])[kbw];//kbnw + real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// + c1over216;(D.f[DIR_PPP])[k ];//ktne + real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// + c1over216;(D.f[DIR_MMP])[ksw];//ktsw + real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// + c1over216;(D.f[DIR_PMP])[ks ];//ktse + real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// + c1over216;(D.f[DIR_MPP])[kw ];//ktnw + real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// + c1over216;(D.f[DIR_PPM])[kb ];//kbne + real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// + c1over216;(D.f[DIR_MMM])[kbsw]; + real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// + c1over216;(D.f[DIR_PMM])[kbs];//kbse + real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// + c1over216;(D.f[DIR_MPM])[kbw];//kbnw //////////////////////////////////////////////////////////////////////////////////// //slow //real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu index 77527d5bedab08fdcacb3a103727ae25274b2aa4..43724f9165e2bb8dca1705ae0053612df92413ec 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu @@ -11,7 +11,7 @@ std::shared_ptr<PMCumulantOneCompSP27> PMCumulantOneCompSP27::getNewInstance(std void PMCumulantOneCompSP27::run() { - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int numberOfThreads = para->getParD(level)->numberofthreads; int Grid = (size_Mat / numberOfThreads) + 1; @@ -30,7 +30,8 @@ void PMCumulantOneCompSP27::run() dim3 threads(numberOfThreads, 1, 1); for (int i = 0; i < pm.size(); i++) { - LB_Kernel_PM_Cum_One_Comp_SP_27 << < grid, threads >> >(para->getParD(level)->omega, + LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>( + para->getParD(level)->omega, para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu index 89975d1663fb236295c22b81af4b0544ffc489bb..4f5f61f9d7a61fee8fd3438de5c588c861d8604c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu @@ -11,7 +11,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, 
- int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, real porosity, @@ -24,63 +24,63 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega, Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes]; 
+ D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] 
= &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes]; + D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes]; + D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes]; + D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes]; + D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh index 
6533c604f32a478cdc6a097e4dd7d0b56e48150d..f2cf530b5d331c71d4a13bd5882a3657a3bbddea 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh @@ -9,7 +9,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega, unsigned int* neighborY, unsigned int* neighborZ, real* DDStart, - int size_Mat, + unsigned long long numberOfLBnodes, int level, real* forces, real porosity, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu deleted file mode 100644 index a9d518d14a286ae3f6b565176969162994afa269..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu +++ /dev/null @@ -1,57 +0,0 @@ -#include "TurbulentViscosityCumulantK17CompChim.h" -#include "cuda/CudaGrid.h" -#include <logger/Logger.h> -#include "Parameter/Parameter.h" -#include "TurbulentViscosityCumulantK17CompChim_Device.cuh" - -template<TurbulenceModel turbulenceModel> -std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > TurbulentViscosityCumulantK17CompChim<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level) -{ - return std::shared_ptr<TurbulentViscosityCumulantK17CompChim<turbulenceModel> >(new TurbulentViscosityCumulantK17CompChim<turbulenceModel>(para,level)); -} - -template<TurbulenceModel turbulenceModel> -void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run() -{ - vf::cuda::CudaGrid grid = 
vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes); - - LB_Kernel_TurbulentViscosityCumulantK17CompChim < turbulenceModel > <<< grid.grid, grid.threads >>>( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->rho, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->turbViscosity, - para->getSGSConstant(), - (unsigned long)para->getParD(level)->numberOfNodes, - level, - para->getIsBodyForce(), - para->getForcesDev(), - para->getParD(level)->forceX_SP, - para->getParD(level)->forceY_SP, - para->getParD(level)->forceZ_SP, - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); - - getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed"); -} - -template<TurbulenceModel turbulenceModel> -TurbulentViscosityCumulantK17CompChim<turbulenceModel>::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level) -{ - this->para = para; - this->level = level; - - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - - VF_LOG_INFO("Using turbulence model: {}", turbulenceModel); -} - -template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>; -template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>; -template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h deleted file mode 100644 index 
0d35b68c916e54c6ec6eeeacd7189fe4d9a33c10..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef TurbulentViscosityCUMULANT_K17_COMP_CHIM_H -#define TurbulentViscosityCUMULANT_K17_COMP_CHIM_H - -#include "Kernel/KernelImp.h" -#include "Parameter/Parameter.h" - -template<TurbulenceModel turbulenceModel> -class TurbulentViscosityCumulantK17CompChim : public KernelImp -{ -public: - static std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level); - void run(); - -private: - TurbulentViscosityCumulantK17CompChim(); - TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level); -}; - -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu deleted file mode 100644 index 32350b95107b68103af0f238fefe095882919092..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu +++ /dev/null @@ -1,687 +0,0 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | 
______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ -// \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ -// -// This file is part of VirtualFluids. VirtualFluids is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \file TurbulentViscosityCumulantK17CompChim_Device.cu -//! \author Henry Korb, Henrik Asmuth -//! \date 16/05/2022 -//! \brief CumulantK17CompChim kernel by Martin Schönherr that inlcudes turbulent viscosity and other small mods. -//! -//! Additions to CumulantK17CompChim: -//! - can incorporate local body force -//! - when applying a local body force, the total round of error of forcing+bodyforce is saved and added in next time step -//! - uses turbulent viscosity that is computed in separate kernel (as of now AMD) -//! - saves macroscopic values (needed for instance for probes, AMD, and actuator models) -//! 
-//======================================================================================= -/* Device code */ -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include <lbm/constants/NumericConstants.h> -#include "Kernel/Utilities/DistributionHelper.cuh" - -#include "GPU/TurbulentViscosityInlines.cuh" - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; -#include "Kernel/Utilities/ChimeraTransformation.h" - - -//////////////////////////////////////////////////////////////////////////////// -template<TurbulenceModel turbulenceModel> -__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( - real omega_in, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - real* rho, - real* vx, - real* vy, - real* vz, - real* turbulentViscosity, - real SGSconstant, - unsigned long size_Mat, - int level, - bool bodyForce, - real* forces, - real* bodyForceX, - real* bodyForceY, - real* bodyForceZ, - real* quadricLimiters, - bool isEvenTimestep) -{ - ////////////////////////////////////////////////////////////////////////// - //! Cumulant K17 Kernel is based on \ref - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a> - //! - //! The cumulant kernel is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. - //! - const unsigned k_000 = vf::gpu::getNodeIndex(); - - ////////////////////////////////////////////////////////////////////////// - // run for all indices in size_Mat and fluid nodes - if ((k_000 < size_Mat) && (typeOfGridNode[k_000] == GEO_FLUID)) { - ////////////////////////////////////////////////////////////////////////// - //! 
- Read distributions: style of reading and writing the distributions from/to stored arrays dependent on - //! timestep is based on the esoteric twist algorithm \ref <a - //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), - //! DOI:10.3390/computation5020019 ]</b></a> - //! - Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, size_Mat, isEvenTimestep); - - //////////////////////////////////////////////////////////////////////////////// - //! - Set neighbor indices (necessary for indirect addressing) - uint k_M00 = neighborX[k_000]; - uint k_0M0 = neighborY[k_000]; - uint k_00M = neighborZ[k_000]; - uint k_MM0 = neighborY[k_M00]; - uint k_M0M = neighborZ[k_M00]; - uint k_0MM = neighborZ[k_0M0]; - uint k_MMM = neighborZ[k_MM0]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions - //! - real f_000 = (dist.f[DIR_000])[k_000]; - real f_P00 = (dist.f[DIR_P00])[k_000]; - real f_M00 = (dist.f[DIR_M00])[k_M00]; - real f_0P0 = (dist.f[DIR_0P0])[k_000]; - real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; - real f_00P = (dist.f[DIR_00P])[k_000]; - real f_00M = (dist.f[DIR_00M])[k_00M]; - real f_PP0 = (dist.f[DIR_PP0])[k_000]; - real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; - real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; - real f_MP0 = (dist.f[DIR_MP0])[k_M00]; - real f_P0P = (dist.f[DIR_P0P])[k_000]; - real f_M0M = (dist.f[DIR_M0M])[k_M0M]; - real f_P0M = (dist.f[DIR_P0M])[k_00M]; - real f_M0P = (dist.f[DIR_M0P])[k_M00]; - real f_0PP = (dist.f[DIR_0PP])[k_000]; - real f_0MM = (dist.f[DIR_0MM])[k_0MM]; - real f_0PM = (dist.f[DIR_0PM])[k_00M]; - real f_0MP = (dist.f[DIR_0MP])[k_0M0]; - real f_PPP = (dist.f[DIR_PPP])[k_000]; - real f_MPP = (dist.f[DIR_MPP])[k_M00]; - real f_PMP = (dist.f[DIR_PMP])[k_0M0]; - real f_MMP = (dist.f[DIR_MMP])[k_MM0]; - real f_PPM = (dist.f[DIR_PPM])[k_00M]; - real f_MPM = (dist.f[DIR_MPM])[k_M0M]; - real f_PMM = (dist.f[DIR_PMM])[k_0MM]; - real f_MMM = 
(dist.f[DIR_MMM])[k_MMM]; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Define aliases to use the same variable for the moments (m's): - //! - real& m_111 = f_000; - real& m_211 = f_P00; - real& m_011 = f_M00; - real& m_121 = f_0P0; - real& m_101 = f_0M0; - real& m_112 = f_00P; - real& m_110 = f_00M; - real& m_221 = f_PP0; - real& m_001 = f_MM0; - real& m_201 = f_PM0; - real& m_021 = f_MP0; - real& m_212 = f_P0P; - real& m_010 = f_M0M; - real& m_210 = f_P0M; - real& m_012 = f_M0P; - real& m_122 = f_0PP; - real& m_100 = f_0MM; - real& m_120 = f_0PM; - real& m_102 = f_0MP; - real& m_222 = f_PPP; - real& m_022 = f_MPP; - real& m_202 = f_PMP; - real& m_002 = f_MMP; - real& m_220 = f_PPM; - real& m_020 = f_MPM; - real& m_200 = f_PMM; - real& m_000 = f_MMM; - - //////////////////////////////////////////////////////(unsigned long)////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! 
- real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + - (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + - ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + - ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + - f_000; - - real oneOverRho = c1o1 / (c1o1 + drho); - - real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + - (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * - oneOverRho; - real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + - (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * - oneOverRho; - real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + - (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * - oneOverRho; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - real factor = c1o1; - for (size_t i = 1; i <= level; i++) { - factor *= c2o1; - } - - real fx = forces[0]; - real fy = forces[1]; - real fz = forces[2]; - - if( bodyForce ){ - fx += bodyForceX[k_000]; - fy += bodyForceY[k_000]; - fz += bodyForceZ[k_000]; - - real vx = vvx; - real vy = vvy; - real vz = vvz; - real acc_x = fx * c1o2 / factor; - real acc_y = fy * c1o2 / factor; - real acc_z = fz * c1o2 / factor; - - vvx += acc_x; - vvy += acc_y; - vvz += acc_z; - - // // Reset body force. To be used when not using round-off correction. 
- // bodyForceX[k] = 0.0f; - // bodyForceY[k] = 0.0f; - // bodyForceZ[k] = 0.0f; - - //////////////////////////////////////////////////////////////////////////////////// - //!> Round-off correction - //! - //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) - //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation. - //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can - //!> differ by several orders of magnitude. - //!> \note 16/05/2022: Testing, still ongoing! - //! - bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1; - bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1; - bodyForceZ[k_000] = (acc_z-(vvz-vz))*factor*c2o1; - } - else{ - vvx += fx * c1o2 / factor; - vvy += fy * c1o2 / factor; - vvz += fz * c1o2 / factor; - } - - - //////////////////////////////////////////////////////////////////////////////////// - // calculate the square of velocities for this lattice node - real vx2 = vvx * vvx; - real vy2 = vvy * vvy; - real vz2 = vvz * vvz; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to - //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - real quadricLimitP = quadricLimiters[0]; - real quadricLimitM = quadricLimiters[1]; - real quadricLimitD = quadricLimiters[2]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a - //! 
href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> - //! - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - forwardInverseChimeraWithK(f_MMM, f_MM0, f_MMP, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(f_M0M, f_M00, f_M0P, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(f_MPM, f_MP0, f_MPP, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(f_0MM, f_0M0, f_0MP, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(f_00M, f_000, f_00P, vvz, vz2, c9o4, c4o9); - forwardInverseChimeraWithK(f_0PM, f_0P0, f_0PP, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(f_PMM, f_PM0, f_PMP, vvz, vz2, c36o1, c1o36); - forwardInverseChimeraWithK(f_P0M, f_P00, f_P0P, vvz, vz2, c9o1, c1o9); - forwardInverseChimeraWithK(f_PPM, f_PP0, f_PPP, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - forwardInverseChimeraWithK(f_MMM, f_M0M, f_MPM, vvy, vy2, c6o1, c1o6); - forwardChimera( f_MM0, f_M00, f_MP0, vvy, vy2); - forwardInverseChimeraWithK(f_MMP, f_M0P, f_MPP, vvy, vy2, c18o1, c1o18); - forwardInverseChimeraWithK(f_0MM, f_00M, f_0PM, vvy, vy2, c3o2, c2o3); - forwardChimera( f_0M0, f_000, f_0P0, vvy, vy2); - forwardInverseChimeraWithK(f_0MP, f_00P, f_0PP, vvy, vy2, c9o2, c2o9); - forwardInverseChimeraWithK(f_PMM, f_P0M, f_PPM, vvy, vy2, c6o1, c1o6); - forwardChimera( f_PM0, f_P00, f_PP0, vvy, vy2); - forwardInverseChimeraWithK(f_PMP, f_P0P, f_PPP, vvy, vy2, c18o1, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - forwardInverseChimeraWithK(f_MMM, f_0MM, f_PMM, vvx, vx2, c1o1, c1o1); - forwardChimera( f_M0M, f_00M, f_P0M, vvx, vx2); - forwardInverseChimeraWithK(f_MPM, f_0PM, f_PPM, vvx, vx2, c3o1, c1o3); - forwardChimera( f_MM0, f_0M0, f_PM0, vvx, vx2); - forwardChimera( f_M00, f_000, f_P00, vvx, vx2); - 
forwardChimera( f_MP0, f_0P0, f_PP0, vvx, vx2); - forwardInverseChimeraWithK(f_MMP, f_0MP, f_PMP, vvx, vx2, c3o1, c1o3); - forwardChimera( f_M0P, f_00P, f_P0P, vvx, vx2); - forwardInverseChimeraWithK(f_MPP, f_0PP, f_PPP, vvx, vx2, c3o1, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations - //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. - //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk - //! viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. - //! - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz - //! \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$. - //! - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz - //! \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$. - //! - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with - //! simplifications assuming \f$ \omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification - //! all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. - //! - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$. - //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. - //! - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate modified omega with turbulent viscosity - //! 
- real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k_000]); - //////////////////////////////////////////////////////////// - // 2. - real OxxPyyPzz = c1o1; - //////////////////////////////////////////////////////////// - // 3. - real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega); - real OxyyMxzz = - c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega); - real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) / - (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega); - //////////////////////////////////////////////////////////// - // 4. - real O4 = c1o1; - //////////////////////////////////////////////////////////// - // 5. - real O5 = c1o1; - //////////////////////////////////////////////////////////// - // 6. - real O6 = c1o1; - - //////////////////////////////////////////////////////////////////////////////////// - //! - A and DIR_00M: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for - //! different bulk viscosity). - //! - real factorA = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega); - real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute cumulants from central moments according to Eq. (20)-(23) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - //////////////////////////////////////////////////////////// - // 4. 
- real c_211 = m_211 - ((m_200 + c1o3) * m_011 + c2o1 * m_110 * m_101) * oneOverRho; - real c_121 = m_121 - ((m_020 + c1o3) * m_101 + c2o1 * m_110 * m_011) * oneOverRho; - real c_112 = m_112 - ((m_002 + c1o3) * m_110 + c2o1 * m_101 * m_011) * oneOverRho; - - real c_220 = m_220 - (((m_200 * m_020 + c2o1 * m_110 * m_110) + c1o3 * (m_200 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho)); - real c_202 = m_202 - (((m_200 * m_002 + c2o1 * m_101 * m_101) + c1o3 * (m_200 + m_002)) * oneOverRho - c1o9 * (drho * oneOverRho)); - real c_022 = m_022 - (((m_002 * m_020 + c2o1 * m_011 * m_011) + c1o3 * (m_002 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho)); - //////////////////////////////////////////////////////////// - // 5. - real c_122 = - m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) + - c1o3 * (m_120 + m_102)) * - oneOverRho; - real c_212 = - m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) + - c1o3 * (m_210 + m_012)) * - oneOverRho; - real c_221 = - m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) + - c1o3 * (m_021 + m_201)) * - oneOverRho; - //////////////////////////////////////////////////////////// - // 6. 
- real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) - - c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) - - c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) * - oneOverRho + - (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + - c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) * - oneOverRho * oneOverRho - - c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho + - (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + - (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) * - oneOverRho * oneOverRho * c2o3 + - c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho)); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute linear combinations of second and third order cumulants - //! - //////////////////////////////////////////////////////////// - // 2. - real mxxPyyPzz = m_200 + m_020 + m_002; - real mxxMyy = m_200 - m_020; - real mxxMzz = m_200 - m_002; - //////////////////////////////////////////////////////////// - // 3. - real mxxyPyzz = m_210 + m_012; - real mxxyMyzz = m_210 - m_012; - - real mxxzPyyz = m_201 + m_021; - real mxxzMyyz = m_201 - m_021; - - real mxyyPxzz = m_120 + m_102; - real mxyyMxzz = m_120 - m_102; - - //////////////////////////////////////////////////////////////////////////////////// - // incl. correction - //////////////////////////////////////////////////////////// - //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! 
DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times - //! the gradients later. - //! - real Dxy = -c3o1 * omega * m_110; - real Dxz = -c3o1 * omega * m_101; - real Dyz = -c3o1 * omega * m_011; - real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; - - //////////////////////////////////////////////////////////////////////////////////// - switch (turbulenceModel) - { - case TurbulenceModel::AMD: //AMD is computed in separate kernel - break; - case TurbulenceModel::Smagorinsky: - turbulentViscosity[k_000] = calcTurbulentViscositySmagorinsky(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz); - break; - case TurbulenceModel::QR: - turbulentViscosity[k_000] = calcTurbulentViscosityQR(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz); - break; - default: - break; - } - //////////////////////////////////////////////////////////// - //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); - mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - - //////////////////////////////////////////////////////////////////////////////////// - ////no correction - // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); - // mxxMyy += -(-omega) * (-mxxMyy); - // mxxMzz += -(-omega) * (-mxxMzz); - ////////////////////////////////////////////////////////////////////////// - m_011 += omega * (-m_011); - m_101 += omega * (-m_101); - m_110 += omega * (-m_110); - - //////////////////////////////////////////////////////////////////////////////////// - // relax - ////////////////////////////////////////////////////////////////////////// - // incl. limiter - //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD); - m_111 += wadjust * (-m_111); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + quadricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + quadricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + quadricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + quadricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + quadricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - // no limiter - // mfbbb += OxyyMxzz * (-mfbbb); - // mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - // mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - // mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - // mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - // mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - // mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute inverse linear combinations of second and third order cumulants - //! 
- m_200 = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - m_020 = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz); - m_002 = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz); - - m_210 = ( mxxyMyzz + mxxyPyzz) * c1o2; - m_012 = (-mxxyMyzz + mxxyPyzz) * c1o2; - m_201 = ( mxxzMyyz + mxxzPyyz) * c1o2; - m_021 = (-mxxzMyyz + mxxzPyyz) * c1o2; - m_120 = ( mxyyMxzz + mxyyPxzz) * c1o2; - m_102 = (-mxyyMxzz + mxyyPxzz) * c1o2; - ////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - // 4. - // no limiter - //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according - //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - c_022 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_022); - c_202 = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_202); - c_220 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (c_220); - c_112 = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * factorB + (c1o1 - O4) * (c_112); - c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (c_121); - c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (c_211); - - - ////////////////////////////////////////////////////////////////////////// - // 5. - c_122 += O5 * (-c_122); - c_212 += O5 * (-c_212); - c_221 += O5 * (-c_221); - - ////////////////////////////////////////////////////////////////////////// - // 6. - c_222 += O6 * (-c_222); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), - //! 
DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - - ////////////////////////////////////////////////////////////////////////// - // 4. - m_211 = c_211 + c1o3 * ((c3o1 * m_200 + c1o1) * m_011 + c6o1 * m_110 * m_101) * oneOverRho; - m_121 = c_121 + c1o3 * ((c3o1 * m_020 + c1o1) * m_101 + c6o1 * m_110 * m_011) * oneOverRho; - m_112 = c_112 + c1o3 * ((c3o1 * m_002 + c1o1) * m_110 + c6o1 * m_101 * m_011) * oneOverRho; - - m_220 = - c_220 + (((m_200 * m_020 + c2o1 * m_110 * m_110) * c9o1 + c3o1 * (m_200 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9; - m_202 = - c_202 + (((m_200 * m_002 + c2o1 * m_101 * m_101) * c9o1 + c3o1 * (m_200 + m_002)) * oneOverRho - (drho * oneOverRho)) * c1o9; - m_022 = - c_022 + (((m_002 * m_020 + c2o1 * m_011 * m_011) * c9o1 + c3o1 * (m_002 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9; - - ////////////////////////////////////////////////////////////////////////// - // 5. - m_122 = c_122 + c1o3 * - (c3o1 * (m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) + - (m_120 + m_102)) * oneOverRho; - m_212 = c_212 + c1o3 * - (c3o1 * (m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) + - (m_210 + m_012)) * oneOverRho; - m_221 = c_221 + c1o3 * - (c3o1 * (m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) + - (m_021 + m_201)) * oneOverRho; - - ////////////////////////////////////////////////////////////////////////// - // 6. 
- m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) - - c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) - - c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) * - oneOverRho + - (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) + - c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) * - oneOverRho * oneOverRho - - c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho + - (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) + - (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) * - oneOverRho * oneOverRho * c2o3 + - c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho)); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - m_100 = -m_100; - m_010 = -m_010; - m_001 = -m_001; - - //Write to array here to distribute read/write - rho[k_000] = drho; - vx[k_000] = vvx; - vy[k_000] = vvy; - vz[k_000] = vvz; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a - //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 - //! ]</b></a> - //! 
- //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - backwardInverseChimeraWithK(m_000, m_100, m_200, vvx, vx2, c1o1, c1o1); - backwardChimera( m_010, m_110, m_210, vvx, vx2); - backwardInverseChimeraWithK(m_020, m_120, m_220, vvx, vx2, c3o1, c1o3); - backwardChimera( m_001, m_101, m_201, vvx, vx2); - backwardChimera( m_011, m_111, m_211, vvx, vx2); - backwardChimera( m_021, m_121, m_221, vvx, vx2); - backwardInverseChimeraWithK(m_002, m_102, m_202, vvx, vx2, c3o1, c1o3); - backwardChimera( m_012, m_112, m_212, vvx, vx2); - backwardInverseChimeraWithK(m_022, m_122, m_222, vvx, vx2, c9o1, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - backwardInverseChimeraWithK(m_000, m_010, m_020, vvy, vy2, c6o1, c1o6); - backwardChimera( m_001, m_011, m_021, vvy, vy2); - backwardInverseChimeraWithK(m_002, m_012, m_022, vvy, vy2, c18o1, c1o18); - backwardInverseChimeraWithK(m_100, m_110, m_120, vvy, vy2, c3o2, c2o3); - backwardChimera( m_101, m_111, m_121, vvy, vy2); - backwardInverseChimeraWithK(m_102, m_112, m_122, vvy, vy2, c9o2, c2o9); - backwardInverseChimeraWithK(m_200, m_210, m_220, vvy, vy2, c6o1, c1o6); - backwardChimera( m_201, m_211, m_221, vvy, vy2); - backwardInverseChimeraWithK(m_202, m_212, m_222, vvy, vy2, c18o1, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - backwardInverseChimeraWithK(m_000, m_001, m_002, vvz, vz2, c36o1, c1o36); - backwardInverseChimeraWithK(m_010, m_011, m_012, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(m_020, m_021, m_022, vvz, vz2, c36o1, c1o36); - backwardInverseChimeraWithK(m_100, m_101, m_102, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(m_110, m_111, m_112, vvz, vz2, c9o4, c4o9); - backwardInverseChimeraWithK(m_120, m_121, m_122, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(m_200, m_201, m_202, vvz, vz2, c36o1, c1o36); - 
backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz2, c9o1, c1o9); - backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz2, c36o1, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Write distributions: style of reading and writing the distributions from/to - //! stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), - //! DOI:10.3390/computation5020019 ]</b></a> - //! - (dist.f[DIR_P00])[k_000] = f_M00; - (dist.f[DIR_M00])[k_M00] = f_P00; - (dist.f[DIR_0P0])[k_000] = f_0M0; - (dist.f[DIR_0M0])[k_0M0] = f_0P0; - (dist.f[DIR_00P])[k_000] = f_00M; - (dist.f[DIR_00M])[k_00M] = f_00P; - (dist.f[DIR_PP0])[k_000] = f_MM0; - (dist.f[DIR_MM0])[k_MM0] = f_PP0; - (dist.f[DIR_PM0])[k_0M0] = f_MP0; - (dist.f[DIR_MP0])[k_M00] = f_PM0; - (dist.f[DIR_P0P])[k_000] = f_M0M; - (dist.f[DIR_M0M])[k_M0M] = f_P0P; - (dist.f[DIR_P0M])[k_00M] = f_M0P; - (dist.f[DIR_M0P])[k_M00] = f_P0M; - (dist.f[DIR_0PP])[k_000] = f_0MM; - (dist.f[DIR_0MM])[k_0MM] = f_0PP; - (dist.f[DIR_0PM])[k_00M] = f_0MP; - (dist.f[DIR_0MP])[k_0M0] = f_0PM; - (dist.f[DIR_000])[k_000] = f_000; - (dist.f[DIR_PPP])[k_000] = f_MMM; - (dist.f[DIR_PMP])[k_0M0] = f_MPM; - (dist.f[DIR_PPM])[k_00M] = f_MMP; - (dist.f[DIR_PMM])[k_0MM] = f_MPP; - (dist.f[DIR_MPP])[k_M00] = f_PMM; - (dist.f[DIR_MMP])[k_MM0] = f_PPM; - (dist.f[DIR_MPM])[k_M0M] = f_PMP; - (dist.f[DIR_MMM])[k_MMM] = f_PPP; - } -} - -template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::AMD > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); - -template 
__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::Smagorinsky > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); - -template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::QR > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh deleted file mode 100644 index 5ef37557399f263d25edf03b02b00f6a03c6e1cb..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef LB_Kernel_TURBULENT_VISCOSITY_CUMULANT_K17_COMP_CHIM_H -#define LB_Kernel_TURBULENT_VISCOSITY_CUMULANT_K17_COMP_CHIM_H - -#include <DataTypes.h> -#include <curand.h> - -template< TurbulenceModel turbulenceModel > __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim( - real omega_in, - uint* typeOfGridNode, - uint* neighborX, - uint* neighborY, - uint* neighborZ, - real* distributions, - real* rho, - real* vx, - real* vy, - 
real* vz, - real* turbulentViscosity, - real SGSconstant, - unsigned long size_Mat, - int level, - bool bodyForce, - real* forces, - real* bodyForceX, - real* bodyForceY, - real* bodyForceZ, - real* quadricLimiters, - bool isEvenTimestep); -#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp deleted file mode 100644 index f3615a89994f0ca1fafdc1eda905d3c3b615d478..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "TurbulentViscosityFluidFlowCompStrategy.h" - -#include "Parameter/Parameter.h" - -std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> TurbulentViscosityFluidFlowCompStrategy::getInstance() -{ - static std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> uniqueInstance; - if (!uniqueInstance) - uniqueInstance = std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy>(new TurbulentViscosityFluidFlowCompStrategy()); - return uniqueInstance; -} - -bool TurbulentViscosityFluidFlowCompStrategy::checkParameter(std::shared_ptr<Parameter> para) -{ - if (!para->getUseTurbulentViscosity()) - return false; - else if (!para->getCompOn()) - return false; - else - return true; -} - -TurbulentViscosityFluidFlowCompStrategy::TurbulentViscosityFluidFlowCompStrategy() {} diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h deleted file mode 100644 index 95eff17777f7f0d1c3e05fe1b0d93892a88646a4..0000000000000000000000000000000000000000 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef AMD_FLUID_FLOW_COMP_STRATEGY_H -#define AMD_FLUID_FLOW_COMP_STRATEGY_H - -#include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" - - -class TurbulentViscosityFluidFlowCompStrategy : public CheckParameterStrategy -{ -public: - static std::shared_ptr<TurbulentViscosityFluidFlowCompStrategy> getInstance(); - - bool checkParameter(std::shared_ptr<Parameter> para); - -private: - TurbulentViscosityFluidFlowCompStrategy(); - -}; -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu index cfcb70cd2bd6f3cc8ec4349650c44b7d3b0619fc..2b8a7d61e8966e2ed00022986311ae68ac0ca6d6 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu @@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK15Comp> WaleCumulantK15Comp::getNewInstance(std::sh void WaleCumulantK15Comp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int numberOfThreads = para->getParD(level)->numberofthreads; int Grid = (size_Mat / numberOfThreads) + 1; @@ -28,22 +28,23 @@ void WaleCumulantK15Comp::run() dim3 grid(Grid1, Grid2, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_WaleCumulantK15Comp << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->neighborInverse, - para->getParD(level)->velocityX, 
- para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->turbViscosity, - para->getParD(level)->numberOfNodes, - level, - para->getTimestepOfCoarseLevel(), - para->getForcesDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_WaleCumulantK15Comp <<< grid, threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->neighborInverse, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->turbViscosity, + para->getParD(level)->numberOfNodes, + level, + para->getTimestepOfCoarseLevel(), + para->getForcesDev(), + para->getParD(level)->isEvenTimestep); getLastCudaError("LB_Kernel_WaleCumulantK15Comp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu index 3da25060e6c82ea685a1659fecc8cf66eeaf44c4..a7018d1246c0832753df144ffbf2625b55f5508e 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu @@ -46,63 +46,63 @@ __global__ void LB_Kernel_WaleCumulantK15Comp( Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = 
&DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * 
size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + 
D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -136,33 +136,33 @@ __global__ void LB_Kernel_WaleCumulantK15Comp( unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb 
]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; real mfaaa = (D.f[DIR_MMM])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu index 15b808279a4c9dc771531f118cb369b7c5380a84..49ee20b44f37b01cd9bc837024a47c1428c00a18 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu @@ -10,7 +10,7 @@ 
std::shared_ptr<WaleBySoniMalavCumulantK15Comp> WaleBySoniMalavCumulantK15Comp:: void WaleBySoniMalavCumulantK15Comp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int numberOfThreads = para->getParD(level)->numberofthreads; //int Grid = size_Array / numberOfThreads; @@ -32,21 +32,22 @@ void WaleBySoniMalavCumulantK15Comp::run() dim3 grid(Grid1, Grid2, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_WaleBySoniMalavCumulantK15Comp << < grid, threads >> >( para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->neighborInverse, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->turbViscosity, - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_WaleBySoniMalavCumulantK15Comp <<< grid, threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->neighborInverse, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->turbViscosity, + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getParD(level)->isEvenTimestep); getLastCudaError("LB_Kernel_WaleBySoniMalavCumulantK15Comp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu 
b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu index 511219c352c4d156428565f718191a70b9cc6c32..6258c72c36cafa27b06b2934db42a5813ed74f99 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu @@ -45,63 +45,63 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp( Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * 
size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - 
D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -115,33 +115,33 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp( unsigned int kbsw = neighborZ[ksw]; 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = (D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; real mfaaa = (D.f[DIR_MMM])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca 
= (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu index 5eeea51301c666cf17546c85a444413111bebf2c..c9c16e2d2d2259656248948f3f10977c8f18fd24 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu @@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17Comp> WaleCumulantK17Comp::getNewInstance(std::sh void WaleCumulantK17Comp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int numberOfThreads = para->getParD(level)->numberofthreads; //int Grid = size_Array / numberOfThreads; @@ -32,23 +32,24 @@ void WaleCumulantK17Comp::run() dim3 grid(Grid1, Grid2, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_WaleCumulantK17Comp <<< grid, threads >>>(para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->neighborInverse, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->turbViscosity, - para->getParD(level)->numberOfNodes, - level, - para->getTimestepOfCoarseLevel(), - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_WaleCumulantK17Comp 
<<< grid, threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->neighborInverse, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->turbViscosity, + para->getParD(level)->numberOfNodes, + level, + para->getTimestepOfCoarseLevel(), + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); getLastCudaError("LB_Kernel_WaleCumulantK17Comp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu index 8aaa13ab1d868e15ea5707d1566ba653b44c645d..e3161e0d26efe8993bb4b6c34bda32bf15af5d3d 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu @@ -47,63 +47,63 @@ __global__ void LB_Kernel_WaleCumulantK17Comp( Distributions27 D; if (EvenOrOdd==true) { - D.f[DIR_P00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00 ] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_P0M *size_Mat]; - 
D.f[DIR_M0P ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00 ] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00 ] = 
&DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0 ] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0 ] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M ] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P ] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0 ] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0 ] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0 ] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0 ] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M ] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P ] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P ] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M ] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0MM ] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP ] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP ] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM ] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * 
size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// @@ -137,33 +137,33 @@ __global__ void LB_Kernel_WaleCumulantK17Comp( unsigned int kbsw = neighborZ[ksw]; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - real mfcbb = (D.f[DIR_P00 ])[k ]; - real mfabb = (D.f[DIR_M00 ])[kw ]; - real mfbcb = (D.f[DIR_0P0 ])[k ]; - real mfbab = (D.f[DIR_0M0 ])[ks ]; - real mfbbc = (D.f[DIR_00P ])[k ]; - real mfbba = (D.f[DIR_00M ])[kb ]; - real mfccb = (D.f[DIR_PP0 ])[k ]; - real mfaab = (D.f[DIR_MM0 ])[ksw]; - real mfcab = (D.f[DIR_PM0 ])[ks ]; - real mfacb = (D.f[DIR_MP0 ])[kw ]; - real mfcbc = (D.f[DIR_P0P ])[k ]; - real mfaba = (D.f[DIR_M0M ])[kbw]; - real mfcba = (D.f[DIR_P0M ])[kb ]; - real mfabc = (D.f[DIR_M0P ])[kw ]; - real mfbcc = (D.f[DIR_0PP ])[k ]; - real mfbaa = (D.f[DIR_0MM ])[kbs]; - real mfbca = (D.f[DIR_0PM ])[kb ]; - real mfbac = (D.f[DIR_0MP ])[ks ]; + real mfcbb = (D.f[DIR_P00])[k ]; + real mfabb = (D.f[DIR_M00])[kw ]; + real mfbcb = (D.f[DIR_0P0])[k ]; + real mfbab = (D.f[DIR_0M0])[ks ]; + real mfbbc = (D.f[DIR_00P])[k ]; + real mfbba = (D.f[DIR_00M])[kb ]; + real mfccb = (D.f[DIR_PP0])[k ]; + real mfaab = (D.f[DIR_MM0])[ksw]; + real mfcab = (D.f[DIR_PM0])[ks ]; + real mfacb = (D.f[DIR_MP0])[kw ]; + real mfcbc = (D.f[DIR_P0P])[k ]; + real mfaba = (D.f[DIR_M0M])[kbw]; + real mfcba = (D.f[DIR_P0M])[kb ]; + real mfabc = (D.f[DIR_M0P])[kw ]; + real mfbcc = 
(D.f[DIR_0PP])[k ]; + real mfbaa = (D.f[DIR_0MM])[kbs]; + real mfbca = (D.f[DIR_0PM])[kb ]; + real mfbac = (D.f[DIR_0MP])[ks ]; real mfbbb = (D.f[DIR_000])[k ]; - real mfccc = (D.f[DIR_PPP ])[k ]; - real mfaac = (D.f[DIR_MMP ])[ksw]; - real mfcac = (D.f[DIR_PMP ])[ks ]; - real mfacc = (D.f[DIR_MPP ])[kw ]; - real mfcca = (D.f[DIR_PPM ])[kb ]; + real mfccc = (D.f[DIR_PPP])[k ]; + real mfaac = (D.f[DIR_MMP])[ksw]; + real mfcac = (D.f[DIR_PMP])[ks ]; + real mfacc = (D.f[DIR_MPP])[kw ]; + real mfcca = (D.f[DIR_PPM])[kb ]; real mfaaa = (D.f[DIR_MMM])[kbsw]; - real mfcaa = (D.f[DIR_PMM ])[kbs]; - real mfaca = (D.f[DIR_MPM ])[kbw]; + real mfcaa = (D.f[DIR_PMM])[kbs]; + real mfaca = (D.f[DIR_MPM])[kbw]; //////////////////////////////////////////////////////////////////////////////////// real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + (((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) + diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu index 98dca58f522bf02ce66328819e42c717f0ceef28..b3cdd494c02c6649d60818b6b264b8db8b79d426 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu @@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17DebugComp> WaleCumulantK17DebugComp::getNewInstan void WaleCumulantK17DebugComp::run() { - int size_Mat = para->getParD(level)->numberOfNodes; + int size_Mat = (int)para->getParD(level)->numberOfNodes; int numberOfThreads = para->getParD(level)->numberofthreads; //int Grid = size_Array / numberOfThreads; @@ -32,34 +32,34 @@ void WaleCumulantK17DebugComp::run() 
dim3 grid(Grid1, Grid2, 1); dim3 threads(numberOfThreads, 1, 1); - LB_Kernel_WaleCumulantK17DebugComp << < grid, threads >> >( - para->getParD(level)->omega, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->neighborInverse, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->distributions.f[0], - para->getParD(level)->turbViscosity, - para->getParD(level)->gSij, - para->getParD(level)->gSDij, - para->getParD(level)->gDxvx, - para->getParD(level)->gDyvx, - para->getParD(level)->gDzvx, - para->getParD(level)->gDxvy, - para->getParD(level)->gDyvy, - para->getParD(level)->gDzvy, - para->getParD(level)->gDxvz, - para->getParD(level)->gDyvz, - para->getParD(level)->gDzvz, - para->getParD(level)->numberOfNodes, - level, - para->getForcesDev(), - para->getQuadricLimitersDev(), - para->getParD(level)->isEvenTimestep); + LB_Kernel_WaleCumulantK17DebugComp <<< grid, threads >>>( + para->getParD(level)->omega, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->neighborInverse, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->distributions.f[0], + para->getParD(level)->turbViscosity, + para->getParD(level)->gSij, + para->getParD(level)->gSDij, + para->getParD(level)->gDxvx, + para->getParD(level)->gDyvx, + para->getParD(level)->gDzvx, + para->getParD(level)->gDxvy, + para->getParD(level)->gDyvy, + para->getParD(level)->gDzvy, + para->getParD(level)->gDxvz, + para->getParD(level)->gDyvz, + para->getParD(level)->gDzvz, + para->getParD(level)->numberOfNodes, + level, + para->getForcesDev(), + para->getQuadricLimitersDev(), + para->getParD(level)->isEvenTimestep); 
getLastCudaError("LB_Kernel_WaleCumulantK17DebugComp execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu index a1feba477a6555ea728311a6e99d5302652813ff..63f4ecc8716fcd606fb6a75709408b0885d781e9 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu @@ -57,63 +57,63 @@ __global__ void LB_Kernel_WaleCumulantK17DebugComp( Distributions27 D; if (EvenOrOdd == true) { - D.f[DIR_P00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_M00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_M0P *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0MM] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_PMM]= 
&DDStart[DIR_PMM *size_Mat]; - D.f[DIR_MPM]= &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_PMM]= &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_MPM]= &DDStart[DIR_MPM * size_Mat]; } else { - D.f[DIR_M00] = &DDStart[DIR_P00 *size_Mat]; - D.f[DIR_P00] = &DDStart[DIR_M00 *size_Mat]; - D.f[DIR_0M0] = &DDStart[DIR_0P0 *size_Mat]; - D.f[DIR_0P0] = &DDStart[DIR_0M0 *size_Mat]; - D.f[DIR_00M] = &DDStart[DIR_00P *size_Mat]; - D.f[DIR_00P] = &DDStart[DIR_00M *size_Mat]; - D.f[DIR_MM0] = &DDStart[DIR_PP0 *size_Mat]; - D.f[DIR_PP0] = &DDStart[DIR_MM0 *size_Mat]; - D.f[DIR_MP0] = &DDStart[DIR_PM0 *size_Mat]; - D.f[DIR_PM0] = &DDStart[DIR_MP0 *size_Mat]; - D.f[DIR_M0M] = &DDStart[DIR_P0P *size_Mat]; - D.f[DIR_P0P] = &DDStart[DIR_M0M *size_Mat]; - D.f[DIR_M0P] = &DDStart[DIR_P0M *size_Mat]; - D.f[DIR_P0M] = &DDStart[DIR_M0P *size_Mat]; - 
D.f[DIR_0MM] = &DDStart[DIR_0PP *size_Mat]; - D.f[DIR_0PP] = &DDStart[DIR_0MM *size_Mat]; - D.f[DIR_0MP] = &DDStart[DIR_0PM *size_Mat]; - D.f[DIR_0PM] = &DDStart[DIR_0MP *size_Mat]; - D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; - D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat]; - D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat]; - D.f[DIR_MPM]= &DDStart[DIR_PMP *size_Mat]; - D.f[DIR_PMM]= &DDStart[DIR_MPP *size_Mat]; - D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat]; - D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat]; - D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat]; - D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat]; + D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat]; + D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat]; + D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat]; + D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat]; + D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat]; + D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat]; + D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat]; + D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat]; + D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat]; + D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat]; + D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat]; + D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat]; + D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat]; + D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat]; + D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat]; + D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat]; + D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat]; + D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat]; + D.f[DIR_000] = &DDStart[DIR_000 * size_Mat]; + D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat]; + D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat]; + D.f[DIR_MPM]= &DDStart[DIR_PMP * size_Mat]; + D.f[DIR_PMM]= &DDStart[DIR_MPP * size_Mat]; + D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat]; + D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat]; + D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat]; + D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat]; } //////////////////////////////////////////////////////////////////////////////// diff --git 
a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h deleted file mode 100644 index f7822d63fa0efd34b27773dffdeebddf521a8792..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef CHIMERA_TRANSFORMATION_H -#define CHIMERA_TRANSFORMATION_H - -#include <lbm/constants/NumericConstants.h> - -using namespace vf::lbm::constant; - -//////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardInverseChimeraWithK -//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. -inline __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) -{ - real m2 = mfa + mfc; - real m1 = mfc - mfa; - real m0 = m2 + mfb; - mfa = m0; - m0 *= Kinverse; - m0 += c1o1; - mfb = (m1 * Kinverse - m0 * vv) * K; - mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; -} - -//////////////////////////////////////////////////////////////////////////////// -//! \brief backward chimera transformation \ref backwardInverseChimeraWithK -//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. 
-inline __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) -{ - real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K; - real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K; - mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K; - mfa = m0; - mfb = m1; -} - -//////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardChimera -//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off -//! errors. -inline __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) -{ - real m1 = (mfa + mfc) + mfb; - real m2 = mfc - mfa; - mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); - mfb = m2 - vv * m1; - mfa = m1; -} - -//////////////////////////////////////////////////////////////////////////////// -//! \brief backward chimera transformation \ref backwardChimera -//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off -//! errors. 
-inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) -{ - real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); - real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv; - mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); - mfb = mb; - mfa = ma; -} -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu index 7c477c539dc3526389dc22563b50501e778a63f3..240a6ffbace64147aa67224fe72c946761fdc452 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu @@ -2,8 +2,7 @@ #include <cuda_runtime.h> - -#include <lbm/constants/NumericConstants.h> +#include "lbm/constants/NumericConstants.h" #include "lbm/constants/D3Q27.h" using namespace vf::lbm::dir; @@ -80,10 +79,4 @@ __device__ void DistributionWrapper::write() (distribution_references.f[DIR_000])[k] = distribution.f[vf::lbm::dir::ZZZ]; } -__device__ bool isValidFluidNode(uint nodeType) -{ - return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); -} - - } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh index 1009ecfa92f31e821d825ad72ba681bc3ae96d1b..599f3f46668c07da49725770177d77239f8ef9df 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh @@ -37,76 +37,13 @@ #include "lbm/KernelParameter.h" #include "lbm/constants/D3Q27.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" using namespace vf::lbm::dir; namespace vf::gpu { -__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep) 
-{ - if (isEvenTimestep) - { - dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; - dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; - dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes]; - dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; - dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; - dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes]; - dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes]; - dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; - dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; - dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; - dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; - dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes]; - dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes]; - dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes]; - dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes]; - dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes]; - dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes]; - dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes]; - dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes]; - dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes]; - dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes]; - dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes]; - dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes]; - dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes]; - dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes]; - dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes]; - dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes]; - } - else - { - dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes]; - dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes]; - 
dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; - dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; - dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes]; - dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes]; - dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; - dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; - dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; - dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; - dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes]; - dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes]; - dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes]; - dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes]; - dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes]; - dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes]; - dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes]; - dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes]; - dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; - dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes]; - dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes]; - dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes]; - dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes]; - dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes]; - dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes]; - dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes]; - dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes]; - } -} - /** * Getting references to the 27 directions. 
* @params distributions 1D real* array containing all data (number of elements = 27 * matrix_size) @@ -114,7 +51,7 @@ __inline__ __device__ __host__ void getPointersToDistributions(Distributions27 & * @params isEvenTimestep: stored data dependent on timestep is based on the esoteric twist algorithm * @return a data struct containing the addresses to the 27 directions within the 1D distribution array */ -__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int numberOfLBnodes, bool isEvenTimestep){ +__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, const unsigned long long numberOfLBnodes, const bool isEvenTimestep){ DistributionReferences27 distribution_references; getPointersToDistributions(distribution_references, distributions, numberOfLBnodes, isEvenTimestep); return distribution_references; @@ -157,20 +94,6 @@ struct DistributionWrapper const uint kbsw; }; -__inline__ __device__ unsigned int getNodeIndex() -{ - const unsigned x = threadIdx.x; - const unsigned y = blockIdx.x; - const unsigned z = blockIdx.y; - - const unsigned nx = blockDim.x; - const unsigned ny = gridDim.x; - - return nx * (ny * z + y) + x; -} - -__device__ bool isValidFluidNode(uint nodeType); - } #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp index 53ec240f096080097416e640fdd095c3812fb34c..5a2d8c9a426e5cb23ca75f91aaf6fbff75cba72b 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp @@ -8,11 +8,9 @@ #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.h" #include 
"Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.h" -#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.h" -#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h" -#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h" +#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.h" @@ -49,9 +47,6 @@ #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.h" #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.h" -//turbulent viscosity kernel -#include "Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h" - //strategies #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h" #include "Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/FluidFlowIncompStrategy.h" @@ -61,7 +56,6 @@ #include "Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADMod7IncompStrategy.h" #include "Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/PMFluidFlowCompStrategy.h" #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/WaleFluidFlowCompStrategy.h" -#include "Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/TurbulentViscosityFluidFlowCompStrategy.h" 
std::vector<std::shared_ptr<Kernel>> KernelFactoryImp::makeKernels(std::shared_ptr<Parameter> para) { @@ -118,9 +112,6 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> } else if (kernel == "CumulantCompSP27") { newKernel = CumulantCompSP27::getNewInstance(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); - } else if (kernel == "CumulantK17Comp") { - newKernel = CumulantK17Comp::getNewInstance(para, level); - checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK15Unified") { newKernel = std::make_shared<vf::gpu::CumulantK15Unified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); @@ -133,12 +124,26 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> } else if (kernel == "CumulantK17CompChim") { newKernel = CumulantK17CompChim::getNewInstance(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); - } else if (kernel == "CumulantK17CompChimStream") { - newKernel = CumulantK17CompChimStream::getNewInstance(para, level); - checkStrategy = FluidFlowCompStrategy::getInstance(); - } else if (kernel == "CumulantK17CompChimRedesigned") { - newKernel = CumulantK17CompChimRedesigned::getNewInstance(para, level); - checkStrategy = FluidFlowCompStrategy::getInstance(); + } else if (kernel == "CumulantK17"){ + switch(para->getTurbulenceModel()) + { + case TurbulenceModel::AMD: + newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level); + break; + case TurbulenceModel::Smagorinsky: + newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level); + break; + case TurbulenceModel::QR: + newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level); + break; + case TurbulenceModel::None: + newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level); + break; + default: + throw std::runtime_error("Unknown turbulence model!"); + break; + } + checkStrategy = 
FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantAll4CompSP27") { newKernel = CumulantAll4CompSP27::getNewInstance(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); @@ -197,35 +202,9 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> newKernel = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level);// || checkStrategy = WaleFluidFlowCompStrategy::getInstance(); // wale model } //=============== - else if (kernel == "TurbulentViscosityCumulantK17CompChim"){ // compressible with turbulent viscosity - switch(para->getTurbulenceModel()) // || - { // \/ // - case TurbulenceModel::AMD: - newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>::getNewInstance(para, level); - break; - case TurbulenceModel::Smagorinsky: - newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>::getNewInstance(para, level); - break; - case TurbulenceModel::QR: - newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>::getNewInstance(para, level); - break; - case TurbulenceModel::None: - throw std::runtime_error("TurbulentViscosityCumulantK17CompChim currently not implemented for TurbulenceModel::None!"); - break; - default: - throw std::runtime_error("Unknown turbulence model!"); - break; - } - checkStrategy = TurbulentViscosityFluidFlowCompStrategy::getInstance(); - // /\ // - // || - // compressible with turbulent viscosity - //=============== - } else { throw std::runtime_error("KernelFactory does not know the KernelType."); } - newKernel->setCheckParameterStrategy(checkStrategy); para->setKernelNeedsFluidNodeIndicesToRun(newKernel->getKernelUsesFluidNodeIndices()); return newKernel; diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h deleted file mode 100644 index 13ce5d88aaa7cb49225fa914c1f59c2de05802f5..0000000000000000000000000000000000000000 --- 
a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h +++ /dev/null @@ -1,148 +0,0 @@ -//======================================================================================= -// ____ ____ __ ______ __________ __ __ __ __ -// \ \ | | | | | _ \ |___ ___| | | | | / \ | | -// \ \ | | | | | |_) | | | | | | | / \ | | -// \ \ | | | | | _ / | | | | | | / /\ \ | | -// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ -// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| -// \ \ | | ________________________________________________________________ -// \ \ | | | ______________________________________________________________| -// \ \| | | | __ __ __ __ ______ _______ -// \ | | |_____ | | | | | | | | | _ \ / _____) -// \ | | _____| | | | | | | | | | | \ \ \_______ -// \ | | | | |_____ | \_/ | | | | |_/ / _____ | -// \ _____| |__| |________| \_______/ |__| |______/ (_______/ -// -// This file is part of VirtualFluids. VirtualFluids is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \file scalingHelperFunctions.h -//! \ingroup GPU/Kernel/Utilities -//! 
\author Martin Schoenherr, Anna Wellmann -//======================================================================================= - -#ifndef SCALING_HELPER_FUNCTIONS_H -#define SCALING_HELPER_FUNCTIONS_H - -#include "LBM/LB.h" -#include "lbm/constants/D3Q27.h" -#include "lbm/constants/NumericConstants.h" - -using namespace vf::lbm::constant; -using namespace vf::lbm::dir; - -__device__ __inline__ void calculateMomentsOnSourceNodes( - Distributions27& dist, - real& omega, - unsigned int& k_000, - unsigned int& k_M00, - unsigned int& k_0M0, - unsigned int& k_00M, - unsigned int& k_MM0, - unsigned int& k_M0M, - unsigned int& k_0MM, - unsigned int& k_MMM, - real& drho, - real& velocityX, - real& velocityY, - real& velocityZ, - real& kxyFromfcNEQ, - real& kyzFromfcNEQ, - real& kxzFromfcNEQ, - real& kxxMyyFromfcNEQ, - real& kxxMzzFromfcNEQ - ){ - //////////////////////////////////////////////////////////////////////////////////// - //! - Set local distributions (f's) on source nodes: - //! - real f_000 = (dist.f[DIR_000])[k_000]; - real f_P00 = (dist.f[DIR_P00])[k_000]; - real f_M00 = (dist.f[DIR_M00])[k_M00]; - real f_0P0 = (dist.f[DIR_0P0])[k_000]; - real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; - real f_00P = (dist.f[DIR_00P])[k_000]; - real f_00M = (dist.f[DIR_00M])[k_00M]; - real f_PP0 = (dist.f[DIR_PP0])[k_000]; - real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; - real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; - real f_MP0 = (dist.f[DIR_MP0])[k_M00]; - real f_P0P = (dist.f[DIR_P0P])[k_000]; - real f_M0M = (dist.f[DIR_M0M])[k_M0M]; - real f_P0M = (dist.f[DIR_P0M])[k_00M]; - real f_M0P = (dist.f[DIR_M0P])[k_M00]; - real f_0PP = (dist.f[DIR_0PP])[k_000]; - real f_0MM = (dist.f[DIR_0MM])[k_0MM]; - real f_0PM = (dist.f[DIR_0PM])[k_00M]; - real f_0MP = (dist.f[DIR_0MP])[k_0M0]; - real f_PPP = (dist.f[DIR_PPP])[k_000]; - real f_MPP = (dist.f[DIR_MPP])[k_M00]; - real f_PMP = (dist.f[DIR_PMP])[k_0M0]; - real f_MMP = (dist.f[DIR_MMP])[k_MM0]; - real f_PPM = (dist.f[DIR_PPM])[k_00M]; - real f_MPM = 
(dist.f[DIR_MPM])[k_M0M]; - real f_PMM = (dist.f[DIR_PMM])[k_0MM]; - real f_MMM = (dist.f[DIR_MMM])[k_MMM]; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), - //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + - (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + - ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + - ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + - f_000; - - real oneOverRho = c1o1 / (c1o1 + drho); - - velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + - (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * - oneOverRho; - velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + - (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * - oneOverRho; - velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + - (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * - oneOverRho; - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate second order moments for interpolation - //! 
- // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction - kxyFromfcNEQ = - -c3o1 * omega * - ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) / - (c1o1 + drho) - - ((velocityX * velocityY))); - kyzFromfcNEQ = - -c3o1 * omega * - ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) / - (c1o1 + drho) - - ((velocityY * velocityZ))); - kxzFromfcNEQ = - -c3o1 * omega * - ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) / - (c1o1 + drho) - - ((velocityX * velocityZ))); - kxxMyyFromfcNEQ = - -c3o2 * omega * - ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / (c1o1 + drho) - - ((velocityX * velocityX - velocityY * velocityY))); - kxxMzzFromfcNEQ = - -c3o2 * omega * - ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / (c1o1 + drho) - - ((velocityX * velocityX - velocityZ * velocityZ))); -} - -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp index 9ca813ac4987af618491422acb60207b7fee543c..5a36daecd5a82fc8a052bf51fedc1cb35b94a960 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp @@ -90,10 +90,10 @@ void ADKernelManager::initAD(const int level) const //////////////////////////////////////////////////////////////////////////////// void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryManager> cudaMemoryManager) const { - for (uint j = 1; j <= para->getParH(level)->numberOfNodes; j++) { - const real coordX = para->getParH(level)->coordinateX[j]; - const real coordY = para->getParH(level)->coordinateY[j]; - const real coordZ = para->getParH(level)->coordinateZ[j]; + for 
(size_t index = 1; index <= para->getParH(level)->numberOfNodes; index++) { + const real coordX = para->getParH(level)->coordinateX[index]; + const real coordY = para->getParH(level)->coordinateY[index]; + const real coordZ = para->getParH(level)->coordinateZ[index]; real concentration; @@ -104,7 +104,7 @@ void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryMan concentration = real(0.0); } - para->getParH(level)->concentration[j] = concentration; + para->getParH(level)->concentration[index] = concentration; } cudaMemoryManager->cudaCopyConcentrationHostToDevice(level); diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp index cc945ea225a28c58dca4ceefdb80fffb76228b21..e8fc3f318c920be36be7861a28659124a7b1e977 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp @@ -38,6 +38,7 @@ #include "BCKernelManager.h" #include "Factories/BoundaryConditionFactory.h" +#include "GridGenerator/TransientBCSetter/TransientBCSetter.h" #include "Calculation/Cp.h" #include "Calculation/DragLift.h" #include "GPU/GPU_Interface.h" @@ -51,6 +52,7 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac this->pressureBoundaryConditionPre = bcFactory->getPressureBoundaryConditionPre(); this->geometryBoundaryConditionPost = bcFactory->getGeometryBoundaryConditionPost(); this->stressBoundaryConditionPost = bcFactory->getStressBoundaryConditionPost(); + this->precursorBoundaryConditionPost = bcFactory->getPrecursorBoundaryConditionPost(); checkBoundaryCondition(this->velocityBoundaryConditionPost, this->para->getParD(0)->velocityBC, "velocityBoundaryConditionPost"); @@ -64,6 +66,8 @@ BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFac "geometryBoundaryConditionPost"); checkBoundaryCondition(this->stressBoundaryConditionPost, 
this->para->getParD(0)->stressBC, "stressBoundaryConditionPost"); + checkBoundaryCondition(this->precursorBoundaryConditionPost, this->para->getParD(0)->precursorBC, + "precursorBoundaryConditionPost"); } void BCKernelManager::runVelocityBCKernelPre(const int level) const @@ -387,3 +391,41 @@ void BCKernelManager::runNoSlipBCKernelPost(const int level) const{ noSlipBoundaryConditionPost(para->getParD(level).get(), &(para->getParD(level)->noSlipBC)); } } + +void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager) +{ + if(para->getParH(level)->precursorBC.numberOfBCnodes == 0) return; + + uint t_level = para->getTimeStep(level, t, true); + + uint lastTime = (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into last arrays + uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into current arrays + uint nextTime = para->getParD(level)->precursorBC.nPrecursorReads *para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into next arrays + + if(t_level>=currentTime) + { + //cycle time + lastTime = currentTime; + currentTime = nextTime; + nextTime += para->getParD(level)->precursorBC.timeStepsBetweenReads; + + //cycle pointers + real* tmp = para->getParD(level)->precursorBC.last; + para->getParD(level)->precursorBC.last = para->getParD(level)->precursorBC.current; + para->getParD(level)->precursorBC.current = para->getParD(level)->precursorBC.next; + para->getParD(level)->precursorBC.next = tmp; + + real loadTime = nextTime*pow(2,-level)*para->getTimeRatio(); + + for(auto reader : para->getParH(level)->transientBCInputFileReader) + { + reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, loadTime); + } + 
cudaMemoryManager->cudaCopyPrecursorData(level); + para->getParD(level)->precursorBC.nPrecursorReads++; + para->getParH(level)->precursorBC.nPrecursorReads++; + } + + real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.timeStepsBetweenReads; + precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio()); +} diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h index 423a9cc9056281a3a2a135ae32fa26cc47f93967..339100e6b5307e8e60f8d0846560bf89c6eea1a1 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h @@ -41,6 +41,7 @@ #include "PointerDefinitions.h" #include "VirtualFluids_GPU_export.h" + class CudaMemoryManager; class BoundaryConditionFactory; class Parameter; @@ -48,6 +49,7 @@ struct LBMSimulationParameter; using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>; using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>; +using precursorBoundaryCondition = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>; //! \class BCKernelManager //! \brief manage the cuda kernel calls to boundary conditions @@ -84,7 +86,10 @@ public: //! \brief calls the device function of the pressure boundary condition (post-collision) void runPressureBCKernelPost(const int level) const; - //! \brief calls the device function of the outflow boundary condition (pre-collision) + //! \brief calls the device function of the precursor boundary condition + void runPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + + //! \brief calls the device function of the outflow boundary condition void runOutflowBCKernelPre(const int level) const; //!
\brief calls the device function of the stress wall model (post-collision) @@ -96,13 +101,16 @@ private: //! \param boundaryCondition: a kernel function for the boundary condition //! \param bcStruct: a struct containing the grid nodes which are part of the boundary condition //! \param bcName: the name of the checked boundary condition - template <typename bcFunction> - void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBoundaryConditions &bcStruct, const std::string &bcName) + template <typename bcFunction, typename QforBC> + void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBC &bcStruct, const std::string &bcName) { if (!boundaryCondition && bcStruct.numberOfBCnodes > 0) throw std::runtime_error("The boundary condition " + bcName + " was not set!"); } + void runDistributionPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + void runVelocityPrecursorBCKernelPost(int level, uint t, CudaMemoryManager* cudaMemoryManager); + SPtr<Parameter> para; boundaryCondition velocityBoundaryConditionPost = nullptr; @@ -111,5 +119,6 @@ private: boundaryCondition pressureBoundaryConditionPre = nullptr; boundaryCondition geometryBoundaryConditionPost = nullptr; boundaryConditionWithParameter stressBoundaryConditionPost = nullptr; + precursorBoundaryCondition precursorBoundaryConditionPost = nullptr; }; #endif diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp index d55fa51bd8a225dd4e89e684bc81cd56f3f450c0..a0e02112e821eedcfeb013d3465529f668309529 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp @@ -53,3 +53,9 @@ TEST_F(BCKernelManagerTest_BCsNotSpecified, stressBoundaryConditionPost_NotSpeci para->getParD(0)->stressBC.numberOfBCnodes = 1; EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); } + 
+TEST_F(BCKernelManagerTest_BCsNotSpecified, precursorBoundaryConditionPost_NotSpecified) +{ + para->getParD(0)->precursorBC.numberOfBCnodes = 1; + EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); +} diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp index c3129e31a9c750a012a26d58961062eaf3f40add..2b6a266c0d4e5f523091fa4982eee5d83b2ec675 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp +++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp @@ -59,8 +59,9 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr VF_LOG_TRACE("Function for scalingCoarseToFine is nullptr"); } -void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, int streamIndex) const{ - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); +void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const +{ + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); this->scalingFineToCoarse(para->getParD(level).get(), para->getParD(level+1).get(), icellFC, offFC, stream); @@ -327,9 +328,9 @@ void GridScalingKernelManager::runFineToCoarseKernelAD(const int level) const } } -void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCellCF* icellCF, OffCF &offCF, int streamIndex) const +void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const { - cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + cudaStream_t stream = para->getStreamManager()->getStream(streamIndex); this->scalingCoarseToFine(para->getParD(level).get(), para->getParD(level+1).get(), icellCF, offCF, stream); // ScaleCF_comp_D3Q27F3( diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h index 85cdd88ec2e3a6622108026ce8f53c5c770f8afe..3c78ee7f9db254556e8ec6dbbafaf51cd995f10b 100644 --- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h +++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h @@ -44,6 +44,7 @@ class Parameter; class CudaMemoryManager; class GridScalingFactory; +enum class CudaStreamIndex; struct LBMSimulationParameter; struct CUstream_st; @@ -62,14 +63,14 @@ public: //! \throws std::runtime_error when the user forgets to specify a scaling function GridScalingKernelManager(SPtr<Parameter> parameter, GridScalingFactory *gridScalingFactory); - //! \brief calls the device function of the fine to coarse grid interpolation kernel - void runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, int streamIndex) const; + //! \brief calls the device function of the fine to coarse grid interpolation kernel + void runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const; //! \brief calls the device function of the fine to coarse grid interpolation kernel (advection diffusion) void runFineToCoarseKernelAD(const int level) const; //! \brief calls the device function of the coarse to fine grid interpolation kernel - void runCoarseToFineKernelLB(const int level, InterpolationCellCF *icellCF, OffCF &offCF, int streamIndex) const; + void runCoarseToFineKernelLB(const int level, InterpolationCellCF *icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const; //!
\brief calls the device function of the coarse to fine grid interpolation kernel (advection diffusion) void runCoarseToFineKernelAD(const int level) const; diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h new file mode 100644 index 0000000000000000000000000000000000000000..225f615ec3ad2d8ef11ec295f8d9e8a4166d99fe --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h @@ -0,0 +1,108 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ChimeraTransformation.h +//! \ingroup LBM/GPUHelperFunctions +//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters +//======================================================================================= +#ifndef CHIMERA_TRANSFORMATION_H +#define CHIMERA_TRANSFORMATION_H + +#include "LBM/LB.h" + +#include <lbm/constants/NumericConstants.h> + +using namespace vf::lbm::constant; + +namespace vf::gpu +{ + +//////////////////////////////////////////////////////////////////////////////// +//! \brief forward chimera transformation \ref forwardInverseChimeraWithK +//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> Modified for lower round-off errors. +__inline__ __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) +{ + real m2 = mfa + mfc; + real m1 = mfc - mfa; + real m0 = m2 + mfb; + mfa = m0; + m0 *= Kinverse; + m0 += c1o1; + mfb = (m1 * Kinverse - m0 * vv) * K; + mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief backward chimera transformation \ref backwardInverseChimeraWithK +//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> Modified for lower round-off errors. 
+__inline__ __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) +{ + real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K; + real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K; + mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K; + mfa = m0; + mfb = m1; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief forward chimera transformation \ref forwardChimera +//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off +//! errors. +__inline__ __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) +{ + real m1 = (mfa + mfc) + mfb; + real m2 = mfc - mfa; + mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); + mfb = m2 - vv * m1; + mfa = m1; +} + +//////////////////////////////////////////////////////////////////////////////// +//! \brief backward chimera transformation \ref backwardChimera +//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 +//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off +//! errors.
+__inline__ __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) +{ + real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); + real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv; + mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); + mfb = mb; + mfa = ma; +} + +} // namespace vf::gpu + +#endif diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h new file mode 100644 index 0000000000000000000000000000000000000000..37208ee59586533fa7f8ffbc269246826ed27fb8 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h @@ -0,0 +1,198 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file KernelUtilities.h +//! \ingroup LBM/GPUHelperFunctions +//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters +//======================================================================================= +#ifndef KERNEL_UTILITIES_H +#define KERNEL_UTILITIES_H + +#include "LBM/LB.h" +#include "lbm/constants/D3Q27.h" +#include "lbm/constants/NumericConstants.h" + +using namespace vf::lbm::constant; +using namespace vf::lbm::dir; + +namespace vf::gpu +{ + +__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned long long numberOfLBnodes, const bool isEvenTimestep) +{ + if (isEvenTimestep) + { + dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes]; + dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; + dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes]; + dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; + dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; + dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes]; + dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes]; + dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; + dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; + dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; + dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; + dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes]; + dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes]; + dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes]; + dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes]; + dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes]; + dist.f[DIR_0MM] = 
&distributionArray[DIR_0MM * numberOfLBnodes]; + dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes]; + dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes]; + dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes]; + dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes]; + dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes]; + dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes]; + dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes]; + dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes]; + dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes]; + dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes]; + } + else + { + dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes]; + dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes]; + dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes]; + dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes]; + dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes]; + dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes]; + dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes]; + dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes]; + dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes]; + dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes]; + dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes]; + dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes]; + dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes]; + dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes]; + dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes]; + dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes]; + dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes]; + dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes]; + dist.f[DIR_000] = &distributionArray[DIR_000 * 
numberOfLBnodes]; + dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes]; + dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes]; + dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes]; + dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes]; + dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes]; + dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes]; + dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes]; + dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes]; + } +} + +__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices) +{ + subgridD.q[DIR_P00] = &subgridDistances[DIR_P00 * numberOfSubgridIndices]; + subgridD.q[DIR_M00] = &subgridDistances[DIR_M00 * numberOfSubgridIndices]; + subgridD.q[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfSubgridIndices]; + subgridD.q[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfSubgridIndices]; + subgridD.q[DIR_00P] = &subgridDistances[DIR_00P * numberOfSubgridIndices]; + subgridD.q[DIR_00M] = &subgridDistances[DIR_00M * numberOfSubgridIndices]; + subgridD.q[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfSubgridIndices]; + subgridD.q[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfSubgridIndices]; + subgridD.q[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfSubgridIndices]; + subgridD.q[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfSubgridIndices]; + subgridD.q[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfSubgridIndices]; + subgridD.q[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfSubgridIndices]; + subgridD.q[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfSubgridIndices]; + subgridD.q[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfSubgridIndices]; + subgridD.q[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfSubgridIndices]; + subgridD.q[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfSubgridIndices]; + subgridD.q[DIR_0PM] = 
&subgridDistances[DIR_0PM * numberOfSubgridIndices]; + subgridD.q[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfSubgridIndices]; + subgridD.q[DIR_000] = &subgridDistances[DIR_000 * numberOfSubgridIndices]; + subgridD.q[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfSubgridIndices]; + subgridD.q[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfSubgridIndices]; + subgridD.q[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfSubgridIndices]; + subgridD.q[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfSubgridIndices]; + subgridD.q[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfSubgridIndices]; + subgridD.q[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfSubgridIndices]; + subgridD.q[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfSubgridIndices]; + subgridD.q[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfSubgridIndices]; +} + +__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight) +{ + return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq); +} + +__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq, + const real& omega, const real& velocity, const real weight) +{ + + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q); +} + +__inline__ __device__ real getBounceBackDistributionForVeloBC( const real& f, + const real& velocity, const real weight) +{ + + return f - (c6o1 * weight * velocity); +} + +__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, + const real& omega) +{ + + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + + (q * (f + fInverse)) / (c1o1 + q); +} + + +__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& 
q, const real& f, const real& fInverse, const real& feq, + const real& omega, const real& drho, const real& velocity, const real weight) +{ + + return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 + + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho; +} + +__inline__ __device__ unsigned int getNodeIndex() +{ + const unsigned x = threadIdx.x; + const unsigned y = blockIdx.x; + const unsigned z = blockIdx.y; + + const unsigned nx = blockDim.x; + const unsigned ny = gridDim.x; + + return nx * (ny * z + y) + x; +} + +__inline__ __device__ bool isValidFluidNode(uint nodeType) +{ + return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); +} + + +} + +#endif diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h new file mode 100644 index 0000000000000000000000000000000000000000..53990e452be06dc6840c801816e8231d26861e2e --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h @@ -0,0 +1,136 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ScalingUtilities.h +//! \ingroup LBM/GPUHelperFunctions +//! \author Martin Schoenherr, Anna Wellmann +//======================================================================================= +#ifndef SCALING_HELPER_FUNCTIONS_H +#define SCALING_HELPER_FUNCTIONS_H + +#include "LBM/LB.h" +#include "lbm/constants/D3Q27.h" +#include "lbm/constants/NumericConstants.h" + +using namespace vf::lbm::constant; +using namespace vf::lbm::dir; + +namespace vf::gpu +{ + +__device__ __inline__ void calculateMomentsOnSourceNodes(Distributions27 &dist, real &omega, unsigned int &k_000, + unsigned int &k_M00, unsigned int &k_0M0, unsigned int &k_00M, + unsigned int &k_MM0, unsigned int &k_M0M, unsigned int &k_0MM, + unsigned int &k_MMM, real &drho, real &velocityX, + real &velocityY, real &velocityZ, real &kxyFromfcNEQ, + real &kyzFromfcNEQ, real &kxzFromfcNEQ, real &kxxMyyFromfcNEQ, + real &kxxMzzFromfcNEQ) +{ + //////////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions (f's) on source nodes: + //! 
+ real f_000 = (dist.f[DIR_000])[k_000]; + real f_P00 = (dist.f[DIR_P00])[k_000]; + real f_M00 = (dist.f[DIR_M00])[k_M00]; + real f_0P0 = (dist.f[DIR_0P0])[k_000]; + real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; + real f_00P = (dist.f[DIR_00P])[k_000]; + real f_00M = (dist.f[DIR_00M])[k_00M]; + real f_PP0 = (dist.f[DIR_PP0])[k_000]; + real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; + real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; + real f_MP0 = (dist.f[DIR_MP0])[k_M00]; + real f_P0P = (dist.f[DIR_P0P])[k_000]; + real f_M0M = (dist.f[DIR_M0M])[k_M0M]; + real f_P0M = (dist.f[DIR_P0M])[k_00M]; + real f_M0P = (dist.f[DIR_M0P])[k_M00]; + real f_0PP = (dist.f[DIR_0PP])[k_000]; + real f_0MM = (dist.f[DIR_0MM])[k_0MM]; + real f_0PM = (dist.f[DIR_0PM])[k_00M]; + real f_0MP = (dist.f[DIR_0MP])[k_0M0]; + real f_PPP = (dist.f[DIR_PPP])[k_000]; + real f_MPP = (dist.f[DIR_MPP])[k_M00]; + real f_PMP = (dist.f[DIR_PMP])[k_0M0]; + real f_MMP = (dist.f[DIR_MMP])[k_MM0]; + real f_PPM = (dist.f[DIR_PPM])[k_00M]; + real f_MPM = (dist.f[DIR_MPM])[k_M0M]; + real f_PMM = (dist.f[DIR_PMM])[k_0MM]; + real f_MMM = (dist.f[DIR_MMM])[k_MMM]; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), + //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! 
+ drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + + (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + + ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + + ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + + f_000; + + real oneOverRho = c1o1 / (c1o1 + drho); + + velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + + (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * + oneOverRho; + velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + + (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * + oneOverRho; + velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + + (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) * + oneOverRho; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate second order moments for interpolation + //! 
+ // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction + kxyFromfcNEQ = -c3o1 * omega * + ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) / + (c1o1 + drho) - + ((velocityX * velocityY))); + kyzFromfcNEQ = -c3o1 * omega * + ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) / + (c1o1 + drho) - + ((velocityY * velocityZ))); + kxzFromfcNEQ = -c3o1 * omega * + ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) / + (c1o1 + drho) - + ((velocityX * velocityZ))); + kxxMyyFromfcNEQ = -c3o2 * omega * + ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / + (c1o1 + drho) - + ((velocityX * velocityX - velocityY * velocityY))); + kxxMzzFromfcNEQ = -c3o2 * omega * + ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / + (c1o1 + drho) - + ((velocityX * velocityX - velocityZ * velocityZ))); +} + +} // namespace vf::gpu + +#endif diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index eea4adfda3c1ef0862f39ef58fc6e065af7bab1b..cfdbbbae040a13f94e97d40d702b93d5a1e19c86 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -15,9 +15,9 @@ ////////////////////////// //porous media -#define GEO_PM_0 5 -#define GEO_PM_1 6 -#define GEO_PM_2 7 +#define GEO_PM_0 5 +#define GEO_PM_1 6 +#define GEO_PM_2 7 ////////////////////////// #define GEO_SOLID 15 @@ -53,17 +53,33 @@ //! \brief An enumeration for selecting a turbulence model enum class TurbulenceModel { //! - Smagorinsky - Smagorinsky, + Smagorinsky, //! - AMD (Anisotropic Minimum Dissipation) model, see e.g. Rozema et al., Phys. Fluids 27, 085107 (2015), https://doi.org/10.1063/1.4928700 - AMD, + AMD, //! - QR model by Verstappen - QR, + QR, //! 
- TODO: move the WALE model here from the old kernels //WALE //! - No turbulence model - None + None }; +//! \brief An enumeration for selecting a template of the collision kernel (CumulantK17) +enum class CollisionTemplate { + //! - Default: plain collision without additional read/write + Default, + //! - WriteMacroVars: collision that also writes out macroscopic variables + WriteMacroVars, + //! - ApplyBodyForce: collision that also reads and applies the body force in the collision kernel + ApplyBodyForce, + //! - AllFeatures: collision that writes out macroscopic variables AND reads and applies the body force + AllFeatures, + //! - SubDomainBorder: collision on border nodes + SubDomainBorder +}; +constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::SubDomainBorder}; +constexpr std::initializer_list<CollisionTemplate> bulk_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures}; + struct InitCondition { real Re; @@ -144,6 +160,7 @@ struct InitCondition bool hasWallModelMonitor {false}; bool simulatePorousMedia {false}; bool streetVelocityFile {false}; + real outflowPressureCorrectionFactor {0.0}; }; //Interface Cells @@ -174,7 +191,7 @@ typedef struct OffFC{ // Distribution functions g 6 typedef struct Distri6 { - real* g[6]; + real* g[6]; } Distributions6; // Distribution functions f 7 @@ -214,6 +231,21 @@ typedef struct QforBC{ real *normalX, *normalY, *normalZ; }QforBoundaryConditions; +typedef struct QforPrecursorBC{ + int* k; + int numberOfBCnodes=0; + int sizeQ; + int numberOfPrecursorNodes=0; + uint nPrecursorReads=0; + uint timeStepsBetweenReads; + size_t numberOfQuantities; + real* q27[27]; + uint* planeNeighbor0PP, *planeNeighbor0PM, *planeNeighbor0MP, *planeNeighbor0MM; + real* weights0PP, *weights0PM, *weights0MP, *weights0MM;
+ real* last, *current, *next; + real velocityX, velocityY, velocityZ; +}QforPrecursorBoundaryConditions; + //BCTemp typedef struct TempforBC{ int* k; @@ -249,57 +281,56 @@ typedef struct WMparas{ real* Fz; }WallModelParameters; + //measurePoints typedef struct MeasP{ - std::string name; - uint k; - std::vector<real> Vx; - std::vector<real> Vy; - std::vector<real> Vz; - std::vector<real> Rho; - //real* Vx; - //real* Vy; - //real* Vz; - //real* Rho; + std::string name; + uint k; + std::vector<real> Vx; + std::vector<real> Vy; + std::vector<real> Vz; + std::vector<real> Rho; + //real* Vx; + //real* Vy; + //real* Vz; + //real* Rho; }MeasurePoints; //Process Neighbors typedef struct PN27{ - real* f[27]; - uint memsizeFs; - int* index; - uint memsizeIndex; - uint rankNeighbor; - int numberOfNodes; - int numberOfFs; + real* f[27]; + uint memsizeFs; + int* index; + uint memsizeIndex; + uint rankNeighbor; + int numberOfNodes; + int numberOfFs; }ProcessNeighbor27; typedef struct PN_F3 { - real* g[6]; - uint memsizeGs; - int* index; - uint memsizeIndex; - uint rankNeighbor; - int numberOfNodes; - int numberOfGs; + real* g[6]; + uint memsizeGs; + int* index; + uint memsizeIndex; + uint rankNeighbor; + int numberOfNodes; + int numberOfGs; }ProcessNeighborF3; //path line particles typedef struct PLP{ - bool *stuck, *hot; - real *coordXabsolut, *coordYabsolut, *coordZabsolut; - real *coordXlocal, *coordYlocal, *coordZlocal; - real *veloX, *veloY, *veloZ; - real *randomLocationInit; - uint *timestep; - uint *ID; - uint *cellBaseID; - uint numberOfParticles, numberOfTimestepsParticles; - uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC; + bool *stuck, *hot; + real *coordXabsolut, *coordYabsolut, *coordZabsolut; + real *coordXlocal, *coordYlocal, *coordZlocal; + real *veloX, *veloY, *veloZ; + real *randomLocationInit; + uint *timestep; + uint *ID; + uint *cellBaseID; + uint numberOfParticles, numberOfTimestepsParticles; + uint memSizeID, 
memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC; }PathLineParticles; - - ////////////////////////////////////////////////////////////////////////// inline int vectorPosition(int i, int j, int k, int Lx, int Ly ) { @@ -308,7 +339,4 @@ inline int vectorPosition(int i, int j, int k, int Lx, int Ly ) } ////////////////////////////////////////////////////////////////////////// - #endif - - diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 703e935e6edb5676c7d6e045a38e3ec20d7a4b41..84ab84ff93fa7706bcc27d7e61a18f580f3c8dbe 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -97,11 +97,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa gridProvider.allocAndCopyForcing(); gridProvider.allocAndCopyQuadricLimiters(); - if (para->getKernelNeedsFluidNodeIndicesToRun()) { - gridProvider.allocArrays_fluidNodeIndices(); - gridProvider.allocArrays_fluidNodeIndicesBorder(); - } - + gridProvider.setDimensions(); gridProvider.setBoundingBox(); @@ -113,12 +109,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa para->setStartTurn((unsigned int)0); // 100000 restart_object = std::make_shared<ASCIIRestartObject>(); - ////////////////////////////////////////////////////////////////////////// - // CUDA streams - if (para->getUseStreams()) { - para->getStreamManager()->launchStreams(2u); - para->getStreamManager()->createCudaEvents(); - } + ////////////////////////////////////////////////////////////////////////// VF_LOG_INFO("LB_Modell: D3Q{}", para->getD3Qxx()); VF_LOG_INFO("Re: {}", para->getRe()); @@ -134,14 +125,32 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa ////////////////////////////////////////////////////////////////////////// allocNeighborsOffsetsScalesAndBoundaries(gridProvider); + //! 
Get tagged fluid nodes with corresponding value for CollisionTemplate from interactors for (SPtr<PreCollisionInteractor> actuator : para->getActuators()) { actuator->init(para.get(), &gridProvider, cudaMemoryManager.get()); + actuator->getTaggedFluidNodes( para.get(), &gridProvider ); } for (SPtr<PreCollisionInteractor> probe : para->getProbes()) { probe->init(para.get(), &gridProvider, cudaMemoryManager.get()); + probe->getTaggedFluidNodes( para.get(), &gridProvider ); } + ////////////////////////////////////////////////////////////////////////// + // CUDA streams + if (para->getUseStreams()) { + para->getStreamManager()->registerStream(CudaStreamIndex::SubDomainBorder); + para->getStreamManager()->registerStream(CudaStreamIndex::Bulk); + para->getStreamManager()->launchStreams(); + para->getStreamManager()->createCudaEvents(); + } + ////////////////////////////////////////////////////////////////////////// + + if (para->getKernelNeedsFluidNodeIndicesToRun()) + { + gridProvider.sortFluidNodeTags(); + gridProvider.allocArrays_taggedFluidNodes(); + } ////////////////////////////////////////////////////////////////////////// // Kernel init ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu new file mode 100644 index 0000000000000000000000000000000000000000..f7bb2e680c0fb3ea597239ee0cbc1772f2efe81b --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu @@ -0,0 +1,179 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | 
________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DistributionDebugInspector.cu +//! \ingroup Output +//! 
\author Henrik Asmuth, Henry Korb +//====================================================================================== +#include "DistributionDebugInspector.h" + +#include "Parameter/Parameter.h" +#include "lbm/constants/D3Q27.h" +#include "lbm/constants/NumericConstants.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" + +#include <cuda/CudaGrid.h> +#include <cuda.h> + +#include <iostream> + +using namespace vf::lbm::constant; +using namespace vf::lbm::dir; +using namespace vf::gpu; + +__global__ void printFs( + real* distributions, + bool isEvenTimestep, + unsigned long long numberOfFluidNodes, + uint* neighborX, + uint* neighborY, + uint* neighborZ, + uint* typeOfGridNode, + real* coordX, + real* coordY, + real* coordZ, + real minX, + real maxX, + real minY, + real maxY, + real minZ, + real maxZ) +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned k_000 = getNodeIndex(); + + if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) + return; + + real coordNodeX = coordX[k_000]; + real coordNodeY = coordY[k_000]; + real coordNodeZ = coordZ[k_000]; + + if( coordNodeX>=minX && coordNodeX<=maxX && + coordNodeY>=minY && coordNodeY<=maxY && + coordNodeZ>=minZ && coordNodeZ<=maxZ ) + { + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfFluidNodes, isEvenTimestep); + //////////////////////////////////////////////////////////////////////////////// + //! - Set neighbor indices (necessary for indirect addressing) + uint k_M00 = neighborX[k_000]; + uint k_0M0 = neighborY[k_000]; + uint k_00M = neighborZ[k_000]; + uint k_MM0 = neighborY[k_M00]; + uint k_M0M = neighborZ[k_M00]; + uint k_0MM = neighborZ[k_0M0]; + uint k_MMM = neighborZ[k_MM0]; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set local distributions + //! 
+ real f_000 = (dist.f[DIR_000])[k_000]; + real f_P00 = (dist.f[DIR_P00])[k_000]; + real f_M00 = (dist.f[DIR_M00])[k_M00]; + real f_0P0 = (dist.f[DIR_0P0])[k_000]; + real f_0M0 = (dist.f[DIR_0M0])[k_0M0]; + real f_00P = (dist.f[DIR_00P])[k_000]; + real f_00M = (dist.f[DIR_00M])[k_00M]; + real f_PP0 = (dist.f[DIR_PP0])[k_000]; + real f_MM0 = (dist.f[DIR_MM0])[k_MM0]; + real f_PM0 = (dist.f[DIR_PM0])[k_0M0]; + real f_MP0 = (dist.f[DIR_MP0])[k_M00]; + real f_P0P = (dist.f[DIR_P0P])[k_000]; + real f_M0M = (dist.f[DIR_M0M])[k_M0M]; + real f_P0M = (dist.f[DIR_P0M])[k_00M]; + real f_M0P = (dist.f[DIR_M0P])[k_M00]; + real f_0PP = (dist.f[DIR_0PP])[k_000]; + real f_0MM = (dist.f[DIR_0MM])[k_0MM]; + real f_0PM = (dist.f[DIR_0PM])[k_00M]; + real f_0MP = (dist.f[DIR_0MP])[k_0M0]; + real f_PPP = (dist.f[DIR_PPP])[k_000]; + real f_MPP = (dist.f[DIR_MPP])[k_M00]; + real f_PMP = (dist.f[DIR_PMP])[k_0M0]; + real f_MMP = (dist.f[DIR_MMP])[k_MM0]; + real f_PPM = (dist.f[DIR_PPM])[k_00M]; + real f_MPM = (dist.f[DIR_MPM])[k_M0M]; + real f_PMM = (dist.f[DIR_PMM])[k_0MM]; + real f_MMM = (dist.f[DIR_MMM])[k_MMM]; + + real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) + + (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) + + ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) + + ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) + + f_000; + + real oneOverRho = c1o1 / (c1o1 + drho); + + real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) + + (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) * + oneOverRho; + real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) + + (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) * + oneOverRho; + real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) + + (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + 
(f_P0P - f_M0M))) + (f_00P - f_00M)) * + oneOverRho; + + printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz); + printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM); + + } + +} + + + + +void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t) +{ + if(this->inspectionLevel!=level) + return; + + std::cout << tag << ": distributions on level " << level << " at t " << t << std::endl; + + vf::cuda::CudaGrid cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + printFs <<< cudaGrid.grid, cudaGrid.threads >>>( para->getParD(level)->distributions.f[0], + para->getParD(level)->isEvenTimestep, + para->getParD(level)->numberOfNodes, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->coordinateX, + para->getParD(level)->coordinateY, + para->getParD(level)->coordinateZ, + minX, + maxX, + minY, + maxY, + minZ, + maxZ); + +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h new file mode 100644 index 0000000000000000000000000000000000000000..95fea46d4eba0c2f2ff0846d22ee5da4f6c357ea --- /dev/null +++ 
b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h @@ -0,0 +1,76 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DistributionDebugInspector.h +//! \author Henrik Asmuth +//! \date 13/012/2022 +//! \brief Basic debugging class to print out f's in a certain area of the domain +//! +//! Basic debugging class. Needs to be directly added in UpdateGrid (could potentially also be added as a proper Probe in the future) +//! How to use: Define a part of the domain via min/max x, y, and z. 
The DistributionDebugInspector will print out all f's in that area. +//! +//======================================================================================= + +#ifndef DISTRIBUTION_INSPECTOR_H +#define DISTRIBUTION_INSPECTOR_H + +#include "Parameter/Parameter.h" + + +class DistributionDebugInspector +{ +public: + DistributionDebugInspector(uint _inspectionLevel, real _minX, real _maxX, real _minY, real _maxY, real _minZ, real _maxZ, std::string _tag): + inspectionLevel(_inspectionLevel), + minX(_minX), + maxX(_maxX), + minY(_minY), + maxY(_maxY), + minZ(_minZ), + maxZ(_maxZ), + tag(_tag) + {}; + + ~DistributionDebugInspector() = default; + + void inspect(std::shared_ptr<Parameter> para, uint level, uint t); + + +private: +uint inspectionLevel; +real minX; +real maxX; +real minY; +real maxY; +real minZ; +real maxZ; +std::string tag; + +}; + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp index c6e53ee3cbfb98f11e373ca014c7faf4e70a86f0..edf705421530bdbc9c2c9fd8c44eca6d3c5ab923 100644 --- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp +++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp @@ -50,7 +50,7 @@ void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int tim void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level) { - const unsigned int numberOfParts = para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1; + const unsigned int numberOfParts = (uint)para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1; std::vector<std::string> fname; std::vector<std::string> fnameMed; @@ -217,8 +217,8 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev for (unsigned int part = 0; part < fname.size(); part++) { - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * 
para->getlimitOfNodesForVTK()); + if (((part + 1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); else sizeOfNodes = para->getlimitOfNodesForVTK(); @@ -340,8 +340,8 @@ void FileWriter::writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int for (unsigned int part = 0; part < fname.size(); part++) { - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); else sizeOfNodes = para->getlimitOfNodesForVTK(); @@ -449,9 +449,9 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i { //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -558,8 +558,8 @@ void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> par for (unsigned int part = 0; part < fname.size(); part++) { - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * 
para->getlimitOfNodesForVTK()); else sizeOfNodes = para->getlimitOfNodesForVTK(); ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp index 0b1e9dc1c25457457eabe3013a288c4c93577dc3..4d5895b323efa1b94a5780a59c882fd5ce1be7eb 100644 --- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp @@ -290,10 +290,10 @@ void writeNeighborXPointsDebug(Parameter *para) nodesVec.resize(nodeNumberVec); int nodeCount2 = 0; for (int level = 0; level <= para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]]; - real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]]; - real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]]; + real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]]; + real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]]; nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); } @@ -317,18 +317,18 @@ void writeNeighborXLinesDebug(Parameter *para) nodesVec.resize(nodeNumberVec * 2); int nodeCount = 0; for (int level = 0; level < para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[u]; - real x2 = para->getParH(level)->coordinateY[u]; - real x3 = para->getParH(level)->coordinateZ[u]; - real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]]; - real x2N = 
para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]]; - real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[index]; + real x2 = para->getParH(level)->coordinateY[index]; + real x3 = para->getParH(level)->coordinateZ[index]; + real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]]; + real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]]; + real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]]; nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N))); - if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) { + if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) { cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1)); } } @@ -350,10 +350,10 @@ void writeNeighborYPointsDebug(Parameter *para) nodesVec.resize(nodeNumberVec); int nodeCount2 = 0; for (int level = 0; level <= para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]]; - real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]]; - real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]]; + real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]]; + real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]]; nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); } @@ -377,18 +377,18 @@ void 
writeNeighborYLinesDebug(Parameter *para) nodesVec.resize(nodeNumberVec * 2); int nodeCount = 0; for (int level = 0; level < para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[u]; - real x2 = para->getParH(level)->coordinateY[u]; - real x3 = para->getParH(level)->coordinateZ[u]; - real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]]; - real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]]; - real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[index]; + real x2 = para->getParH(level)->coordinateY[index]; + real x3 = para->getParH(level)->coordinateZ[index]; + real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]]; + real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]]; + real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]]; nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N))); - if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) { + if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) { cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1)); } } @@ -410,10 +410,10 @@ void writeNeighborZPointsDebug(Parameter *para) nodesVec.resize(nodeNumberVec); int nodeCount2 = 0; for (int level = 0; level <= para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]]; - real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]]; - real x3 = 
para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]]; + real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]]; + real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]]; nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); } @@ -437,18 +437,18 @@ void writeNeighborZLinesDebug(Parameter *para) nodesVec.resize(nodeNumberVec * 2); int nodeCount = 0; for (int level = 0; level < para->getMaxLevel(); level++) { - for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) { - real x1 = para->getParH(level)->coordinateX[u]; - real x2 = para->getParH(level)->coordinateY[u]; - real x3 = para->getParH(level)->coordinateZ[u]; - real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]]; - real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]]; - real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]]; + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { + real x1 = para->getParH(level)->coordinateX[index]; + real x2 = para->getParH(level)->coordinateY[index]; + real x3 = para->getParH(level)->coordinateZ[index]; + real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]]; + real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]]; + real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]]; nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3))); nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N))); - if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) { + if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) { cellsVec.push_back(makeUbTuple(nodeCount - 2, 
nodeCount - 1)); } } diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp index 83f0a677b0012153cf079b466a333acc58bda6be..57139d25ae4d046e1dd1be1f3ef5e179daf0872e 100644 --- a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp @@ -5,53 +5,57 @@ #include "Logger.h" #include "Parameter/Parameter.h" #include "basics/utilities/UbSystem.h" -#include "grid/NodeValues.h" +#include "gpu/GridGenerator/grid/NodeValues.h" #include "lbm/constants/D3Q27.h" #include <basics/writer/WbWriterVtkXmlBinary.h> -#include "Utilities/FindNeighbors.h" -#include "VirtualFluids_GPU/Communication/Communicator.h" #include "Core/StringUtilities/StringUtil.h" +#include "Utilities/FindNeighbors.h" +#include "gpu/VirtualFluids_GPU/Communication/Communicator.h" namespace NeighborDebugWriter { -inline void writeNeighborLinkLines(Parameter *para, const int level, const uint numberOfNodes, const int direction, - const std::string &name) +inline void writeNeighborLinkLines(LBMSimulationParameter *parH, int direction, const std::string &name, + WbWriter *writer) { VF_LOG_INFO("Write node links in direction {}.", direction); - std::vector<UbTupleFloat3> nodes(numberOfNodes * 2); - std::vector<UbTupleInt2> cells(numberOfNodes); - for (uint position = 0; position < numberOfNodes; position++) { - if (para->getParH(level)->typeOfGridNode[position] != GEO_FLUID) + const unsigned long long numberOfNodes = parH->numberOfNodes; + std::vector<UbTupleFloat3> nodes; + nodes.reserve(numberOfNodes); + std::vector<UbTupleInt2> cells; + cells.reserve(numberOfNodes/2); + + for (size_t position = 0; position < numberOfNodes; position++) { + if (parH->typeOfGridNode[position] != GEO_FLUID) continue; - const double x1 = para->getParH(level)->coordinateX[position]; - const double x2 = para->getParH(level)->coordinateY[position]; - const double x3 = 
para->getParH(level)->coordinateZ[position]; + const double x1 = parH->coordinateX[position]; + const double x2 = parH->coordinateY[position]; + const double x3 = parH->coordinateZ[position]; - const uint positionNeighbor = getNeighborIndex(para->getParH(level).get(), position, direction); + const uint positionNeighbor = getNeighborIndex(parH, (uint)position, direction); - const double x1Neighbor = para->getParH(level)->coordinateX[positionNeighbor]; - const double x2Neighbor = para->getParH(level)->coordinateY[positionNeighbor]; - const double x3Neighbor = para->getParH(level)->coordinateZ[positionNeighbor]; + const double x1Neighbor = parH->coordinateX[positionNeighbor]; + const double x2Neighbor = parH->coordinateY[positionNeighbor]; + const double x3Neighbor = parH->coordinateZ[positionNeighbor]; nodes.emplace_back(float(x1), float(x2), float(x3)); nodes.emplace_back(float(x1Neighbor), float(x2Neighbor), float(x3Neighbor)); cells.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1); } - WbWriterVtkXmlBinary::getInstance()->writeLines(name, nodes, cells); + writer->writeLines(name, nodes, cells); } inline void writeNeighborLinkLinesDebug(Parameter *para) { for (int level = 0; level <= para->getMaxLevel(); level++) { - for (int direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) { + for (size_t direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) { const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_Link_" + std::to_string(direction) + "_Debug.vtk"; - writeNeighborLinkLines(para, level, para->getParH(level)->numberOfNodes, direction, fileName); + writeNeighborLinkLines(para->getParH(level).get(), (int)direction, fileName, WbWriterVtkXmlBinary::getInstance()); } } } diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..a19ed3d723f28998f5d27cd15ebf4bab8ba061c4 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp @@ -0,0 +1,79 @@ +#include <gmock/gmock.h> +#include "NeighborDebugWriter.hpp" +#include "gpu/VirtualFluids_GPU/Utilities/testUtilitiesGPU.h" + +class WbWriterSpy : public WbWriter +{ +public: + std::string writeLines(const std::string & /*filename*/, std::vector<UbTupleFloat3> &nodes, + std::vector<UbTupleInt2> &lines) override + { + this->nodes = nodes; + this->lines = lines; + return ""; + } + std::vector<UbTupleFloat3> nodes; + std::vector<UbTupleInt2> lines; + + std::string getFileExtension() override { return ""; } +}; + +class NeighborDebugWriterTest : public testing::Test +{ +protected: + void SetUp() override + { + typeOfGridNode = std::vector<uint>(numberOfNodes, GEO_FLUID); + neighbors = std::vector<uint>(numberOfNodes, 2); + coordinates = std::vector<real>(numberOfNodes, 1.0); + coordinates[2] = 3.0; + + parH->numberOfNodes = numberOfNodes; + parH->coordinateX = coordinates.data(); + parH->coordinateY = coordinates.data(); + parH->coordinateZ = coordinates.data(); + parH->neighborX = neighbors.data(); + parH->typeOfGridNode = typeOfGridNode.data(); + } + + const int level = 0; + const unsigned long long numberOfNodes = 3; + const uint direction = vf::lbm::dir::DIR_P00; // x + std::unique_ptr<LBMSimulationParameter> parH = std::make_unique<LBMSimulationParameter>(); + WbWriterSpy writerSpy; + std::vector<uint> typeOfGridNode; + std::vector<uint> neighbors; + std::vector<real> coordinates; +}; + +TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_onlyFLuidNodes_writesAllNodes) +{ + UbTupleFloat3 oneCoord(1.0, 1.0, 1.0); + UbTupleFloat3 threeCoord(3.0, 3.0, 3.0); + std::vector<UbTupleFloat3> expectedNodes = { oneCoord, threeCoord, oneCoord, threeCoord, threeCoord, threeCoord }; + std::vector<UbTupleInt2> expectedLines = { UbTupleInt2(0, 1), UbTupleInt2(2, 3), UbTupleInt2(4, 5) }; + + 
NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy); + + EXPECT_THAT(writerSpy.nodes.size(), testing::Eq(numberOfNodes * 2)); + EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfNodes)); + EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes)); + EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines)); +} + +TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_fluidAndSolidNodes_writesOnlyFluidNodes) +{ + typeOfGridNode[2] = GEO_SOLID; + + UbTupleFloat3 oneCoord(1.0, 1.0, 1.0); + UbTupleFloat3 threeCoord(3.0, 3.0, 3.0); + std::vector<UbTupleFloat3> expectedNodes = { oneCoord, threeCoord, oneCoord, threeCoord}; + std::vector<UbTupleInt2> expectedLines = { UbTupleInt2(0, 1), UbTupleInt2(2, 3)}; + + NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy); + + EXPECT_THAT(writerSpy.nodes.size(), testing::Eq((numberOfNodes-1) * 2)); + EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfNodes-1)); + EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes)); + EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines)); +} diff --git a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp index 456f9c148c75c27fb899f976ba4f99b109fc3d4b..ce611d25d1aa3f9e98840a0f04d9b2045d0a224f 100644 --- a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp @@ -33,9 +33,9 @@ public: { out.writeInteger(para->getParH(level)->numberOfNodes); out.writeLine(); - for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++) + for(size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { - out.writeInteger(para->getParH(level)->typeOfGridNode[u]); + out.writeInteger(para->getParH(level)->typeOfGridNode[index]); } out.writeLine(); } //end levelloop @@ -46,9 +46,9 @@ public: { out.writeInteger(para->getParH(level)->numberOfNodes); out.writeLine(); - for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++) 
+ for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { - out.writeInteger(para->getParH(level)->neighborX[u]); + out.writeInteger(para->getParH(level)->neighborX[index]); } out.writeLine(); } //end levelloop @@ -59,9 +59,9 @@ public: { out.writeInteger(para->getParH(level)->numberOfNodes); out.writeLine(); - for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++) + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { - out.writeInteger(para->getParH(level)->neighborY[u]); + out.writeInteger(para->getParH(level)->neighborY[index]); } out.writeLine(); } //end levelloop @@ -72,9 +72,9 @@ public: { out.writeInteger(para->getParH(level)->numberOfNodes); out.writeLine(); - for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++) + for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) { - out.writeInteger(para->getParH(level)->neighborZ[u]); + out.writeInteger(para->getParH(level)->neighborZ[index]); } out.writeLine(); } //end levelloop diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d567c695a0e33b7a88c2c8cf3bcb88093ce5b802 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp @@ -0,0 +1,96 @@ +#ifndef QVTKWRITER_HPP +#define QVTKWRITER_HPP + +#include <array> +#include <vector> + +#include "basics/Core/StringUtilities/StringUtil.h" +#include "basics/utilities/UbSystem.h" +#include "basics/writer/WbWriterVtkXmlBinary.h" +#include "lbm/constants/D3Q27.h" +#include "logger/Logger.h" + +#include "gpu/GridGenerator/grid/NodeValues.h" +#include "gpu/VirtualFluids_GPU/Communication/Communicator.h" +#include "gpu/VirtualFluids_GPU/LBM/LB.h" +#include "gpu/VirtualFluids_GPU/Parameter/Parameter.h" +#include "gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h" + +namespace QDebugVtkWriter +{ + +using namespace vf::lbm::dir; + 
+namespace +{ +inline void modifyLineLengthsForQs(const std::array<double, 3> &coords, std::array<double, 3> &neighborCoords, real q) +{ + if (q == 1.0 || q <= 0.0) + return; + + const auto dx = neighborCoords[0] - coords[0]; + const auto dy = neighborCoords[1] - coords[1]; + const auto dz = neighborCoords[2] - coords[2]; + + neighborCoords[0] = coords[0] + q * dx; + neighborCoords[1] = coords[1] + q * dy; + neighborCoords[2] = coords[2] + q * dz; +} + +inline void writeQLines(LBMSimulationParameter *parH, QforBoundaryConditions &boundaryQ, const std::string &filepath, + WbWriter *writer) +{ + VF_LOG_INFO("Write qs in for boundary condition to {}.", filepath); + + const auto numberOfNodes = boundaryQ.numberOfBCnodes; + std::vector<UbTupleFloat3> nodes; + nodes.reserve(numberOfNodes * 8 * 2); + std::vector<UbTupleInt2> lines; + lines.reserve(numberOfNodes * 8); + + std::vector<std::string> dataNames = { "nodeIndex", "q" }; + std::vector<std::vector<float>> lineData(2); + + for (size_t i = 0; i < numberOfNodes; i++) { + const auto nodeIndex = boundaryQ.k[i]; + const std::array<double, 3> coords = { parH->coordinateX[nodeIndex], parH->coordinateY[nodeIndex], + parH->coordinateZ[nodeIndex] }; + + for (size_t direction = 1; direction < ENDDIR; direction++) { + + const auto q = boundaryQ.q27[direction][i]; + if (q <= (real)0.0) { + continue; + } + + const auto positionNeighbor = getNeighborIndex(parH, (uint)nodeIndex, (int)direction); + + std::array<double, 3> neighborCoords = { parH->coordinateX[positionNeighbor], + parH->coordinateY[positionNeighbor], + parH->coordinateZ[positionNeighbor] }; + + modifyLineLengthsForQs(coords, neighborCoords, q); + + nodes.emplace_back(float(coords[0]), float(coords[1]), coords[2]); + nodes.emplace_back(float(neighborCoords[0]), float(neighborCoords[1]), float(neighborCoords[2])); + + lines.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1); + lineData[0].push_back(nodeIndex); + lineData[1].push_back(q); + } + } + + 
writer->writeLinesWithLineData(filepath, nodes, lines, dataNames, lineData); +} +} // namespace + +inline void writeQLinesDebug(Parameter *para, QforBoundaryConditions &boundaryQ, uint level, const std::string& fileName) +{ + const auto filePath = para->getFName() + "_" + fileName + ".vtk"; + auto writer = WbWriterVtkXmlBinary::getInstance(); + writeQLines(para->getParH(level).get(), boundaryQ, filePath, writer); +} + +} // namespace QDebugVtkWriter + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9eecb25c663fcfc8fde353b76ccf20cbcb9cf272 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp @@ -0,0 +1,60 @@ +#include "gmock/gmock.h" +#include <cmath> +#include <gmock/gmock.h> +#include <gtest/gtest.h> +#include "QDebugVtkWriter.hpp" +#include <tuple> + +MATCHER(DoubleNear5, "") { + return abs(std::get<0>(arg) - std::get<1>(arg)) < 0.00001; +} + +using namespace QDebugVtkWriter; + +double calcVectorLength(const std::array<double, 3> coords, const std::array<double, 3> neighborCoords) +{ + return std::sqrt(std::pow((neighborCoords[0] - coords[0]), 2) + std::pow((neighborCoords[1] - coords[1]), 2) + + std::pow((neighborCoords[2] - coords[2]), 2)); +} + +TEST(QDebugVtkWriterTest, modifyLineLengthsForQsSameCoords3) +{ + const std::array<double, 3> coords = { 0, 0, 0 }; + std::array<double, 3> neighborCoords = { 1, 1, 1 }; + const real q = 0.3; + const real initialLength = calcVectorLength(coords, neighborCoords); + + modifyLineLengthsForQs(coords, neighborCoords, q); + + std::array<double, 3> expectedNeighborCoords = { 0.3, 0.3, 0.3 }; + EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords)); + EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001)); +} + +TEST(QDebugVtkWriterTest, modifyLineLengthDifferentCoords) +{ 
+ const std::array<double, 3> coords = { 0, 0, 0 }; + std::array<double, 3> neighborCoords = { 1, 2, 3 }; + const real q = 0.3; + const real initialLength = calcVectorLength(coords, neighborCoords); + + modifyLineLengthsForQs(coords, neighborCoords, q); + + std::array<double, 3> expectedNeighborCoords = { 0.3, 0.6, 0.9 }; + EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords)); + EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001)); +} + +TEST(QDebugVtkWriterTest, modifyLineLengthNegativeCoord) +{ + const std::array<double, 3> coords = { 0, 0, 0 }; + std::array<double, 3> neighborCoords = { 1, 2, -3 }; + const real q = 0.3; + const real initialLength = calcVectorLength(coords, neighborCoords); + + modifyLineLengthsForQs(coords, neighborCoords, q); + + std::array<double, 3> expectedNeighborCoords = { 0.3, 0.6, -0.9 }; + EXPECT_THAT(neighborCoords,testing::Pointwise(DoubleNear5(), expectedNeighborCoords)); + EXPECT_THAT(calcVectorLength(coords, neighborCoords), testing::DoubleNear(q*initialLength, 0.00001)); +} diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp index d006636572377477aeb3599a8ae843ea2b1e31ff..c1a3658d318eb47e84530bf437afa0bb6ba91743 100644 --- a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp +++ b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp @@ -13,8 +13,6 @@ #include <basics/writer/WbWriterVtkXmlBinary.h> #include "Core/StringUtilities/StringUtil.h" -//using namespace std; - namespace QDebugWriter { void writeQValues(QforBoundaryConditions &Q, int* k, int kq, const std::string &name) diff --git a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp index 81f2c028a6bbc7cd9c077571349f4f0465a08a05..f26b4e5795466a72aa1894de37bdb066b9ab9d04 100644 --- a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp +++ 
b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp @@ -33,7 +33,7 @@ namespace UnstructuredGridWriter bool neighborsFluid; - unsigned int allnodes = para->getParH(level)->numberOfNodes * 8; + unsigned long long allnodes = para->getParH(level)->numberOfNodes * 8; nodes.resize(allnodes); nodedata[0].resize(allnodes); @@ -45,7 +45,7 @@ namespace UnstructuredGridWriter unsigned int nodeCount = 0; double nodeDeltaLevel = para->getParH(level)->dx; - for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID /*!= GEO_VOID*/) { @@ -197,9 +197,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -340,9 +340,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -479,9 +479,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... 
\n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -628,9 +628,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -771,9 +771,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -896,10 +896,10 @@ namespace UnstructuredGridWriter vector< vector< double > > nodedata(nodedatanames.size()); //printf("\n test for if... \n"); - if (para->getParH(level)->numberOfNodes > limitOfNodes) + if ((uint)para->getParH(level)->numberOfNodes > limitOfNodes) { //printf("\n test in if I... 
\n"); - unsigned int restOfNodes = para->getParH(level)->numberOfNodes - limitOfNodes; + unsigned int restOfNodes = (uint)para->getParH(level)->numberOfNodes - limitOfNodes; ////////////////////////////////////////////////////////////////////////// //PART I nodes.resize(limitOfNodes); @@ -984,7 +984,7 @@ namespace UnstructuredGridWriter nodedata[5].resize(restOfNodes); //printf("\n test in if IV... \n"); - for (unsigned int pos=limitOfNodes;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = limitOfNodes; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1055,7 +1055,7 @@ namespace UnstructuredGridWriter nodedata[5].resize(para->getParH(level)->numberOfNodes); //printf("\n test in else II... \n"); - for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1148,7 +1148,7 @@ namespace UnstructuredGridWriter unsigned int number1,number2,number3,number4,number5,number6,number7,number8; bool neighborsFluid; double vxmax = 0; - vector< vector< double > > nodedata(nodedatanames.size()); + vector<vector<double>> nodedata(nodedatanames.size()); nodes.resize(para->getParH(level)->numberOfNodes); nodedata[0].resize(para->getParH(level)->numberOfNodes); @@ -1158,7 +1158,7 @@ namespace UnstructuredGridWriter nodedata[4].resize(para->getParH(level)->numberOfNodes); nodedata[5].resize(para->getParH(level)->numberOfNodes); - for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1244,7 +1244,7 @@ namespace UnstructuredGridWriter nodedata[4].resize(para->getParH(level)->numberOfNodes); nodedata[5].resize(para->getParH(level)->numberOfNodes); - for (unsigned int 
pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1342,9 +1342,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -1364,7 +1364,7 @@ namespace UnstructuredGridWriter nodedata[5].resize(sizeOfNodes); ////////////////////////////////////////////////////////////////////////// //printf("\n test in if II... \n"); - for (unsigned int pos=startpos;pos<endpos;pos++) + for (size_t pos = startpos; pos < endpos; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1465,9 +1465,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -1595,9 +1595,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... 
\n"); ////////////////////////////////////////////////////////////////////////// - if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -1728,7 +1728,7 @@ namespace UnstructuredGridWriter nodedatanames.push_back("geo"); unsigned int number1,number2,number3,number4,number5,number6,number7,number8; bool neighborsFluid; - vector< vector< double > > nodedata(nodedatanames.size()); + vector< vector<double>> nodedata(nodedatanames.size()); nodes.resize(para->getParH(level)->numberOfNodes); nodedata[0].resize(para->getParH(level)->numberOfNodes); @@ -1738,7 +1738,7 @@ namespace UnstructuredGridWriter nodedata[4].resize(para->getParH(level)->numberOfNodes); nodedata[5].resize(para->getParH(level)->numberOfNodes); - for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1825,7 +1825,7 @@ namespace UnstructuredGridWriter nodedata[4].resize(para->getParH(level)->numberOfNodes); nodedata[5].resize(para->getParH(level)->numberOfNodes); - for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++) + for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++) { if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { @@ -1975,9 +1975,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... 
\n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -2080,9 +2080,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... \n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -2192,9 +2192,9 @@ namespace UnstructuredGridWriter vxmax = 0; //printf("\n test in if I... 
\n"); ////////////////////////////////////////////////////////////////////////// - if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes) + if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes) { - sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); + sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK()); } else { @@ -2319,7 +2319,7 @@ namespace UnstructuredGridWriter wallX3 = 0.0; q = 0.0; ////////////////////////////////////////////////////////////////////////// - for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++) + for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++) { QQ = para->getParH(level)->geometryBC.q27[0]; Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes]; @@ -2423,7 +2423,7 @@ namespace UnstructuredGridWriter wallX3 = 0.0; q = 0.0; ////////////////////////////////////////////////////////////////////////// - for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++) + for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++) { QQ = para->getParH(level)->velocityBC.q27[0]; Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes]; @@ -2528,7 +2528,7 @@ namespace UnstructuredGridWriter wallX3 = 0.0; q = 0.0; ////////////////////////////////////////////////////////////////////////// - for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++) + for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++) { QQ = para->getParH(level)->pressureBC.q27[0]; Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes]; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp index 3731836f336d91c1bc4cc5f1a8f5ea0a10bee0a6..3cc771e413134e90b0d09d8eeb6dfee791f8a1e2 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp +++ 
b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp @@ -31,25 +31,39 @@ #include <helper_cuda.h> #include <iostream> -void CudaStreamManager::launchStreams(uint numberOfStreams) +void CudaStreamManager::registerStream(CudaStreamIndex streamIndex) +{ + if(streamIndex != CudaStreamIndex::Legacy) + cudaStreams.emplace(streamIndex, nullptr); +} +void CudaStreamManager::launchStreams() { - cudaStreams.resize(numberOfStreams); - for (cudaStream_t &stream : cudaStreams) - cudaStreamCreate(&stream); + for (auto &stream : cudaStreams) + cudaStreamCreate(&stream.second); } void CudaStreamManager::terminateStreams() { - for (cudaStream_t &stream : cudaStreams) - cudaStreamDestroy(stream); + for (auto &stream : cudaStreams) + cudaStreamDestroy(stream.second); } -cudaStream_t &CudaStreamManager::getStream(uint streamIndex) -{ return cudaStreams[streamIndex]; } - -int CudaStreamManager::getBorderStreamIndex() { return borderStreamIndex; } +cudaStream_t &CudaStreamManager::getStream(CudaStreamIndex streamIndex, uint multiStreamIndex) +{ + if(streamIndex == CudaStreamIndex::Legacy) return legacyStream; + if(streamIsRegistered(streamIndex)) + { + auto it = cudaStreams.find(streamIndex); + for(uint idx=0; idx<multiStreamIndex; idx++) it++; + return it->second; + } + return legacyStream; +} -int CudaStreamManager::getBulkStreamIndex() { return bulkStreamIndex; } +bool CudaStreamManager::streamIsRegistered(CudaStreamIndex streamIndex) +{ + return cudaStreams.count(streamIndex) > 0; +} void CudaStreamManager::createCudaEvents() { @@ -61,12 +75,12 @@ void CudaStreamManager::destroyCudaEvents() checkCudaErrors(cudaEventDestroy(startBulkKernel)); } -void CudaStreamManager::triggerStartBulkKernel(int streamIndex) +void CudaStreamManager::triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex) { - checkCudaErrors(cudaEventRecord(startBulkKernel, cudaStreams[streamIndex])); + checkCudaErrors(cudaEventRecord(startBulkKernel, getStream(streamIndex, 
multiStreamIndex))); } -void CudaStreamManager::waitOnStartBulkKernelEvent(int streamIndex) +void CudaStreamManager::waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex) { - checkCudaErrors(cudaStreamWaitEvent(cudaStreams[streamIndex], startBulkKernel)); + checkCudaErrors(cudaStreamWaitEvent(getStream(streamIndex, multiStreamIndex), startBulkKernel)); } diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h index c2d515ab5fe9c24388632a7ca9e1e4c78b7f1467..5c59bcd3a5e6178d6e70a63f803caf8e29f32604 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h @@ -30,32 +30,40 @@ #ifndef STREAM_MANAGER_H #define STREAM_MANAGER_H -#include <vector> -#include "Core/DataTypes.h" - +#include <map> +#include <cuda.h> #include <cuda_runtime.h> +#include "Core/DataTypes.h" +enum class CudaStreamIndex + { + Legacy, + Bulk, + SubDomainBorder, + Precursor, + ActuatorFarm + }; class CudaStreamManager -{ +{ private: - std::vector<cudaStream_t> cudaStreams; + std::multimap<CudaStreamIndex, cudaStream_t> cudaStreams; cudaEvent_t startBulkKernel = NULL; - const int borderStreamIndex = 1; - const int bulkStreamIndex = 0; + cudaStream_t legacyStream = CU_STREAM_LEGACY; + public: - void launchStreams(uint numberOfStreams); + void registerStream(CudaStreamIndex streamIndex); + void launchStreams(); void terminateStreams(); - cudaStream_t &getStream(uint streamIndex); - - int getBorderStreamIndex(); - int getBulkStreamIndex(); + cudaStream_t &getStream(CudaStreamIndex streamIndex, uint multiStreamIndex=0); + bool streamIsRegistered(CudaStreamIndex streamIndex); // Events void createCudaEvents(); void destroyCudaEvents(); - void triggerStartBulkKernel(int streamIndex); - void waitOnStartBulkKernelEvent(int strteamIndex); + + void triggerStartBulkKernel(CudaStreamIndex streamIndex, uint multiStreamIndex=0); + void 
waitOnStartBulkKernelEvent(CudaStreamIndex streamIndex, uint multiStreamIndex=0); }; #endif diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 7687ec926270f23e57608ca5f3084bd26d4de20e..e593d16d6ed1f69ca65a22606a157e7ea9e6b111 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -43,6 +43,7 @@ #include <basics/config/ConfigurationFile.h> +#include "Logger.h" #include "Parameter/CudaStreamManager.h" Parameter::Parameter() : Parameter(1, 0, {}) {} @@ -65,6 +66,8 @@ Parameter::Parameter(int numberOfProcesses, int myId, std::optional<const vf::ba initGridPaths(); initGridBasePoints(); initDefaultLBMkernelAllLevels(); + + this->cudaStreamManager = std::make_unique<CudaStreamManager>(); } Parameter::~Parameter() = default; @@ -500,10 +503,10 @@ void Parameter::initLBMSimulationParameter() parH[i]->sizePlaneXY = parH[i]->nx * parH[i]->ny; parH[i]->sizePlaneYZ = parH[i]->ny * parH[i]->nz; parH[i]->sizePlaneXZ = parH[i]->nx * parH[i]->nz; - parH[i]->mem_size_real = sizeof(real) * parH[i]->size_Mat; - parH[i]->mem_size_int = sizeof(unsigned int) * parH[i]->size_Mat; - parH[i]->mem_size_bool = sizeof(bool) * parH[i]->size_Mat; - parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz; +// parH[i]->mem_size_real = sizeof(real) * parH[i]->size_Mat; //DEPRECATED: related to full matrix +// parH[i]->mem_size_int = sizeof(unsigned int) * parH[i]->size_Mat; //DEPRECATED: related to full matrix +// parH[i]->mem_size_bool = sizeof(bool) * parH[i]->size_Mat; //DEPRECATED: related to full matrix +// parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz; //DEPRECATED: related to full matrix parH[i]->isEvenTimestep = true; parH[i]->startz = parH[i]->gridNZ * ic.myProcessId; parH[i]->endz = parH[i]->gridNZ * ic.myProcessId + parH[i]->gridNZ; @@ -568,10 +571,10 @@ void Parameter::initLBMSimulationParameter() 
parD[i]->sizePlaneXY = parH[i]->sizePlaneXY; parD[i]->sizePlaneYZ = parH[i]->sizePlaneYZ; parD[i]->sizePlaneXZ = parH[i]->sizePlaneXZ; - parD[i]->mem_size_real = sizeof(real) * parD[i]->size_Mat; - parD[i]->mem_size_int = sizeof(unsigned int) * parD[i]->size_Mat; - parD[i]->mem_size_bool = sizeof(bool) * parD[i]->size_Mat; - parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz; + //parD[i]->mem_size_real = sizeof(real) * parD[i]->size_Mat; //DEPRECATED: related to full matrix + //parD[i]->mem_size_int = sizeof(unsigned int) * parD[i]->size_Mat; //DEPRECATED: related to full matrix + //parD[i]->mem_size_bool = sizeof(bool) * parD[i]->size_Mat; //DEPRECATED: related to full matrix + //parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz; //DEPRECATED: related to full matrix parD[i]->isEvenTimestep = parH[i]->isEvenTimestep; parD[i]->startz = parH[i]->startz; parD[i]->endz = parH[i]->endz; @@ -586,6 +589,30 @@ void Parameter::initLBMSimulationParameter() parD[i]->distY = parH[i]->distY; parD[i]->distZ = parH[i]->distZ; } + + checkParameterValidityCumulantK17(); +} + +void Parameter::checkParameterValidityCumulantK17() const +{ + if (this->mainKernel != "CumulantK17") + return; + + const real viscosity = this->parH[maxlevel]->vis; + const real viscosityLimit = 1.0 / 42.0; + if (viscosity > viscosityLimit) { + VF_LOG_WARNING("The viscosity (in LB units) at level {} is {:1.3g}. It is recommended to keep it smaller than {:1.3g} " + "for the CumulantK17 collision kernel.", + maxlevel, viscosity, viscosityLimit); + } + + const real velocity = this->ic.u0; + const real velocityLimit = 0.1; + if (velocity > velocityLimit) { + VF_LOG_WARNING("The velocity (in LB units) is {:1.4g}. 
It is recommended to keep it smaller than {:1.4g} for the " + "CumulantK17 collision kernel.", + velocity, velocityLimit); + } } void Parameter::copyMeasurePointsArrayToVector(int lev) @@ -829,7 +856,7 @@ real Parameter::getLengthRatio() } real Parameter::getForceRatio() { - return this->getDensityRatio() * this->getVelocityRatio()/this->getTimeRatio(); + return (this->getDensityRatio()+1.0) * this->getVelocityRatio()/this->getTimeRatio(); } real Parameter::getScaledViscosityRatio(int level) { @@ -859,6 +886,10 @@ real Parameter::getScaledForceRatio(int level) { return this->getForceRatio()*(level+1); } +real Parameter::getScaledStressRatio(int level) +{ + return this->getVelocityRatio()*this->getVelocityRatio(); +} void Parameter::setRealX(real RealX) { ic.RealX = RealX; @@ -883,6 +914,10 @@ void Parameter::setPressOutZ(unsigned int PressOutZ) { ic.PressOutZ = PressOutZ; } +void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor) +{ + ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor; +} void Parameter::setMaxDev(int maxdev) { ic.maxdev = maxdev; @@ -1607,7 +1642,7 @@ void Parameter::setOutflowBoundaryNormalZ(std::string outflowNormalZ) void Parameter::setMainKernel(std::string kernel) { this->mainKernel = kernel; - if (kernel.find("Stream") != std::string::npos || kernel.find("Redesigned") != std::string::npos) + if ( kernel.find("CumulantK17") != std::string::npos ) this->kernelNeedsFluidNodeIndicesToRun = true; } void Parameter::setMultiKernelOn(bool isOn) @@ -1720,22 +1755,22 @@ unsigned int Parameter::getSizeMat(int level) { return parH[level]->size_Mat; } -unsigned int Parameter::getMemSizereal(int level) -{ - return parH[level]->mem_size_real; -} -unsigned int Parameter::getMemSizeInt(int level) -{ - return parH[level]->mem_size_int; -} -unsigned int Parameter::getMemSizeBool(int level) -{ - return parH[level]->mem_size_bool; -} -unsigned int Parameter::getMemSizerealYZ(int level) -{ - return 
parH[level]->mem_size_real_yz; -} +//unsigned int Parameter::getMemSizereal(int level) //DEPRECATED: related to full matrix +//{ +// return parH[level]->mem_size_real; +//} +//unsigned int Parameter::getMemSizeInt(int level) //DEPRECATED: related to full matrix +//{ +// return parH[level]->mem_size_int; +//} +//unsigned int Parameter::getMemSizeBool(int level) //DEPRECATED: related to full matrix +//{ +// return parH[level]->mem_size_bool; +//} +//unsigned int Parameter::getMemSizerealYZ(int level) //DEPRECATED: related to full matrix +//{ +// return parH[level]->mem_size_real_yz; +//} int Parameter::getFine() { return fine; @@ -1916,6 +1951,10 @@ unsigned int Parameter::getPressOutZ() { return ic.PressOutZ; } +real Parameter::getOutflowPressureCorrectionFactor() +{ + return ic.outflowPressureCorrectionFactor; +} int Parameter::getMaxDev() { return ic.maxdev; @@ -2657,8 +2696,7 @@ void Parameter::setUseStreams(bool useStreams) if (useStreams) { if (this->getNumprocs() != 1) { this->useStreams = useStreams; - this->cudaStreamManager = std::make_unique<CudaStreamManager>(); - return; + return; } else { std::cout << "Can't use streams with only one process!" << std::endl; } diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index cbb8bfd68702bc2285947eb76e6d0adc54a5b6c1..fa45b1742f20e32258195c78b630ce95175af938 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -54,6 +54,8 @@ class ConfigurationFile; } class CudaStreamManager; +class TransientBCInputFileReader; + //! \struct LBMSimulationParameter //! \brief struct holds and manages the LB-parameter of the simulation //! \brief For this purpose it holds structures and pointer for host and device data, respectively. @@ -65,16 +67,78 @@ struct LBMSimulationParameter { ////////////////////////////////////////////////////////////////////////// //! 
\brief stores the number of threads per GPU block uint numberofthreads; + //! \brief store all distribution functions for the D3Q27 + Distributions27 distributions; + ////////////////////////////////////////////////////////////////////////// + //! \brief stores the type for every lattice node (f.e. fluid node) + uint *typeOfGridNode; + ////////////////////////////////////////////////////////////////////////// + //! \brief store the neighbors in +X, +Y, +Z, and in diagonal negative direction + //! \brief this information is important because we use an indirect addressing scheme + uint *neighborX, *neighborY, *neighborZ, *neighborInverse; + ////////////////////////////////////////////////////////////////////////// + //! \brief store the coordinates for every lattice node + real *coordinateX, *coordinateY, *coordinateZ; + ////////////////////////////////////////////////////////////////////////// + //! \brief store the macroscopic values (velocity, density, pressure) + //! \brief for every lattice node + real *velocityX, *velocityY, *velocityZ, *rho, *pressure; + //! \brief stores the value for omega + real omega; + ////////////////////////////////////////////////////////////////////////// + //! \brief stores the number of nodes (based on indirect addressing scheme) + unsigned long long numberOfNodes; + //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity) + unsigned long long memSizeRealLBnodes, memSizeLonglongLBnodes; + + + + + + + ////////////////////////////////////////////////////////////////////////// + // DEPRECATED + ////////////////////////////////////////////////////////////////////////// // distributions/////////// // Distributions19 d0; Distributions27 d0; // DEPRECATED: distribution functions for full matrix (not sparse) - //! 
\brief store all distribution functions for the D3Q27 - Distributions27 distributions; + + // typeOfGridNode (formerly known as "geo") ///////////////////// + int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse) + + // k/////////////////////// + unsigned int *k; // DEPRECATED: index for full matrix + + // memsize///////////////// + //unsigned int mem_size_real_yz; + //unsigned int mem_size_bool; + //unsigned int mem_size_int; + //unsigned int mem_size_real; + + ////////////////////////////////////////////////////////////////////////// + + + + + + ////////////////////////////////////////////////////////////////////////// + // additional logic + ////////////////////////////////////////////////////////////////////////// // distributions F3//////// Distributions6 g6; + unsigned int size_Array_SP; + + + // memsizeSP///////////////// + + + + ////////////////////////////////////////////////////////////////////////// + + // advection diffusion ////////////////// //! \brief store all distribution functions for the D3Q7 advection diffusion field Distributions7 distributionsAD7; @@ -104,22 +168,6 @@ struct LBMSimulationParameter { real cStartx, cStarty, cStartz; real cFx, cFy, cFz; - // typeOfGridNode (formerly known as "geo") ///////////////////// - int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse) - //! \brief stores the type for every lattice node (f.e. fluid node) - unsigned int *typeOfGridNode; - - // k/////////////////////// - unsigned int *k; // DEPRECATED: index for full matrix - - // neighbor/////////////////////////////////////////////////////////////// - //! \brief store the neighbors in +X, +Y, +Z, and in diagonal negative direction - //! \brief this information is important because we use an indirect addressing scheme - uint *neighborX, *neighborY, *neighborZ, *neighborInverse; - - // coordinates//////////////////////////////////////////////////////////// - //! 
\brief store the coordinates for every lattice node - real *coordinateX, *coordinateY, *coordinateZ; // body forces//////////// real *forceX_SP, *forceY_SP, *forceZ_SP; @@ -138,11 +186,6 @@ struct LBMSimulationParameter { // macroscopic values////// // real *vx, *vy, *vz, *rho; // DEPRECATED: macroscopic values for full matrix - //! \brief store the macroscopic values (velocity, density, pressure) - //! \brief for every lattice node - real *velocityX, *velocityY, *velocityZ, *rho, *pressure; - //! \brief stores the value for omega - real omega; //! \brief stores the value for viscosity (on level 0) real vis; @@ -163,11 +206,6 @@ struct LBMSimulationParameter { unsigned int size_Mat; unsigned int sizePlaneXY, sizePlaneYZ, sizePlaneXZ; - // size of sparse matrix////////// - //! \brief stores the number of nodes (based on indirect addressing scheme) - unsigned int numberOfNodes; - unsigned int size_Array_SP; - // size of Plane btw. 2 GPUs////// unsigned int sizePlaneSB, sizePlaneRB, startB, endB; unsigned int sizePlaneST, sizePlaneRT, startT, endT; @@ -180,16 +218,6 @@ struct LBMSimulationParameter { unsigned int sizePlanePressOUT, startPOUT; bool isSetPress; - // memsizeSP///////////////// - //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity) - unsigned int mem_size_real_SP; - unsigned int mem_size_int_SP; - - // memsize///////////////// - unsigned int mem_size_real; - unsigned int mem_size_int; - unsigned int mem_size_bool; - unsigned int mem_size_real_yz; // print/////////////////// unsigned int startz, endz; @@ -218,16 +246,16 @@ struct LBMSimulationParameter { OffsetFC offFCBulk; unsigned int mem_size_kCF_off; unsigned int mem_size_kFC_off; - - // BC's//////////////////// + //! \brief stores the boundary condition data QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC, pressureBC; //! 
\brief number of lattice nodes for the boundary conditions - unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead; + unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead, numberOfPrecursorBCnodesRead; QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED QforBoundaryConditions propellerBC; QforBoundaryConditions geometryBC; + QforPrecursorBoundaryConditions precursorBC; QforBoundaryConditions geometryBCnormalX, geometryBCnormalY, geometryBCnormalZ; QforBoundaryConditions inflowBCnormalX, inflowBCnormalY, inflowBCnormalZ; QforBoundaryConditions outflowBCnormalX, outflowBCnormalY, outflowBCnormalZ; @@ -235,6 +263,8 @@ struct LBMSimulationParameter { unsigned int kInletQread, kOutletQread; // DEPRECATED WallModelParameters wallModel; + std::vector<SPtr<TransientBCInputFileReader>> transientBCInputFileReader; + real outflowPressureCorrectionFactor; // testRoundoffError Distributions27 kDistTestRE; @@ -367,10 +397,19 @@ struct LBMSimulationParameter { std::vector<EdgeNodePositions> edgeNodesYtoZ; /////////////////////////////////////////////////////// - uint *fluidNodeIndices; - uint numberOfFluidNodes; - uint *fluidNodeIndicesBorder; - uint numberOfFluidNodesBorder; + std::map<CollisionTemplate, uint*> taggedFluidNodeIndices = {{CollisionTemplate::Default, nullptr}, + {CollisionTemplate::SubDomainBorder,nullptr}, + {CollisionTemplate::WriteMacroVars, nullptr}, + {CollisionTemplate::ApplyBodyForce, nullptr}, + {CollisionTemplate::AllFeatures, nullptr}}; + std::map<CollisionTemplate, uint > numberOfTaggedFluidNodes = {{CollisionTemplate::Default, 0}, + {CollisionTemplate::SubDomainBorder,0}, + {CollisionTemplate::WriteMacroVars, 0}, + {CollisionTemplate::ApplyBodyForce, 0}, + 
{CollisionTemplate::AllFeatures, 0}}; + + std::vector<CollisionTemplate> allocatedBulkFluidNodeTags = {}; + }; //! \brief Class for LBM-parameter management @@ -471,6 +510,7 @@ public: void setpressBcPos(std::string pressBcPos); void setpressBcQs(std::string pressBcQs); void setpressBcValue(std::string pressBcValue); + void setOutflowPressureCorrectionFactor(real correctionFactor); void setpressBcValues(std::string pressBcValues); void setvelBcQs(std::string velBcQs); void setvelBcValues(std::string velBcValues); @@ -527,7 +567,6 @@ public: void setUseWale(bool useWale); void setTurbulenceModel(TurbulenceModel turbulenceModel); void setUseTurbulentViscosity(bool useTurbulentViscosity); - void setUseAMD(bool useAMD); void setSGSConstant(real SGSConstant); void setHasWallModelMonitor(bool hasWallModelMonitor); void setUseInitNeq(bool useInitNeq); @@ -726,10 +765,10 @@ public: unsigned int getPressOutID(); unsigned int getPressInZ(); unsigned int getPressOutZ(); - unsigned int getMemSizereal(int level); - unsigned int getMemSizeInt(int level); - unsigned int getMemSizeBool(int level); - unsigned int getMemSizerealYZ(int level); +// unsigned int getMemSizereal(int level); //DEPRECATED: related to full matrix +// unsigned int getMemSizeInt(int level); //DEPRECATED: related to full matrix +// unsigned int getMemSizeBool(int level); //DEPRECATED: related to full matrix +// unsigned int getMemSizerealYZ(int level); //DEPRECATED: related to full matrix unsigned int getSizeMat(int level); unsigned int getTimestepStart(); unsigned int getTimestepInit(); @@ -765,6 +804,8 @@ public: real getScaledDensityRatio(int level); //! \returns the pressure ratio in SI/LB units scaled to the respective level real getScaledPressureRatio(int level); + //! \returns the stress ratio in SI/LB units scaled to the respective level + real getScaledStressRatio(int level); //! \returns the time ratio in SI/LB units scaled to the respective level real getScaledTimeRatio(int level); //! 
\returns the length ratio in SI/LB units scaled to the respective level @@ -853,6 +894,7 @@ public: std::string getOutflowBoundaryNormalX(); std::string getOutflowBoundaryNormalY(); std::string getOutflowBoundaryNormalZ(); + real getOutflowPressureCorrectionFactor(); // CUDA random number curandState *getRandomState(); // Kernel @@ -896,6 +938,8 @@ private: void setPathAndFilename(std::string fname); + void checkParameterValidityCumulantK17() const; + private: bool compOn{ false }; bool diffOn{ false }; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp index 4025acf7acad362e9f0f3702cb897b9c1b6dbf3b..72a12ae880556e6e257eb69dee4e806617252629 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp @@ -1,4 +1,3 @@ -#include <gmock/gmock.h> #include "basics/tests/testUtilities.h" #include <filesystem> @@ -8,7 +7,6 @@ #include "Parameter.h" #include "basics/config/ConfigurationFile.h" - TEST(ParameterTest, passingEmptyFileWithoutPath_ShouldNotThrow) { // assuming that the config files is stored parallel to this file. @@ -37,7 +35,9 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor) // test optional parameter EXPECT_THAT(para.getOutputPath(), testing::Eq("/output/path/")); - EXPECT_THAT(para.getGridPath(), testing::Eq("/path/to/grid/")); // ... all grid files (e.g. multi-gpu/ multi-level) could be tested as well + EXPECT_THAT( + para.getGridPath(), + testing::Eq("/path/to/grid/")); // ... all grid files (e.g. 
multi-gpu/ multi-level) could be tested as well EXPECT_THAT(para.getgeoVec(), testing::Eq("/path/to/grid/geoVec.dat")); EXPECT_THAT(para.getMaxDev(), testing::Eq(2)); EXPECT_THAT(para.getDevices(), testing::ElementsAreArray({ 2, 3 })); @@ -163,7 +163,7 @@ TEST(ParameterTest, setGridPathOverridesDefaultGridPath) Parameter para(2, 1); para.setGridPath("gridPathTest"); - EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/1/")); + EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/1/")); EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/1/conc.dat")); } @@ -177,9 +177,8 @@ TEST(ParameterTest, setGridPathOverridesConfigFile) auto para = Parameter(2, 0, &config); para.setGridPath("gridPathTest"); - EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/0/")); + EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/")); EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat")); - } TEST(ParameterTest, userMissedSlash) @@ -189,7 +188,6 @@ TEST(ParameterTest, userMissedSlash) EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/")); EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/conc.dat")); - } TEST(ParameterTest, userMissedSlashMultiGPU) @@ -199,4 +197,87 @@ TEST(ParameterTest, userMissedSlashMultiGPU) EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/")); EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat")); -} \ No newline at end of file +} + +class ParameterTestCumulantK17 : public testing::Test +{ +protected: + void SetUp() override + { + } + + bool stdoutContainsWarning() + { + std::string output = testing::internal::GetCapturedStdout(); + return output.find("warning") != std::string::npos; + } + + Parameter para; +}; + +TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsTooHigh_expectWarning) +{ + + para.setVelocityLB(0.11); + para.setMainKernel("CumulantK17"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + 
EXPECT_TRUE(stdoutContainsWarning()); +} + +TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsOk_expectNoWarning) +{ + para.setVelocityLB(0.09); + para.setMainKernel("CumulantK17"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + EXPECT_FALSE(stdoutContainsWarning()); +} + +TEST_F(ParameterTestCumulantK17, NotCumulantK17_VelocityIsTooHigh_expectNoWarning) +{ + para.setVelocityLB(42); + para.setMainKernel("K"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + EXPECT_FALSE(stdoutContainsWarning()); +} + +TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsTooHigh_expectWarning) +{ + para.setViscosityLB(0.024); + para.setMainKernel("CumulantK17"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + EXPECT_TRUE(stdoutContainsWarning()); +} + +TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsOk_expectNoWarning) +{ + para.setViscosityLB(0.023); + para.setMainKernel("CumulantK17"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + EXPECT_FALSE(stdoutContainsWarning()); +} + +TEST_F(ParameterTestCumulantK17, NotCumulantK17_ViscosityIsTooHigh_expectNoWarning) +{ + para.setViscosityLB(10); + para.setMainKernel("K"); + testing::internal::CaptureStdout(); + + para.initLBMSimulationParameter(); + + EXPECT_FALSE(stdoutContainsWarning()); +} diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp index 7c710f50afb0ae07edd53ef9d68e294c7af54ac1..e0156e3fbae46282baeb1359c719a077f021cf6b 100644 --- a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp +++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp @@ -141,12 +141,12 @@ void initParticles(Parameter* para) para->getParH(lev)->plp.coordZabsolut[i] = (real)zCoordVec[i]; // find IDs - for (unsigned int ii = 0; ii < para->getParH(lev)->numberOfNodes; ii++) + for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) { - 
if ((para->getParH(lev)->coordinateX[ii] <= para->getParH(lev)->plp.coordXabsolut[i]) && - ((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[ii]) <= dx)) + if ((para->getParH(lev)->coordinateX[index] <= para->getParH(lev)->plp.coordXabsolut[i]) && + ((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[index]) <= dx)) { - tempID.push_back(ii); + tempID.push_back((int)index); } } @@ -455,7 +455,7 @@ void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager) int counter2 = 0; ////////////////////////////////////////////////////////////////////////// //redefine fluid nodes - for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++) + for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) { if (para->getParH(lev)->typeOfGridNode[index] == GEO_FLUID_OLD) { diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu new file mode 100644 index 0000000000000000000000000000000000000000..9447a8636e801c132df9cef2feced4b5ab4e68de --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu @@ -0,0 +1,629 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ 
|__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ActuatorFarm.cu +//! \ingroup PreCollisionInteractor +//! \author Henrik Asmuth, Henry Korb +//====================================================================================== +#include "ActuatorFarm.h" + +#include <cuda.h> +#include <cuda_runtime.h> +#include <helper_cuda.h> + +#include "cuda/CudaGrid.h" +#include "VirtualFluids_GPU/GPU/GeometryUtils.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" + +#include "Parameter/Parameter.h" +#include "Parameter/CudaStreamManager.h" +#include "DataStructureInitializer/GridProvider.h" +#include "GPU/CudaMemoryManager.h" +#include "lbm/constants/NumericConstants.h" +#include "logger/Logger.h" + +using namespace vf::lbm::constant; + + +__host__ __device__ __inline__ uint calcNode(uint bladeNode, uint numberOfBladeNodes, uint blade, uint numberOfBlades, uint turbine, uint numberOfTurbines) +{ + + return bladeNode+numberOfBladeNodes*(blade+numberOfBlades*turbine); +} + +__host__ __device__ __inline__ void calcTurbineBladeAndBladeNode(uint node, uint& bladeNode, uint numberOfBladeNodes, uint& blade, uint numberOfBlades, uint& turbine, uint numberOfTurbines) +{ + turbine = node/(numberOfBladeNodes*numberOfBlades); + uint x_off = turbine*numberOfBladeNodes*numberOfBlades; + 
blade = (node - x_off)/numberOfBlades; + uint y_off = numberOfBladeNodes*blade+x_off; + bladeNode = (node - y_off)/numberOfBladeNodes; +} + +__host__ __device__ __forceinline__ real distSqrd(real distX, real distY, real distZ) +{ + return distX*distX+distY*distY+distZ*distZ; +} + +void swapArrays(real* &arr1, real* &arr2) +{ + real* tmp = arr1; + arr1 = arr2; + arr2 = tmp; +} + +__host__ __device__ __inline__ void rotateFromBladeToGlobal( + real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, + real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF, + real& azimuth, real& yaw) +{ + real tmpX, tmpY, tmpZ; + + rotateAboutX3D(azimuth, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, tmpX, tmpY, tmpZ); + rotateAboutZ3D(yaw, tmpX, tmpY, tmpZ, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF); + +} + +__host__ __device__ __inline__ void rotateFromGlobalToBlade( + real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, + real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF, + real& azimuth, real& yaw) +{ + real tmpX, tmpY, tmpZ; + + invRotateAboutZ3D(yaw, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, tmpX, tmpY, tmpZ); + invRotateAboutX3D(azimuth, tmpX, tmpY, tmpZ, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF); +} + +__global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, + uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB, + real* vx, real* vy, real* vz, + real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, + real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, + uint numberOfTurbines, uint numberOfBlades, uint numberOfBladeNodes, + real* azimuths, real* yaws, real* omegas, + real* turbPosX, real* turbPosY, real* turbPosZ, + uint* bladeIndices, real velocityRatio, real invDeltaX) +{ + + //////////////////////////////////////////////////////////////////////////////// + //! 
- Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = vf::gpu::getNodeIndex(); + + if(nodeIndex>=numberOfBladeNodes*numberOfBlades*numberOfTurbines) return; + + uint turbine, bladeNode, blade; + + calcTurbineBladeAndBladeNode(nodeIndex, bladeNode, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines); + + real bladeCoordX_BF = bladeCoordsX[nodeIndex]; + real bladeCoordY_BF = bladeCoordsY[nodeIndex]; + real bladeCoordZ_BF = bladeCoordsZ[nodeIndex]; + + real bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF; + + real localAzimuth = azimuths[turbine]+blade*c2Pi/numberOfBlades; + real yaw = yaws[turbine]; + + + rotateFromBladeToGlobal(bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, + bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, + localAzimuth, yaw); + + bladeCoordX_GF += turbPosX[turbine]; + bladeCoordY_GF += turbPosY[turbine]; + bladeCoordZ_GF += turbPosZ[turbine]; + + uint k, ke, kn, kt; + uint kne, kte, ktn, ktne; + + k = findNearestCellBSW(bladeIndices[nodeIndex], + gridCoordsX, gridCoordsY, gridCoordsZ, + bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, + neighborsX, neighborsY, neighborsZ, neighborsWSB); + + bladeIndices[nodeIndex] = k; + + getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ); + + real dW, dE, dN, dS, dT, dB; + + real distX = invDeltaX*(bladeCoordX_GF-gridCoordsX[k]); + real distY = invDeltaX*(bladeCoordY_GF-gridCoordsY[k]); + real distZ = invDeltaX*(bladeCoordZ_GF-gridCoordsZ[k]); + + getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX, distY, distZ); + + real bladeVelX_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx)*velocityRatio; + real bladeVelY_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy)*velocityRatio; + real bladeVelZ_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz)*velocityRatio; + + real 
bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF; + + rotateFromGlobalToBlade(bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF, + bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, + localAzimuth, yaw); + + bladeVelocitiesX[nodeIndex] = bladeVelX_BF; + bladeVelocitiesY[nodeIndex] = bladeVelY_BF+omegas[turbine]*bladeCoordZ_BF; + bladeVelocitiesZ[nodeIndex] = bladeVelZ_BF; +} + + +__global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, + real* gridForcesX, real* gridForcesY, real* gridForcesZ, + real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, + real* bladeForcesX, real* bladeForcesY, real* bladeForcesZ, + const uint numberOfTurbines, const uint numberOfBlades, const uint numberOfBladeNodes, + real* azimuths, real* yaws, real* diameters, + real* turbPosX, real* turbPosY, real* turbPosZ, + uint* gridIndices, uint nIndices, + const real invEpsilonSqrd, const real factorGaussian) +{ + + const uint index = vf::gpu::getNodeIndex(); + + if(index>=nIndices) return; + + + uint gridIndex = gridIndices[index]; + + real gridCoordX_GF = gridCoordsX[gridIndex]; + real gridCoordY_GF = gridCoordsY[gridIndex]; + real gridCoordZ_GF = gridCoordsZ[gridIndex]; + + real gridForceX_RF = c0o1; + real gridForceY_RF = c0o1; + real gridForceZ_RF = c0o1; + + real dAzimuth = c2Pi/numberOfBlades; + + for(uint turbine = 0; turbine<numberOfTurbines; turbine++) + { + real radius = c1o2*diameters[turbine]; + real gridCoordX_RF = gridCoordX_GF - turbPosX[turbine]; + real gridCoordY_RF = gridCoordY_GF - turbPosY[turbine]; + real gridCoordZ_RF = gridCoordZ_GF - turbPosZ[turbine]; + + if(distSqrd(gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF)*invEpsilonSqrd > radius*radius*invEpsilonSqrd+c7o1) + continue; + + real azimuth = azimuths[turbine]; + real yaw = yaws[turbine]; + + for( uint blade=0; blade<numberOfBlades; blade++) + { + real localAzimuth = azimuth+blade*dAzimuth; + + + real gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF; + + rotateFromGlobalToBlade(gridCoordX_BF, gridCoordY_BF, 
gridCoordZ_BF, + gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF, + localAzimuth, yaw); + + uint node; + uint nextNode = calcNode(0, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines); + + real last_z = c0o1; + real current_z = c0o1; + real next_z = bladeCoordsZ[nextNode]; + + real x, y, dz, eta, forceX_RF, forceY_RF, forceZ_RF; + + for( uint bladeNode=0; bladeNode<numberOfBladeNodes-1; bladeNode++) + { + node = nextNode; + nextNode = calcNode(bladeNode+1, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines); + + x = bladeCoordsX[node]; + y = bladeCoordsY[node]; + last_z = current_z; + current_z = next_z; + next_z = bladeCoordsZ[nextNode]; + + dz = c1o2*(next_z-last_z); + + eta = dz*factorGaussian*exp(-distSqrd(x-gridCoordX_BF, y-gridCoordY_BF, current_z-gridCoordZ_BF)*invEpsilonSqrd); + rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], + forceX_RF, forceY_RF, forceZ_RF, + localAzimuth, yaw); + + gridForceX_RF += forceX_RF*eta; + gridForceY_RF += forceY_RF*eta; + gridForceZ_RF += forceZ_RF*eta; + } + + //Handle last node separately + + node = nextNode; + + x = bladeCoordsX[node]; + y = bladeCoordsY[node]; + last_z = current_z; + current_z = next_z; + + dz = c1o2*(radius-last_z); + + eta = dz*factorGaussian*exp(-distSqrd(x-gridCoordX_BF, y-gridCoordY_BF, current_z-gridCoordZ_BF)*invEpsilonSqrd); + + rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], + forceX_RF, forceY_RF, forceZ_RF, + localAzimuth, yaw); + + gridForceX_RF += forceX_RF*eta; + gridForceY_RF += forceY_RF*eta; + gridForceZ_RF += forceZ_RF*eta; + } + } + + gridForcesX[gridIndex] += gridForceX_RF; + gridForcesY[gridIndex] += gridForceY_RF; + gridForcesZ[gridIndex] += gridForceZ_RF; +} + +void ActuatorFarm::addTurbine(real posX, real posY, real posZ, real diameter, real omega, real azimuth, real yaw, std::vector<real> bladeRadii) +{ + preInitPosX.push_back(posX); + preInitPosY.push_back(posY); + 
preInitPosZ.push_back(posZ); + preInitOmegas.push_back(omega); + preInitAzimuths.push_back(azimuth); + preInitYaws.push_back(yaw); + preInitDiameters.push_back(diameter); + preInitBladeRadii.push_back(bladeRadii); +} + +void ActuatorFarm::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) +{ + if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorFarm but BodyForce is not set in Parameter."); + this->forceRatio = para->getForceRatio(); + this->initTurbineGeometries(cudaMemoryManager); + this->initBladeCoords(cudaMemoryManager); + this->initBladeIndices(para, cudaMemoryManager); + this->initBladeVelocities(cudaMemoryManager); + this->initBladeForces(cudaMemoryManager); + this->initBoundingSpheres(para, cudaMemoryManager); + this->streamIndex = 0; +} + +void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t) +{ + if (level != this->level) return; + + cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::ActuatorFarm, this->streamIndex); + + if(useHostArrays) cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); + + vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfNodes); + + interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads, 0, stream >>>( + para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, + para->getParD(this->level)->neighborX, para->getParD(this->level)->neighborY, para->getParD(this->level)->neighborZ, para->getParD(this->level)->neighborInverse, + para->getParD(this->level)->velocityX, para->getParD(this->level)->velocityY, para->getParD(this->level)->velocityZ, + this->bladeCoordsXDCurrentTimestep, this->bladeCoordsYDCurrentTimestep, this->bladeCoordsZDCurrentTimestep, + this->bladeVelocitiesXDCurrentTimestep, this->bladeVelocitiesYDCurrentTimestep, this->bladeVelocitiesZDCurrentTimestep, + 
this->numberOfTurbines, this->numberOfBlades, this->numberOfBladeNodes, + this->azimuthsD, this->yawsD, this->omegasD, + this->turbinePosXD, this->turbinePosYD, this->turbinePosZD, + this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX); + + cudaStreamSynchronize(stream); + if(useHostArrays) cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this); + this->calcBladeForces(); + this->swapDeviceArrays(); + + if(useHostArrays) cudaMemoryManager->cudaCopyBladeForcesHtoD(this); + + vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfIndices); + + applyBodyForces<<<sphereGrid.grid, sphereGrid.threads, 0, stream>>>( + para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, + para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP, + this->bladeCoordsXDCurrentTimestep, this->bladeCoordsYDCurrentTimestep, this->bladeCoordsZDCurrentTimestep, + this->bladeForcesXDCurrentTimestep, this->bladeForcesYDCurrentTimestep, this->bladeForcesZDCurrentTimestep, + this->numberOfTurbines, this->numberOfBlades, this->numberOfBladeNodes, + this->azimuthsD, this->yawsD, this->diametersD, + this->turbinePosXD, this->turbinePosYD, this->turbinePosZD, + this->boundingSphereIndicesD, this->numberOfIndices, + this->invEpsilonSqrd, this->factorGaussian); + cudaMemoryManager->cudaCopyBladeOrientationsHtoD(this); + cudaStreamSynchronize(stream); +} + + +void ActuatorFarm::free(Parameter* para, CudaMemoryManager* cudaMemoryManager) +{ + cudaMemoryManager->cudaFreeBladeGeometries(this); + cudaMemoryManager->cudaFreeBladeOrientations(this); + cudaMemoryManager->cudaFreeBladeCoords(this); + cudaMemoryManager->cudaFreeBladeVelocities(this); + cudaMemoryManager->cudaFreeBladeForces(this); + cudaMemoryManager->cudaFreeBladeIndices(this); + cudaMemoryManager->cudaFreeSphereIndices(this); +} + + +void 
ActuatorFarm::calcForcesEllipticWing() +{ + real u_rel, v_rel, u_rel_sq; + real phi; + real Cl = c1o1; + real Cd = c0o1; + real c0 = 20*c1o10; + real c, Cn, Ct; + for(uint turbine=0; turbine<this->numberOfTurbines; turbine++) + { + real diameter = this->diametersH[turbine]; + for( uint blade=0; blade<this->numberOfBlades; blade++) + { + for( uint bladeNode=0; bladeNode<this->numberOfBladeNodes; bladeNode++) + { + uint node = calcNode(bladeNode, this->numberOfBladeNodes, blade, this->numberOfBlades, turbine, this->numberOfTurbines); + + u_rel = this->bladeVelocitiesXH[node]; + v_rel = this->bladeVelocitiesYH[node]; + u_rel_sq = u_rel*u_rel+v_rel*v_rel; + phi = atan2(u_rel, v_rel); + + real tmp = c4o1*this->bladeRadiiH[bladeNode]/diameter-c1o1; + c = c0 * sqrt( c1o1- tmp*tmp ); + Cn = Cl*cos(phi)+Cd*sin(phi); + Ct = Cl*sin(phi)-Cd*cos(phi); + real fx = c1o2*u_rel_sq*c*this->density*Cn; + real fy = c1o2*u_rel_sq*c*this->density*Ct; + this->bladeForcesXH[node] = -fx; + this->bladeForcesYH[node] = -fy; + this->bladeForcesZH[node] = c0o1; + // printf("u %f v %f fx %f fy %f \n", u_rel, v_rel, fx, fy); + } + } + azimuthsH[turbine] = azimuthsH[turbine]+deltaT*omegasH[turbine]; + } +} + +void ActuatorFarm::calcBladeForces() +{ + this->calcForcesEllipticWing(); +} + +void ActuatorFarm::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) +{ + std::vector<uint> indicesInSphere(this->boundingSphereIndicesH, this->boundingSphereIndicesH+this->numberOfIndices); + gridProvider->tagFluidNodeIndices(indicesInSphere, CollisionTemplate::AllFeatures, this->level); +} + + +void ActuatorFarm::initTurbineGeometries(CudaMemoryManager* cudaMemoryManager) +{ + this->numberOfTurbines = uint(this->preInitDiameters.size()); + this->numberOfNodes = numberOfTurbines*numberOfBladeNodes*numberOfBlades; + + cudaMemoryManager->cudaAllocBladeGeometries(this); + cudaMemoryManager->cudaAllocBladeOrientations(this); + + for(uint turbine=0; turbine<this->numberOfTurbines; turbine++) + { + 
for(uint node=0; node<this->numberOfBladeNodes; node++)
+        {
+            // blade = 0 and numberOfBlades = 1: the radii are stored once per turbine, not per blade.
+            this->bladeRadiiH[calcNode(node, numberOfBladeNodes, 0, 1, turbine, numberOfTurbines)] = this->preInitBladeRadii[turbine][node];
+        }
+
+    }
+    std::copy(preInitPosX.begin(), preInitPosX.end(), turbinePosXH);
+    std::copy(preInitPosY.begin(), preInitPosY.end(), turbinePosYH);
+    std::copy(preInitPosZ.begin(), preInitPosZ.end(), turbinePosZH);
+    std::copy(preInitDiameters.begin(), preInitDiameters.end(), diametersH);
+
+    cudaMemoryManager->cudaCopyBladeGeometriesHtoD(this);
+    std::copy(preInitAzimuths.begin(), preInitAzimuths.end(), this->azimuthsH);
+    std::copy(preInitOmegas.begin(), preInitOmegas.end(), this->omegasH);
+    std::copy(preInitYaws.begin(), preInitYaws.end(), this->yawsH);
+
+    cudaMemoryManager->cudaCopyBladeOrientationsHtoD(this);
+    // Uses forceRatio, which init() sets before calling this function.
+    this->factorGaussian = pow(this->epsilon*sqrt(cPi),-c3o1)/this->forceRatio;
+}
+
+//! \brief Allocates the blade coordinate arrays and places every blade node at (0, 0, radius).
+void ActuatorFarm::initBladeCoords(CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeCoords(this);
+
+    for(uint turbine=0; turbine<numberOfTurbines; turbine++)
+    {
+        for(uint blade=0; blade<this->numberOfBlades; blade++)
+        {
+            for(uint bladeNode=0; bladeNode<this->numberOfBladeNodes; bladeNode++)
+            {
+                uint node = calcNode(bladeNode, this->numberOfBladeNodes, blade, this->numberOfBlades, turbine, this->numberOfTurbines);
+
+                this->bladeCoordsXH[node] = c0o1;
+                this->bladeCoordsYH[node] = c0o1;
+                this->bladeCoordsZH[node] = this->bladeRadiiH[calcNode(bladeNode, numberOfBladeNodes, 0, 1, turbine, numberOfTurbines)];
+            }
+        }
+    }
+    // Copy, swap the current/previous device pointers, copy again. Presumably this fills both
+    // device buffers with the initial values - TODO confirm which buffer cudaCopyBladeCoordsHtoD writes.
+    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
+    swapArrays(this->bladeCoordsXDCurrentTimestep, this->bladeCoordsXDPreviousTimestep);
+    swapArrays(this->bladeCoordsYDCurrentTimestep, this->bladeCoordsYDPreviousTimestep);
+    swapArrays(this->bladeCoordsZDCurrentTimestep, this->bladeCoordsZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
+}
+
+//! \brief Allocates the blade velocity arrays and sets all host velocities to zero.
+//! Same copy/swap/copy sequence as in initBladeCoords().
+void ActuatorFarm::initBladeVelocities(CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeVelocities(this);
+
+    std::fill_n(this->bladeVelocitiesXH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeVelocitiesYH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeVelocitiesZH, this->numberOfNodes, c0o1);
+
+    cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
+    swapArrays(this->bladeVelocitiesXDCurrentTimestep, this->bladeVelocitiesXDPreviousTimestep);
+    swapArrays(this->bladeVelocitiesYDCurrentTimestep, this->bladeVelocitiesYDPreviousTimestep);
+    swapArrays(this->bladeVelocitiesZDCurrentTimestep, this->bladeVelocitiesZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
+}
+
+//! \brief Allocates the blade force arrays and sets all host forces to zero.
+//! Same copy/swap/copy sequence as in initBladeCoords().
+void ActuatorFarm::initBladeForces(CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeForces(this);
+
+    std::fill_n(this->bladeForcesXH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeForcesYH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeForcesZH, this->numberOfNodes, c0o1);
+
+    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
+    swapArrays(this->bladeForcesXDCurrentTimestep, this->bladeForcesXDPreviousTimestep);
+    swapArrays(this->bladeForcesYDCurrentTimestep, this->bladeForcesYDPreviousTimestep);
+    swapArrays(this->bladeForcesZDCurrentTimestep, this->bladeForcesZDPreviousTimestep);
+    cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
+}
+
+//! \brief Allocates the per-blade-node grid indices and sets all of them to 1.
+//! interpolateVelocities passes these as the first argument of findNearestCellBSW() and stores the result back.
+void ActuatorFarm::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager)
+{
+    cudaMemoryManager->cudaAllocBladeIndices(this);
+
+    std::fill_n(this->bladeIndicesH, this->numberOfNodes, 1);
+
+    cudaMemoryManager->cudaCopyBladeIndicesHtoD(this);
+}
+
+//! \brief Collects the indices of all grid nodes within diameter/2 + 4*epsilon of a turbine
+//! position and uploads them; these are the nodes applyBodyForces is launched for.
+//! \throws std::runtime_error if a sphere contains fewer nodes than a sphere of radius
+//! (sphereRadius - deltaX) would hold at a spacing of deltaX
+void ActuatorFarm::initBoundingSpheres(Parameter* para, CudaMemoryManager* cudaMemoryManager)
+{
+    // A node that lies in the spheres of two turbines is stored once per turbine.
+    std::vector<int> nodesInSpheres;
+
+    for(uint turbine=0; turbine<this->numberOfTurbines; turbine++)
+    {
+        real sphereRadius = c1o2*this->diametersH[turbine]+c4o1*this->epsilon;
+
+        real posX = this->turbinePosXH[turbine];
+        real posY = this->turbinePosYH[turbine];
+        real posZ = this->turbinePosZH[turbine];
+
+        real sphereRadiusSqrd = sphereRadius*sphereRadius;
+
+        uint minimumNumberOfNodesPerSphere = (uint)(c4o3*cPi*pow(sphereRadius-this->deltaX, c3o1)/pow(this->deltaX, c3o1));
+        uint nodesInThisSphere = 0;
+
+        // NOTE(review): this loop runs from 1 to numberOfNodes inclusive, while the loops in
+        // Particles.cpp run from 0 to numberOfNodes-1. Confirm that the coordinate arrays hold
+        // numberOfNodes+1 entries; otherwise the last iteration reads one element past the end.
+        for (size_t pos = 1; pos <= para->getParH(this->level)->numberOfNodes; pos++)
+        {
+            const real distX = para->getParH(this->level)->coordinateX[pos]-posX;
+            const real distY = para->getParH(this->level)->coordinateY[pos]-posY;
+            const real distZ = para->getParH(this->level)->coordinateZ[pos]-posZ;
+            if(distSqrd(distX,distY,distZ) < sphereRadiusSqrd)
+            {
+                nodesInSpheres.push_back((int)pos);
+                nodesInThisSphere++;
+            }
+        }
+
+        if(nodesInThisSphere<minimumNumberOfNodesPerSphere)
+        {
+            VF_LOG_CRITICAL("Found only {} nodes in bounding sphere of turbine no. {}, expected at least {}!", nodesInThisSphere, turbine, minimumNumberOfNodesPerSphere);
+            throw std::runtime_error("ActuatorFarm::initBoundingSpheres: Turbine bounding sphere partially out of domain.");
+        }
+    }
+
+    this->numberOfIndices = uint(nodesInSpheres.size());
+
+    cudaMemoryManager->cudaAllocSphereIndices(this);
+    std::copy(nodesInSpheres.begin(), nodesInSpheres.end(), this->boundingSphereIndicesH);
+    cudaMemoryManager->cudaCopySphereIndicesHtoD(this);
+}
+
+// The setAll* functions copy from caller-owned arrays into the host arrays only:
+// numberOfTurbines values for orientations, numberOfNodes values for blade data.
+void ActuatorFarm::setAllAzimuths(real* _azimuths)
+{
+    std::copy_n(_azimuths, this->numberOfTurbines, this->azimuthsH);
+}
+
+void ActuatorFarm::setAllOmegas(real* _omegas)
+{
+    std::copy_n(_omegas, this->numberOfTurbines, this->omegasH);
+}
+
+void ActuatorFarm::setAllYaws(real* _yaws)
+{
+    std::copy_n(_yaws, this->numberOfTurbines, this->yawsH);
+}
+
+void ActuatorFarm::setAllBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
+{
+    std::copy_n(_bladeCoordsX, this->numberOfNodes, this->bladeCoordsXH);
+    std::copy_n(_bladeCoordsY, this->numberOfNodes, this->bladeCoordsYH);
+    std::copy_n(_bladeCoordsZ, this->numberOfNodes, this->bladeCoordsZH);
+}
+
+void ActuatorFarm::setAllBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
+{
+    std::copy_n(_bladeVelocitiesX, this->numberOfNodes, this->bladeVelocitiesXH);
+    std::copy_n(_bladeVelocitiesY, this->numberOfNodes, this->bladeVelocitiesYH);
+    std::copy_n(_bladeVelocitiesZ, this->numberOfNodes, this->bladeVelocitiesZH);
+}
+
+void ActuatorFarm::setAllBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
+{
+    std::copy_n(_bladeForcesX, this->numberOfNodes, this->bladeForcesXH);
+    std::copy_n(_bladeForcesY, this->numberOfNodes, this->bladeForcesYH);
+    std::copy_n(_bladeForcesZ, this->numberOfNodes, this->bladeForcesZH);
+
+}
+// The setTurbineBlade* functions copy numberOfBladeNodes*numberOfBlades values into the
+// slice of the host array that belongs to the given turbine.
+void ActuatorFarm::setTurbineBladeCoords(uint turbine, real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
+{
+    std::copy_n(_bladeCoordsX, numberOfBladeNodes*numberOfBlades, &this->bladeCoordsXH[turbine*numberOfBladeNodes*numberOfBlades]);
+    std::copy_n(_bladeCoordsY, numberOfBladeNodes*numberOfBlades, &this->bladeCoordsYH[turbine*numberOfBladeNodes*numberOfBlades]);
+    std::copy_n(_bladeCoordsZ, numberOfBladeNodes*numberOfBlades, &this->bladeCoordsZH[turbine*numberOfBladeNodes*numberOfBlades]);
+}
+
+void ActuatorFarm::setTurbineBladeVelocities(uint turbine, real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
+{
+    std::copy_n(_bladeVelocitiesX, numberOfBladeNodes*numberOfBlades, &this->bladeVelocitiesXH[turbine*numberOfBladeNodes*numberOfBlades]);
+    std::copy_n(_bladeVelocitiesY, numberOfBladeNodes*numberOfBlades, &this->bladeVelocitiesYH[turbine*numberOfBladeNodes*numberOfBlades]);
+    std::copy_n(_bladeVelocitiesZ, numberOfBladeNodes*numberOfBlades, &this->bladeVelocitiesZH[turbine*numberOfBladeNodes*numberOfBlades]);
+}
+
+void ActuatorFarm::setTurbineBladeForces(uint turbine, real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
+{
+    std::copy_n(_bladeForcesX, numberOfBladeNodes*numberOfBlades, 
&this->bladeForcesXH[turbine*numberOfBladeNodes*numberOfBlades]); + std::copy_n(_bladeForcesY, numberOfBladeNodes*numberOfBlades, &this->bladeForcesYH[turbine*numberOfBladeNodes*numberOfBlades]); + std::copy_n(_bladeForcesZ, numberOfBladeNodes*numberOfBlades, &this->bladeForcesZH[turbine*numberOfBladeNodes*numberOfBlades]); +} + +void ActuatorFarm::swapDeviceArrays() +{ + swapArrays(this->bladeCoordsXDPreviousTimestep, this->bladeCoordsXDCurrentTimestep); + swapArrays(this->bladeCoordsYDPreviousTimestep, this->bladeCoordsYDCurrentTimestep); + swapArrays(this->bladeCoordsZDPreviousTimestep, this->bladeCoordsZDCurrentTimestep); + + swapArrays(this->bladeVelocitiesXDPreviousTimestep, this->bladeVelocitiesXDCurrentTimestep); + swapArrays(this->bladeVelocitiesYDPreviousTimestep, this->bladeVelocitiesYDCurrentTimestep); + swapArrays(this->bladeVelocitiesZDPreviousTimestep, this->bladeVelocitiesZDCurrentTimestep); + + swapArrays(this->bladeForcesXDPreviousTimestep, this->bladeForcesXDCurrentTimestep); + swapArrays(this->bladeForcesYDPreviousTimestep, this->bladeForcesYDCurrentTimestep); + swapArrays(this->bladeForcesZDPreviousTimestep, this->bladeForcesZDCurrentTimestep); +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h new file mode 100644 index 0000000000000000000000000000000000000000..8e21cdb6b21efd323f6723e21d6b28614109f1ec --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h @@ -0,0 +1,197 @@ +#ifndef ActuatorFarm_H +#define ActuatorFarm_H + +#include "PreCollisionInteractor.h" +#include "PointerDefinitions.h" +#include "lbm/constants/NumericConstants.h" +#include <stdexcept> + +using namespace vf::lbm::constant; + +class Parameter; +class GridProvider; +using namespace vf::lbm::constant; + +class ActuatorFarm : public PreCollisionInteractor +{ +public: + ActuatorFarm( + const uint _nBlades, + const real _density, 
+ const uint _nBladeNodes, + const real _epsilon, + int _level, + const real _deltaT, + const real _deltaX, + const bool _useHostArrays + ) : + numberOfBlades(_nBlades), + density(_density), + numberOfBladeNodes(_nBladeNodes), + epsilon(_epsilon), + level(_level), + useHostArrays(_useHostArrays), + numberOfTurbines(0), + numberOfNodes(0), + PreCollisionInteractor() + { + this->deltaT = _deltaT*exp2(-this->level); + this->deltaX = _deltaX*exp2(-this->level); + this->invEpsilonSqrd = 1/(epsilon*epsilon); + this->invDeltaX = c1o1/this->deltaX; + + if(this->epsilon<this->deltaX) + throw std::runtime_error("ActuatorFarm::ActuatorFarm: epsilon needs to be larger than dx!"); + } + + ~ActuatorFarm() override = default; + void addTurbine(real turbinePosX, real turbinePosY, real turbinePosZ, real diameter, real omega, real azimuth, real yaw, std::vector<real> bladeRadii); + void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override; + void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override; + void free(Parameter* para, CudaMemoryManager* cudaManager) override; + void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; + + void write(uint t); + + real getDensity(){ return this->density; }; + real getDeltaT(){ return this->deltaT; }; + real getDeltaX(){ return this->deltaX; }; + + uint getNumberOfTurbines(){ return this->numberOfTurbines; }; + uint getNumberOfNodesPerBlade(){ return this->numberOfBladeNodes; }; + uint getNumberOfBladesPerTurbine(){ return this->numberOfBlades; }; + + uint getNumberOfIndices(){ return this->numberOfIndices; }; + uint getNumberOfNodes(){ return this->numberOfNodes; }; + + real* getAllAzimuths(){ return azimuthsH; }; + real* getAllOmegas(){ return omegasH; }; + real* getAllYaws(){ return yawsH; }; + + real* getAllTurbinePosX(){ return turbinePosXH; }; + real* getAllTurbinePosY(){ return turbinePosYH; }; + real* getAllTurbinePosZ(){ return turbinePosZH; 
}; + + real getTurbineAzimuth(uint turbine){ return azimuthsH[turbine]; }; + real getTurbineOmega (uint turbine){ return omegasH[turbine]; }; + real getTurbineYaw (uint turbine){ return yawsH[turbine]; }; + + real getTurbinePosX(uint turbine){ return turbinePosXH[turbine]; }; + real getTurbinePosY(uint turbine){ return turbinePosYH[turbine]; }; + real getTurbinePosZ(uint turbine){ return turbinePosZH[turbine]; }; + + real* getAllBladeRadii(){ return this->bladeRadiiH; }; + real* getAllBladeCoordsX(){ return this->bladeCoordsXH; }; + real* getAllBladeCoordsY(){ return this->bladeCoordsYH; }; + real* getAllBladeCoordsZ(){ return this->bladeCoordsZH; }; + real* getAllBladeVelocitiesX(){ return this->bladeVelocitiesXH; }; + real* getAllBladeVelocitiesY(){ return this->bladeVelocitiesYH; }; + real* getAllBladeVelocitiesZ(){ return this->bladeVelocitiesZH; }; + real* getAllBladeForcesX(){ return this->bladeForcesXH; }; + real* getAllBladeForcesY(){ return this->bladeForcesYH; }; + real* getAllBladeForcesZ(){ return this->bladeForcesZH; }; + + real* getTurbineBladeRadii(uint turbine){ return &this->bladeRadiiH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeCoordsX(uint turbine){ return &this->bladeCoordsXH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeCoordsY(uint turbine){ return &this->bladeCoordsYH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeCoordsZ(uint turbine){ return &this->bladeCoordsZH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeVelocitiesX(uint turbine){ return &this->bladeVelocitiesXH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeVelocitiesY(uint turbine){ return &this->bladeVelocitiesYH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeVelocitiesZ(uint turbine){ return &this->bladeVelocitiesZH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesX(uint turbine){ return 
&this->bladeForcesXH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesY(uint turbine){ return &this->bladeForcesYH[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesZ(uint turbine){ return &this->bladeForcesZH[turbine*numberOfBladeNodes*numberOfBlades]; }; + + real* getAllBladeRadiiDevice(){ return this->bladeRadiiD; }; + real* getAllBladeCoordsXDevice(){ return this->bladeCoordsXDCurrentTimestep; }; + real* getAllBladeCoordsYDevice(){ return this->bladeCoordsYDCurrentTimestep; }; + real* getAllBladeCoordsZDevice(){ return this->bladeCoordsZDCurrentTimestep; }; + real* getAllBladeVelocitiesXDevice(){ return this->bladeVelocitiesXDCurrentTimestep; }; + real* getAllBladeVelocitiesYDevice(){ return this->bladeVelocitiesYDCurrentTimestep; }; + real* getAllBladeVelocitiesZDevice(){ return this->bladeVelocitiesZDCurrentTimestep; }; + real* getAllBladeForcesXDevice(){ return this->bladeForcesXDCurrentTimestep; }; + real* getAllBladeForcesYDevice(){ return this->bladeForcesYDCurrentTimestep; }; + real* getAllBladeForcesZDevice(){ return this->bladeForcesZDCurrentTimestep; }; + + real* getTurbineBladeRadiiDevice(uint turbine){ return &this->bladeRadiiD[turbine*numberOfBladeNodes]; }; + real* getTurbineBladeCoordsXDevice(uint turbine){ return &this->bladeCoordsXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeCoordsYDevice(uint turbine){ return &this->bladeCoordsYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeCoordsZDevice(uint turbine){ return &this->bladeCoordsZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeVelocitiesXDevice(uint turbine){ return &this->bladeVelocitiesXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeVelocitiesYDevice(uint turbine){ return &this->bladeVelocitiesYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* 
getTurbineBladeVelocitiesZDevice(uint turbine){ return &this->bladeVelocitiesZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesXDevice(uint turbine){ return &this->bladeForcesXDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesYDevice(uint turbine){ return &this->bladeForcesYDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + real* getTurbineBladeForcesZDevice(uint turbine){ return &this->bladeForcesZDCurrentTimestep[turbine*numberOfBladeNodes*numberOfBlades]; }; + + void setAllAzimuths(real* _azimuth); + void setAllOmegas(real* _omegas); + void setAllYaws(real* yaws); + + void setTurbineAzimuth(uint turbine, real azimuth){ azimuthsH[turbine] = azimuth; }; + void setTurbineYaw(uint turbine, real yaw){ yawsH[turbine] = yaw; }; + void setTurbineOmega(uint turbine, real omega){ omegasH[turbine] = omega; }; + + void setAllBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ); + void setAllBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ); + void setAllBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ); + + void setTurbineBladeCoords(uint turbine, real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ); + void setTurbineBladeVelocities(uint turbine, real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ); + void setTurbineBladeForces(uint turbine, real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ); + + virtual void calcBladeForces(); + +private: + void initTurbineGeometries(CudaMemoryManager* cudaManager); + void initBoundingSpheres(Parameter* para, CudaMemoryManager* cudaManager); + void initBladeCoords(CudaMemoryManager* cudaManager); + void initBladeVelocities(CudaMemoryManager* cudaManager); + void initBladeForces(CudaMemoryManager* cudaManager); + void initBladeIndices(Parameter* para, CudaMemoryManager* cudaManager); + + void 
calcForcesEllipticWing(); + void rotateBlades(real angle, uint turbineID); + + void writeBladeCoords(uint t); + void writeBladeForces(uint t); + void writeBladeVelocities(uint t); + + void swapDeviceArrays(); + +public: + real* bladeRadiiH; + real* bladeRadiiD; + real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH; + real* bladeCoordsXDPreviousTimestep, * bladeCoordsYDPreviousTimestep, * bladeCoordsZDPreviousTimestep; + real* bladeCoordsXDCurrentTimestep, * bladeCoordsYDCurrentTimestep, * bladeCoordsZDCurrentTimestep; + real* bladeVelocitiesXH, * bladeVelocitiesYH, * bladeVelocitiesZH; + real* bladeVelocitiesXDPreviousTimestep, * bladeVelocitiesYDPreviousTimestep, * bladeVelocitiesZDPreviousTimestep; + real* bladeVelocitiesXDCurrentTimestep, * bladeVelocitiesYDCurrentTimestep, * bladeVelocitiesZDCurrentTimestep; + real* bladeForcesXH, * bladeForcesYH, * bladeForcesZH; + real* bladeForcesXDPreviousTimestep, * bladeForcesYDPreviousTimestep, * bladeForcesZDPreviousTimestep; + real* bladeForcesXDCurrentTimestep, * bladeForcesYDCurrentTimestep, * bladeForcesZDCurrentTimestep; + uint* bladeIndicesH; + uint* bladeIndicesD; + uint* boundingSphereIndicesH; + uint* boundingSphereIndicesD; + real* turbinePosXH, *turbinePosYH, *turbinePosZH, *omegasH, *azimuthsH, *yawsH, *diametersH; + real* turbinePosXD, *turbinePosYD, *turbinePosZD, *omegasD, *azimuthsD, *yawsD, *diametersD; + +private: + std::vector<real> preInitPosX, preInitPosY, preInitPosZ, preInitDiameters, preInitOmegas, preInitAzimuths, preInitYaws; + std::vector<std::vector<real>> preInitBladeRadii; + const bool useHostArrays; + const real density; + real deltaT, deltaX; + const uint numberOfBladeNodes, numberOfBlades; + uint numberOfTurbines; + const real epsilon; // in m + const int level; + uint numberOfIndices; + uint numberOfNodes; + real forceRatio, factorGaussian, invEpsilonSqrd, invDeltaX; + int streamIndex; +}; + +#endif diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu 
b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu deleted file mode 100644 index 71897bd21ea4fb299d3cc0ffa385506d4503f360..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu +++ /dev/null @@ -1,423 +0,0 @@ -#include "ActuatorLine.h" - -#include <cuda.h> -#include <cuda_runtime.h> -#include <helper_cuda.h> - -#include <cuda/CudaGrid.h> -#include "VirtualFluids_GPU/GPU/GeometryUtils.h" - -#include "Parameter/Parameter.h" -#include "DataStructureInitializer/GridProvider.h" -#include "GPU/CudaMemoryManager.h" - -__host__ __device__ __inline__ uint calcNode(uint bladeNode, uint nBladeNodes, uint blade, uint nBlades) -{ - return bladeNode+blade*nBladeNodes; -} - -__host__ __device__ __inline__ void calcBladeAndBladeNode(uint node, uint& bladeNode, uint nBladeNodes, uint& blade, uint nBlades) -{ - blade = node/nBladeNodes; - bladeNode = node - blade*nBladeNodes; -} - -__host__ __device__ __forceinline__ real distSqrd(real distX, real distY, real distZ) -{ - return distX*distX+distY*distY+distZ*distZ; -} - -__host__ __device__ __inline__ void rotateFromBladeToGlobal( - real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, - real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF, - real& azimuth, real& yaw) -{ - real tmpX, tmpY, tmpZ; - - rotateAboutX3D(azimuth, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, tmpX, tmpY, tmpZ); - rotateAboutZ3D(yaw, tmpX, tmpY, tmpZ, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF); - -} - -__host__ __device__ __inline__ void rotateFromGlobalToBlade( - real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, - real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF, - real& azimuth, real& yaw) -{ - real tmpX, tmpY, tmpZ; - - invRotateAboutZ3D(yaw, bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, tmpX, tmpY, tmpZ); - invRotateAboutX3D(azimuth, tmpX, tmpY, tmpZ, bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF); -} - -__global__ 
void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, - uint* neighborsX, uint* neighborsY, uint* neighborsZ, uint* neighborsWSB, - real* vx, real* vy, real* vz, - real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, - real* bladeVelocitiesX, real* bladeVelocitiesY, real* bladeVelocitiesZ, - uint nBlades, uint nBladeNodes, - real azimuth, real yaw, real omega, - real turbPosX, real turbPosY, real turbPosZ, - uint* bladeIndices, real velocityRatio, real invDeltaX) -{ - const uint x = threadIdx.x; - const uint y = blockIdx.x; - const uint z = blockIdx.y; - - const uint nx = blockDim.x; - const uint ny = gridDim.x; - - const uint node = nx*(ny*z + y) + x; - - uint bladeNode, blade; - - calcBladeAndBladeNode(node, bladeNode, nBladeNodes, blade, nBlades); - - if(node>=nBladeNodes*nBlades) return; - - real bladeCoordX_BF = bladeCoordsX[node]; - real bladeCoordY_BF = bladeCoordsY[node]; - real bladeCoordZ_BF = bladeCoordsZ[node]; - - real bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF; - - real localAzimuth = azimuth+blade*c2Pi/nBlades; - - rotateFromBladeToGlobal(bladeCoordX_BF, bladeCoordY_BF, bladeCoordZ_BF, - bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, - localAzimuth, yaw); - - bladeCoordX_GF += turbPosX; - bladeCoordY_GF += turbPosY; - bladeCoordZ_GF += turbPosZ; - - uint k, ke, kn, kt; - uint kne, kte, ktn, ktne; - - k = findNearestCellBSW(bladeIndices[node], - gridCoordsX, gridCoordsY, gridCoordsZ, - bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, - neighborsX, neighborsY, neighborsZ, neighborsWSB); - - bladeIndices[node] = k; - - getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ); - - real dW, dE, dN, dS, dT, dB; - - real distX = invDeltaX*(bladeCoordX_GF-gridCoordsX[k]); - real distY = invDeltaX*(bladeCoordY_GF-gridCoordsY[k]); - real distZ = invDeltaX*(bladeCoordZ_GF-gridCoordsZ[k]); - - getInterpolationWeights(dW, dE, dN, dS, dT, dB, distX, distY, distZ); - - real bladeVelX_GF 
= trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vx)*velocityRatio; - real bladeVelY_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vy)*velocityRatio; - real bladeVelZ_GF = trilinearInterpolation(dW, dE, dN, dS, dT, dB, k, ke, kn, kt, kne, kte, ktn, ktne, vz)*velocityRatio; - - real bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF; - - rotateFromGlobalToBlade(bladeVelX_BF, bladeVelY_BF, bladeVelZ_BF, - bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, - localAzimuth, yaw); - - bladeVelocitiesX[node] = bladeVelX_BF; - bladeVelocitiesY[node] = bladeVelY_BF+omega*bladeCoordZ_BF; - bladeVelocitiesZ[node] = bladeVelZ_BF; -} - - -__global__ void applyBodyForces(real* gridCoordsX, real* gridCoordsY, real* gridCoordsZ, - real* gridForcesX, real* gridForcesY, real* gridForcesZ, - real* bladeCoordsX, real* bladeCoordsY, real* bladeCoordsZ, - real* bladeForcesX, real* bladeForcesY,real* bladeForcesZ, - uint nBlades, uint nBladeNodes, - real azimuth, real yaw, real omega, - real turbPosX, real turbPosY, real turbPosZ, - uint* gridIndices, uint nIndices, - real invEpsilonSqrd, real factorGaussian) -{ - const uint x = threadIdx.x; - const uint y = blockIdx.x; - const uint z = blockIdx.y; - - const uint nx = blockDim.x; - const uint ny = gridDim.x; - - const uint index = nx*(ny*z + y) + x; - - if(index>=nIndices) return; - - uint gridIndex = gridIndices[index]; - - real gridCoordX_RF = gridCoordsX[gridIndex] - turbPosX; - real gridCoordY_RF = gridCoordsY[gridIndex] - turbPosY; - real gridCoordZ_RF = gridCoordsZ[gridIndex] - turbPosZ; - - real gridForceX_RF = c0o1; - real gridForceY_RF = c0o1; - real gridForceZ_RF = c0o1; - - real dAzimuth = c2Pi/nBlades; - - for( uint blade=0; blade<nBlades; blade++) - { - real localAzimuth = azimuth+blade*dAzimuth; - - real gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF; - - rotateFromGlobalToBlade(gridCoordX_BF, gridCoordY_BF, gridCoordZ_BF, - gridCoordX_RF, gridCoordY_RF, gridCoordZ_RF, 
- localAzimuth, yaw); - - for( uint bladeNode=0; bladeNode<nBladeNodes; bladeNode++) - { - uint node = calcNode(bladeNode, nBladeNodes, blade, nBlades); - - real eta = factorGaussian*exp(-distSqrd(bladeCoordsX[node]-gridCoordX_BF, bladeCoordsY[node]-gridCoordY_BF, bladeCoordsZ[node]-gridCoordZ_BF)*invEpsilonSqrd); - - real forceX_RF, forceY_RF, forceZ_RF; - - rotateFromBladeToGlobal(bladeForcesX[node], bladeForcesY[node], bladeForcesZ[node], - forceX_RF, forceY_RF, forceZ_RF, - localAzimuth, yaw); - - gridForceX_RF += forceX_RF*eta; - gridForceY_RF += forceY_RF*eta; - gridForceZ_RF += forceZ_RF*eta; - } - } - - atomicAdd(&gridForcesX[gridIndex], gridForceX_RF); - atomicAdd(&gridForcesY[gridIndex], gridForceY_RF); - atomicAdd(&gridForcesZ[gridIndex], gridForceZ_RF); -} - - -void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) -{ - if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorLine but BodyForce is not set in Parameter."); - this->initBladeRadii(cudaMemoryManager); - this->initBladeCoords(cudaMemoryManager); - this->initBladeIndices(para, cudaMemoryManager); - this->initBladeVelocities(cudaMemoryManager); - this->initBladeForces(cudaMemoryManager); - this->initBoundingSphere(para, cudaMemoryManager); -} - - -void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, unsigned int t) -{ - if (level != this->level) return; - - cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); - - vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nNodes); - - interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads >>>( - para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, - para->getParD(this->level)->neighborX, para->getParD(this->level)->neighborY, para->getParD(this->level)->neighborZ, para->getParD(this->level)->neighborInverse, - 
para->getParD(this->level)->velocityX, para->getParD(this->level)->velocityY, para->getParD(this->level)->velocityZ, - this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD, - this->bladeVelocitiesXD, this->bladeVelocitiesYD, this->bladeVelocitiesZD, - this->nBlades, this->nBladeNodes, - this->azimuth, this->yaw, this->omega, - this->turbinePosX, this->turbinePosY, this->turbinePosZ, - this->bladeIndicesD, para->getVelocityRatio(), this->invDeltaX); - - cudaMemoryManager->cudaCopyBladeVelocitiesDtoH(this); - - this->calcBladeForces(); - - cudaMemoryManager->cudaCopyBladeForcesHtoD(this); - - vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->nIndices); - - applyBodyForces<<<sphereGrid.grid, sphereGrid.threads>>>( - para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ, - para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP, - this->bladeCoordsXD, this->bladeCoordsYD, this->bladeCoordsZD, - this->bladeForcesXD, this->bladeForcesYD, this->bladeForcesZD, - this->nBlades, this->nBladeNodes, - this->azimuth, this->yaw, this->omega, - this->turbinePosX, this->turbinePosY, this->turbinePosZ, - this->boundingSphereIndicesD, this->nIndices, - this->invEpsilonSqrd, this->factorGaussian); - - this->azimuth = fmod(this->azimuth+this->omega*this->deltaT,c2Pi); -} - - -void ActuatorLine::free(Parameter* para, CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaFreeBladeRadii(this); - cudaMemoryManager->cudaFreeBladeCoords(this); - cudaMemoryManager->cudaFreeBladeVelocities(this); - cudaMemoryManager->cudaFreeBladeForces(this); - cudaMemoryManager->cudaFreeBladeIndices(this); - cudaMemoryManager->cudaFreeSphereIndices(this); -} - - -void ActuatorLine::calcForcesEllipticWing() -{ - uint node; - real u_rel, v_rel, u_rel_sq; - real phi; - real Cl = c1o1; - real Cd = c0o1; - real c0 = 
c1o1; - - real c, Cn, Ct; - - for( uint blade=0; blade<this->nBlades; blade++) - { - for( uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++) - { - node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades); - - u_rel = this->bladeVelocitiesXH[node]; - v_rel = this->bladeVelocitiesYH[node]; - u_rel_sq = u_rel*u_rel+v_rel*v_rel; - phi = atan2(u_rel, v_rel); - - real tmp = c4o1*this->bladeRadiiH[bladeNode]/this->diameter-c1o1; - c = c0 * sqrt( c1o1- tmp*tmp ); - Cn = Cl*cos(phi)+Cd*sin(phi); - Ct = Cl*sin(phi)-Cd*cos(phi); - - this->bladeForcesXH[node] = -c1o2*u_rel_sq*c*this->density*Cn; - this->bladeForcesYH[node] = -c1o2*u_rel_sq*c*this->density*Ct; - this->bladeForcesZH[node] = c0o1; - } - } -} - -void ActuatorLine::calcBladeForces() -{ - this->calcForcesEllipticWing(); -} - -void ActuatorLine::initBladeRadii(CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaAllocBladeRadii(this); - - real dr = c1o2*this->diameter/this->nBladeNodes; - - for(uint node=0; node<this->nBladeNodes; node++) - { - this->bladeRadiiH[node] = dr*(node+1); - } - cudaMemoryManager->cudaCopyBladeRadiiHtoD(this); - - real dxOPiSqrtEps = pow(this->deltaX/(this->epsilon*sqrt(cPi)),c3o1); - this->factorGaussian = dr*dxOPiSqrtEps/this->forceRatio; -} - -void ActuatorLine::initBladeCoords(CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaAllocBladeCoords(this); - - for(uint blade=0; blade<this->nBlades; blade++) - { - for(uint bladeNode=0; bladeNode<this->nBladeNodes; bladeNode++) - { - uint node = calcNode(bladeNode, this->nBladeNodes, blade, this->nBlades); - - this->bladeCoordsXH[node] = c0o1; - this->bladeCoordsYH[node] = c0o1; - this->bladeCoordsZH[node] = this->bladeRadiiH[bladeNode]; - } - } - cudaMemoryManager->cudaCopyBladeCoordsHtoD(this); -} - -void ActuatorLine::initBladeVelocities(CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaAllocBladeVelocities(this); - - for(uint node=0; node<this->nNodes; node++) - { - 
this->bladeVelocitiesXH[node] = c0o1; - this->bladeVelocitiesYH[node] = c0o1; - this->bladeVelocitiesZH[node] = c0o1; - } - cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this); -} - -void ActuatorLine::initBladeForces(CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaAllocBladeForces(this); - - for(uint node=0; node<this->nNodes; node++) - { - this->bladeForcesXH[node] = c0o1; - this->bladeForcesYH[node] = c0o1; - this->bladeForcesZH[node] = c0o1; - } - cudaMemoryManager->cudaCopyBladeForcesHtoD(this); -} - -void ActuatorLine::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager) -{ - cudaMemoryManager->cudaAllocBladeIndices(this); - - for(uint node=0; node<this->nNodes; node++) - - { - this->bladeIndicesH[node] = 1; - } - cudaMemoryManager->cudaCopyBladeIndicesHtoD(this); -} - -void ActuatorLine::initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMemoryManager) -{ - // Actuator line exists only on 1 level - std::vector<int> nodesInSphere; - real sphereRadius = c1o2*this->diameter+c4o1*this->epsilon; - real sphereRadiusSqrd = sphereRadius*sphereRadius; - - for (uint j = 1; j <= para->getParH(this->level)->numberOfNodes; j++) - { - const real distX = para->getParH(this->level)->coordinateX[j]-this->turbinePosX; - const real distY = para->getParH(this->level)->coordinateY[j]-this->turbinePosY; - const real distZ = para->getParH(this->level)->coordinateZ[j]-this->turbinePosZ; - if(distSqrd(distX,distY,distZ) < sphereRadiusSqrd) nodesInSphere.push_back(j); - } - - this->nIndices = uint(nodesInSphere.size()); - cudaMemoryManager->cudaAllocSphereIndices(this); - std::copy(nodesInSphere.begin(), nodesInSphere.end(), this->boundingSphereIndicesH); - cudaMemoryManager->cudaCopySphereIndicesHtoD(this); -} - -void ActuatorLine::setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ) -{ - - for(uint node=0; node<this->nNodes; node++) - { - this->bladeCoordsXH[node] = _bladeCoordsX[node]; - 
this->bladeCoordsYH[node] = _bladeCoordsY[node]; - this->bladeCoordsZH[node] = _bladeCoordsZ[node]; - } -} - -void ActuatorLine::setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ) -{ - for(uint node=0; node<this->nNodes; node++) - { - this->bladeVelocitiesXH[node] = _bladeVelocitiesX[node]; - this->bladeVelocitiesYH[node] = _bladeVelocitiesY[node]; - this->bladeVelocitiesZH[node] = _bladeVelocitiesZ[node]; - } -} - -void ActuatorLine::setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ) -{ - for(uint node=0; node<this->nNodes; node++) - { - this->bladeForcesXH[node] = _bladeForcesX[node]; - this->bladeForcesYH[node] = _bladeForcesY[node]; - this->bladeForcesZH[node] = _bladeForcesZ[node]; - } -} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h deleted file mode 100644 index b44c89c5020eb206baa3bba1994b1e45f760c3bb..0000000000000000000000000000000000000000 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef ActuatorLine_H -#define ActuatorLine_H - -#include "PreCollisionInteractor.h" -#include "PointerDefinitions.h" -#include "VirtualFluids_GPU_export.h" -#include "lbm/constants/NumericConstants.h" - -class Parameter; -class GridProvider; - -using namespace vf::lbm::constant; -class VIRTUALFLUIDS_GPU_EXPORT ActuatorLine : public PreCollisionInteractor -{ -public: - ActuatorLine( - const uint _nBlades, - const real _density, - const uint _nBladeNodes, - const real _epsilon, - real _turbinePosX, real _turbinePosY, real _turbinePosZ, - const real _diameter, - int _level, - const real _deltaT, - const real _deltaX - ) : nBlades(_nBlades), - density(_density), - nBladeNodes(_nBladeNodes), - epsilon(_epsilon), - turbinePosX(_turbinePosX), turbinePosY(_turbinePosY), turbinePosZ(_turbinePosZ), - diameter(_diameter), - 
level(_level), - PreCollisionInteractor() - { - this->deltaT = _deltaT*exp2(-this->level); - this->deltaX = _deltaX*exp2(-this->level); - this->invDeltaX = c1o1/this->deltaX; - this->forceRatio = this->density*pow(this->deltaX,4)*pow(this->deltaT,-2); - this->invEpsilonSqrd = c1o1/(this->epsilon*this->epsilon); - this->nNodes = this->nBladeNodes*this->nBlades; - this->omega = c1o1; - this->azimuth = c0o1; - this->yaw = c0o1; - }; - - virtual ~ActuatorLine(){}; - - void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override; - void interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t) override; - void free(Parameter* para, CudaMemoryManager* cudaMemoryManager) override; - void write(uint t); - - uint getNBladeNodes(){ return this->nBladeNodes; }; - uint getNBlades(){ return this->nBlades;}; - uint getNIndices(){ return this->nIndices; }; - uint getNNodes(){ return this->nNodes; }; - real getOmega(){ return this->omega; }; - real getAzimuth(){ return this->azimuth; }; - real getYaw(){ return this->yaw; }; - real getDensity(){ return this->density; }; - real getPositionX(){ return this->turbinePosX; }; - real getPositionY(){ return this->turbinePosY; }; - real getPositionZ(){ return this->turbinePosZ; }; - real* getBladeRadii(){ return this->bladeRadiiH; }; - real* getBladeCoordsX(){ return this->bladeCoordsXH; }; - real* getBladeCoordsY(){ return this->bladeCoordsYH; }; - real* getBladeCoordsZ(){ return this->bladeCoordsZH; }; - real* getBladeVelocitiesX(){ return this->bladeVelocitiesXH; }; - real* getBladeVelocitiesY(){ return this->bladeVelocitiesYH; }; - real* getBladeVelocitiesZ(){ return this->bladeVelocitiesZH; }; - real* getBladeForcesX(){ return this->bladeForcesXH; }; - real* getBladeForcesY(){ return this->bladeForcesYH; }; - real* getBladeForcesZ(){ return this->bladeForcesZH; }; - - void setOmega(real _omega){ this->omega = _omega; }; - void setAzimuth(real _azimuth){ this->azimuth = 
_azimuth; }; - void setYaw(real _yaw){ this->yaw = _yaw; }; - void setBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ); - void setBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ); - void setBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ); - virtual void calcBladeForces(); - -private: - void initBoundingSphere(Parameter* para, CudaMemoryManager* cudaMemoryManager); - - void initBladeRadii(CudaMemoryManager* cudaMemoryManager); - void initBladeCoords(CudaMemoryManager* cudaMemoryManager); - void initBladeVelocities(CudaMemoryManager* cudaMemoryManager); - void initBladeForces(CudaMemoryManager* cudaMemoryManager); - void initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemoryManager); - - void calcForcesEllipticWing(); - -public: - real* bladeRadiiH; - real* bladeRadiiD; - real* bladeCoordsXH, * bladeCoordsYH, * bladeCoordsZH; - real* bladeCoordsXD, * bladeCoordsYD, * bladeCoordsZD; - real* bladeVelocitiesXH, * bladeVelocitiesYH, * bladeVelocitiesZH; - real* bladeVelocitiesXD, * bladeVelocitiesYD, * bladeVelocitiesZD; - real* bladeForcesXH, * bladeForcesYH, * bladeForcesZH; - real* bladeForcesXD, * bladeForcesYD, * bladeForcesZD; - uint* bladeIndicesH; - uint* bladeIndicesD; - uint* boundingSphereIndicesH; - uint* boundingSphereIndicesD; - -private: - const real density; - real turbinePosX, turbinePosY, turbinePosZ; - real omega, azimuth, yaw, deltaT, deltaX, invDeltaX, forceRatio, factorGaussian, invEpsilonSqrd; - const real diameter; - const uint nBladeNodes; - const uint nBlades; - const real epsilon; // in m - const int level; - uint nIndices, nNodes; -}; - -#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h index a9b233f3035890c2617d3a00b639f995be6c218f..f9a87f613e7607301e59a7c1e67eb556418892e4 100644 --- 
a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h @@ -33,6 +33,7 @@ public: virtual void init(Parameter *para, GridProvider *gridProvider, CudaMemoryManager *cudaMemoryManager) = 0; virtual void interact(Parameter *para, CudaMemoryManager *cudaMemoryManager, int level, uint t) = 0; virtual void free(Parameter *para, CudaMemoryManager *cudaMemoryManager) = 0; + virtual void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) = 0; protected: uint updateInterval; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu new file mode 100644 index 0000000000000000000000000000000000000000..1a8260ef936e2707fb38fbbba71cdbfac692f350 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu @@ -0,0 +1,359 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. 
VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PrecursorWriter.cu +//! \ingroup PreCollisionInteractor +//! \author Henrik Asmuth, Henry Korb +//====================================================================================== +#include "PrecursorWriter.h" +#include "basics/writer/WbWriterVtkXmlImageBinary.h" + +#include <cuda.h> +#include <cuda_runtime.h> +#include <helper_cuda.h> +#include "cuda/CudaGrid.h" +#include "LBM/GPUHelperFunctions/KernelUtilities.h" + +#include "Core/StringUtilities/StringUtil.h" + +#include "Parameter/Parameter.h" +#include "DataStructureInitializer/GridProvider.h" +#include "GPU/CudaMemoryManager.h" + +using namespace vf::lbm::dir; +using namespace vf::gpu; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//TODO check everything for multiple level +void index1d(int& idx, int y, int z, int ny, int nz) +{ + idx = y+ny*z; +} + +void index2d(int idx, int& y, int& z, int ny, int nz) +{ + z = idx/ny; + y = idx-ny*z; +} + +__inline__ __host__ __device__ uint linearIdx(const uint component, const uint node, const uint timestep, const uint numberOfComponents, const uint numberOfNodes) +{ + return node+numberOfNodes*(component+numberOfComponents*timestep); +} + +__inline__ __host__ __device__ uint 
linearIdx(const uint component, const uint node, const uint numberOfNodes) +{ + return node+component*numberOfNodes; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void fillArrayVelocities(const uint numberOfPrecursorNodes, + uint* indices, + real *precursorData, + real *vx, + real *vy, + real *vz, + real velocityRatio) + + +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = vf::gpu::getNodeIndex(); + + if(nodeIndex>=numberOfPrecursorNodes) return; + + precursorData[linearIdx(0u, nodeIndex, numberOfPrecursorNodes)] = vx[indices[nodeIndex]]*velocityRatio; + precursorData[linearIdx(1u, nodeIndex, numberOfPrecursorNodes)] = vy[indices[nodeIndex]]*velocityRatio; + precursorData[linearIdx(2u, nodeIndex, numberOfPrecursorNodes)] = vz[indices[nodeIndex]]*velocityRatio; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +__global__ void fillArrayDistributions( uint numberOfPrecursorNodes, + uint* indices, + real* precursorData, + real* distributions, + uint* neighborX, uint* neighborY, uint* neighborZ, + bool isEvenTimestep, + unsigned long numberOfLBnodes) +{ + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim. + //! + const unsigned nodeIndex = vf::gpu::getNodeIndex(); + + if(nodeIndex>=numberOfPrecursorNodes) return; + + Distributions27 dist; + getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep); + + //////////////////////////////////////////////////////////////////////////////// + // ! 
- Set neighbor indices (necessary for indirect addressing) + uint k_000 = indices[nodeIndex]; + // uint k_M00 = neighborX[k_000]; + uint k_0M0 = neighborY[k_000]; + uint k_00M = neighborZ[k_000]; + // uint k_MM0 = neighborY[k_M00]; + // uint k_M0M = neighborZ[k_M00]; + uint k_0MM = neighborZ[k_0M0]; + // uint k_MMM = neighborZ[k_MM0]; + + //////////////////////////////////////////////////////////////////////////////////// + //! - Get local distributions in PX directions + //! + precursorData[linearIdx(PrecP00, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000]; + precursorData[linearIdx(PrecPP0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000]; + precursorData[linearIdx(PrecPM0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0]; + precursorData[linearIdx(PrecP0P, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000]; + precursorData[linearIdx(PrecP0M, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M]; + precursorData[linearIdx(PrecPPP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000]; + precursorData[linearIdx(PrecPMP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0]; + precursorData[linearIdx(PrecPPM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M]; + precursorData[linearIdx(PrecPMM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM]; +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) +{ + VF_LOG_INFO("PrecursorWriter: Start initializing..."); + VF_LOG_INFO("Writing yz-planes at x={}m every {}. 
timestep, starting at t={}", this->xPos, this->tSave, this->tStartOut); + + precursorStructs.resize(para->getMaxLevel()+1); + for(int level=0; level<=para->getMaxLevel(); level++) + { + + real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]); + int maxPoints = (int((yMax-yMin)/dx)+1)* (int((zMax-zMin)/dx)+1); + + real lowestY, lowestZ, highestY, highestZ; + + lowestY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes-1]; + highestY = para->getParH(level)->coordinateY[1]; + + lowestZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes-1]; + highestZ = para->getParH(level)->coordinateZ[1]; + + std::vector<uint> indicesOnGrid; + std::vector<int> indicesOnPlane; + std::vector<real> coordY, coordZ; + + for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ ) + { + real pointCoordX = para->getParH(level)->coordinateX[pos]; + real pointCoordY = para->getParH(level)->coordinateY[pos]; + real pointCoordZ = para->getParH(level)->coordinateZ[pos]; + if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID && + pointCoordX < (dx+xPos) && pointCoordX >= xPos && + pointCoordY<=yMax && pointCoordY>=yMin && + pointCoordZ<=zMax && pointCoordZ>=zMin) + { + highestY = max(highestY, pointCoordY); + highestZ = max(highestZ, pointCoordZ); + + lowestY = min(lowestY, pointCoordY); + lowestZ = min(lowestZ, pointCoordZ); + indicesOnGrid.push_back((uint)pos); + coordY.push_back(pointCoordY); + coordZ.push_back(pointCoordZ); + } + } + if(indicesOnGrid.size()==0) + throw std::runtime_error("PrecursorWriter did not find any points on the grid"); + + int ny = int((highestY-lowestY)/dx)+1; + int nz = int((highestZ-lowestZ)/dx)+1; + + for(uint i=0;i<indicesOnGrid.size(); i++) + { + int idxY = int((coordY[i]-lowestY)/dx); + int idxZ = int((coordZ[i]-lowestZ)/dx); + int idx; + index1d(idx, idxY, idxZ, ny, nz); + indicesOnPlane.push_back(idx); + } + + precursorStructs[level] = 
SPtr<PrecursorStruct>(new PrecursorStruct); + precursorStructs[level]->numberOfPointsInBC = (uint)indicesOnGrid.size(); + precursorStructs[level]->indicesOnPlane = (int*) malloc(precursorStructs[level]->numberOfPointsInBC*sizeof(int)); + precursorStructs[level]->spacing = makeUbTuple(dx, dx, tSave*para->getTimeRatio()*pow(2,-level)); + precursorStructs[level]->origin = makeUbTuple(lowestY, lowestZ); + precursorStructs[level]->extent = makeUbTuple(0, ny-1, 0, nz-1); + precursorStructs[level]->numberOfPointsInData = ny*nz; + precursorStructs[level]->numberOfTimestepsPerFile = min(para->getlimitOfNodesForVTK()/(ny*nz), maxtimestepsPerFile); + precursorStructs[level]->numberOfFilesWritten = 0; + precursorStructs[level]->numberOfTimestepsBuffered = 0; + + switch (outputVariable) + { + case OutputVariable::Velocities: + precursorStructs[level]->numberOfQuantities = 3; + break; + case OutputVariable::Distributions: + precursorStructs[level]->numberOfQuantities = 9; + break; + + default: + break; + } + + cudaManager->cudaAllocPrecursorWriter(this, level); + + std::copy(indicesOnGrid.begin(), indicesOnGrid.end(), precursorStructs[level]->indicesH); + std::copy(indicesOnPlane.begin(), indicesOnPlane.end(), precursorStructs[level]->indicesOnPlane); + + cudaManager->cudaCopyPrecursorWriterIndicesHtoD(this, level); + + VF_LOG_INFO("Found {} points in precursor plane on level {}", precursorStructs[level]->numberOfPointsInBC, level); + } + VF_LOG_INFO("PrecursorWriter: Done initializing."); +} + + +void PrecursorWriter::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) +{ + uint t_level = para->getTimeStep(level, t, true); + uint tStartOut_level = tStartOut*pow(2, level); + uint tEnd_level = para->getTimestepEnd()*pow(2, level); + + if(t_level>tStartOut_level && ((t_level-tStartOut_level) % tSave)==0) + { + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, precursorStructs[level]->numberOfPointsInBC); + + 
if(this->outputVariable==OutputVariable::Velocities) + { + fillArrayVelocities<<<grid.grid, grid.threads>>>( precursorStructs[level]->numberOfPointsInBC, precursorStructs[level]->indicesD, + precursorStructs[level]->bufferD, + para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, + para->getVelocityRatio()); + getLastCudaError("In PrecursorWriter::interact fillArrayVelocities execution failed"); + } + else if(this->outputVariable==OutputVariable::Distributions) + { + fillArrayDistributions<<<grid.grid, grid.threads>>>(precursorStructs[level]->numberOfPointsInBC, precursorStructs[level]->indicesD, + precursorStructs[level]->bufferD, + para->getParD(level)->distributions.f[0], + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, + para->getEvenOrOdd(level), para->getParD(level)->numberOfNodes); + getLastCudaError("In PrecursorWriter::interact fillArrayDistributions execution failed"); + } + cudaManager->cudaCopyPrecursorWriterOutputVariablesDtoH(this, level); + + // switch device buffer and data pointer so precursor data is gathered in buffer and copied from bufferD to bufferH + real *tmp = precursorStructs[level]->bufferD; + precursorStructs[level]->bufferD = precursorStructs[level]->dataD; + precursorStructs[level]->dataD = tmp; + + precursorStructs[level]->numberOfTimestepsBuffered++; + + if(precursorStructs[level]->numberOfTimestepsBuffered >= precursorStructs[level]->numberOfTimestepsPerFile || t == para->getTimestepEnd()) + { + // switch host buffer and data pointer so precursor data is copied in buffer and written from data + + tmp = precursorStructs[level]->bufferH; + precursorStructs[level]->bufferH = precursorStructs[level]->dataH; + precursorStructs[level]->dataH = tmp; + + writeFuture.wait(); + writeFuture = std::async(std::launch::async, [this](Parameter* para, uint level, uint timesteps){ this->write(para, level, timesteps); }, para, level, 
precursorStructs[level]->numberOfTimestepsBuffered); + precursorStructs[level]->numberOfTimestepsBuffered = 0; + } + } +} + + +void PrecursorWriter::free(Parameter* para, CudaMemoryManager* cudaManager) +{ + writeFuture.wait(); + for(int level=0; level<=para->getMaxLevel(); level++) + { + if(getPrecursorStruct(level)->numberOfTimestepsBuffered>0) + write(para, level, getPrecursorStruct(level)->numberOfTimestepsBuffered); + + cudaManager->cudaFreePrecursorWriter(this, level); + } +} + + +void PrecursorWriter::write(Parameter* para, int level, uint numberOfTimestepsBuffered) +{ + std::string fname = this->makeFileName(fileName, level, para->getMyProcessID(), precursorStructs[level]->numberOfFilesWritten) + getWriter()->getFileExtension(); + std::string wholeName = outputPath + "/" + fname; + + uint numberOfPointsInData = precursorStructs[level]->numberOfPointsInData; + + int startTime = precursorStructs[level]->numberOfFilesWritten*precursorStructs[level]->numberOfTimestepsPerFile; + + UbTupleInt6 extent = makeUbTuple( val<1>(precursorStructs[level]->extent), val<2>(precursorStructs[level]->extent), + val<3>(precursorStructs[level]->extent), val<4>(precursorStructs[level]->extent), + startTime, startTime+(int)numberOfTimestepsBuffered-1); + + UbTupleFloat3 origin = makeUbTuple( val<1>(precursorStructs[level]->origin), val<2>(precursorStructs[level]->origin), 0.f); + + std::vector<std::vector<double>> nodedata; + + for(uint quant=0; quant<precursorStructs[level]->numberOfQuantities; quant++) + { + std::vector<double> doubleArr(numberOfPointsInData*numberOfTimestepsBuffered, NAN); + for( uint timestep=0; timestep<numberOfTimestepsBuffered; timestep++) + { + for (uint pos=0; pos < precursorStructs[level]->numberOfPointsInBC; pos++) + { + int indexOnPlane = precursorStructs[level]->indicesOnPlane[pos]+timestep*numberOfPointsInData; + doubleArr[indexOnPlane] = double(precursorStructs[level]->dataH[linearIdx(quant, pos, timestep, 
precursorStructs[level]->numberOfQuantities, precursorStructs[level]->numberOfPointsInBC)]); + } + } + nodedata.push_back(doubleArr); + } + + std::vector<std::vector<double>> celldata; + getWriter()->writeData(wholeName, nodedatanames, celldatanames, nodedata, celldata, extent, origin, precursorStructs[level]->spacing, extent, this->writePrecision); + precursorStructs[level]->numberOfFilesWritten++; +} + +std::string PrecursorWriter::makeFileName(std::string fileName, int level, int id, uint numberOfFilesWritten) +{ + return fileName + "_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_File_" + StringUtil::toString<int>(numberOfFilesWritten); +} + +void PrecursorWriter::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) +{ + for(uint level=0; level<(uint)para->getMaxLevel(); level++) + { + if(outputVariable==OutputVariable::Velocities) + { + std::vector<uint> indices(precursorStructs[level]->indicesH, precursorStructs[level]->indicesH+precursorStructs[level]->numberOfPointsInBC); + gridProvider->tagFluidNodeIndices(indices, CollisionTemplate::WriteMacroVars, level); + } + } +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h new file mode 100644 index 0000000000000000000000000000000000000000..264023b58ba6db46b50f6a85b334c530864a0b8f --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h @@ -0,0 +1,161 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | 
________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PrecursorWriter.h +//! \author Henry Korb, Henrik Asmuth +//! \date 05/12/2022 +//! \brief Probe writing planes of data to be used as inflow data in successor simulation using PrecursorBC +//! +//! The probe writes out yz-planes at a specific x position ( \param xPos ) of either velocity or distributions +//! that can be read by PrecursorBC as inflow data. +//======================================================================================= + + +#ifndef PRECURSORPROBE_H_ +#define PRECURSORPROBE_H_ + +#include "PreCollisionInteractor.h" +#include "WbWriterVtkXmlImageBinary.h" +#include "LBM/LB.h" +#include <string> +#include <vector> +#include <future> +#include "PointerDefinitions.h" +#include "Logger.h" + +class Parameter; +class CudaMemoryManager; +class GridProvider; + +enum class OutputVariable { + //! - Velocities + Velocities, + //! 
- Distributions + Distributions +}; + +static constexpr uint PrecP00 = 0; +static constexpr uint PrecPP0 = 1; +static constexpr uint PrecPM0 = 2; +static constexpr uint PrecP0P = 3; +static constexpr uint PrecP0M = 4; +static constexpr uint PrecPPP = 5; +static constexpr uint PrecPMP = 6; +static constexpr uint PrecPPM = 7; +static constexpr uint PrecPMM = 8; + +struct PrecursorStruct +{ + uint numberOfPointsInBC, numberOfPointsInData, numberOfTimestepsPerFile, numberOfFilesWritten, numberOfTimestepsBuffered; + uint *indicesH, *indicesD; + real *dataH, *dataD; + real *bufferH, *bufferD; + uint numberOfQuantities; + UbTupleInt4 extent; + UbTupleFloat2 origin; + UbTupleFloat3 spacing; + int* indicesOnPlane; + cudaStream_t stream; +}; + +class PrecursorWriter : public PreCollisionInteractor +{ +public: + PrecursorWriter( + const std::string _fileName, + const std::string _outputPath, + real _xPos, + real _yMin, real _yMax, + real _zMin, real _zMax, + uint _tStartOut, + uint _tSave, + OutputVariable _outputVariable, + uint _maxTimestepsPerFile=uint(1e4) + ): + fileName(_fileName), + outputPath(_outputPath), + xPos(_xPos), + yMin(_yMin), + yMax(_yMax), + zMin(_zMin), + zMax(_zMax), + tStartOut(_tStartOut), + tSave(_tSave), + outputVariable(_outputVariable), + maxtimestepsPerFile(_maxTimestepsPerFile) + { + nodedatanames = determineNodeDataNames(); + writeFuture = std::async([](){}); + }; + + void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override; + void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override; + void free(Parameter* para, CudaMemoryManager* cudaManager) override; + void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; + + OutputVariable getOutputVariable(){ return this->outputVariable; } + + SPtr<PrecursorStruct> getPrecursorStruct(int level){return precursorStructs[level];} + static std::string makeFileName(std::string fileName, int level, int id, uint 
part); + + void setWritePrecision(uint _writePrecision){ this->writePrecision=_writePrecision;} + +private: + WbWriterVtkXmlImageBinary* getWriter(){ return WbWriterVtkXmlImageBinary::getInstance(); }; + void write(Parameter* para, int level, uint numberOfTimestepsBuffered); + + std::vector<std::string> determineNodeDataNames() + { + switch (outputVariable) + { + case OutputVariable::Velocities: + return {"vx", "vy", "vz"}; + break; + case OutputVariable::Distributions: + return {"fP00", "fPP0", "fPM0", "fP0P", "fP0M", "fPPP", "fPMP", "fPPM", "fPMM"}; + break; + + default: + throw std::runtime_error("Invalid OutputVariable for PrecursorWriter"); + break; + } + } + +private: + std::vector<SPtr<PrecursorStruct>> precursorStructs; + std::string fileName, outputPath; + std::vector<std::string> nodedatanames; + std::vector<std::string> celldatanames; + uint tStartOut, tSave, maxtimestepsPerFile; + real xPos, yMin, yMax, zMin, zMax; + OutputVariable outputVariable; + std::future<void> writeFuture; + uint writePrecision = 8; +}; + +#endif //PRECURSORPROBE_H_ diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu index f5b520acfad74f6787e9e657fce3ccdceed9d539..e89d392b5d4bf5983f9bb47642fef81d0f06cc89 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu @@ -15,6 +15,7 @@ #include "Parameter/Parameter.h" #include "DataStructureInitializer/GridProvider.h" #include "GPU/CudaMemoryManager.h" +#include "GPU/GPU_Interface.h" #include <algorithm> @@ -235,7 +236,7 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider, } // Find all points along the normal direction - for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + for(size_t j = 1; j < para->getParH(level)->numberOfNodes; j++ ) { 
if(para->getParH(level)->typeOfGridNode[j] == GEO_FLUID) { @@ -250,16 +251,16 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::sort(pointCoordsNormal->begin(), pointCoordsNormal->end()); // Find all pointCoords in the first plane - for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ ) { - if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID && pointCoordsNormal_par[j] == pointCoordsNormal->at(0)) + if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID && pointCoordsNormal_par[pos] == pointCoordsNormal->at(0)) { //not needed in current state, might become relevant for two-point correlations // pointCoordsNormal->push_back( pointCoordsNormal_par[j] ); // pointCoordsInplane1->push_back( pointCoordsInplane1_par[j] ); // pointCoordsInplane2->push_back( pointCoordsInplane2_par[j] ); - probeIndices_level.push_back(j); + probeIndices_level.push_back((int)pos); } } } @@ -268,6 +269,23 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider, void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t_level, int level) { + // Compute macroscopic variables in entire domain + CalcMacCompSP27( + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->rho, + para->getParD(level)->pressure, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->numberofthreads, + para->getParD(level)->distributions.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("In PlanarAverageProbe Kernel CalcMacSP27 execution failed"); + // Definition of normal and inplane directions for moveIndices kernels uint *neighborNormal, *neighborInplane1, *neighborInplane2; if( 
this->planeNormal == 'x' ) diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h index d11f8e76e4d13113b201af5494b7d0cfcfe18353..3d3533f74501e776f9150c83c9d9101a0be7ecbc 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h @@ -74,6 +74,7 @@ public: planeNormal(_planeNormal) { + if (_tStartTmpAvg<_tStartAvg) throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!"); if(!(_planeNormal == 'x' || _planeNormal == 'y' || _planeNormal == 'z')) throw std::runtime_error("PlanarAverageProbe: planeNormal must be 'x', 'y' or 'z'!"); } diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu index 7d1c0205219737e4b28acbb1a893a0a6071ae9de..f55045505bff0e3b5b0b1426be4e9e1a3832d088 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu @@ -76,11 +76,11 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve int level) { real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]); - for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ ) { - real pointCoordX = para->getParH(level)->coordinateX[j]; - real pointCoordY = para->getParH(level)->coordinateY[j]; - real pointCoordZ = para->getParH(level)->coordinateZ[j]; + real pointCoordX = para->getParH(level)->coordinateX[pos]; + real pointCoordY = para->getParH(level)->coordinateY[pos]; + real pointCoordZ = para->getParH(level)->coordinateZ[pos]; real distX = pointCoordX - this->posX; real distY = pointCoordY - this->posY; 
real distZ = pointCoordZ - this->posZ; @@ -88,7 +88,7 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve if( distX <= this->deltaX && distY <= this->deltaY && distZ <= this->deltaZ && distX >=0.f && distY >=0.f && distZ >=0.f) { - probeIndices_level.push_back(j); + probeIndices_level.push_back((int)pos); distX_level.push_back( distX/dx ); distY_level.push_back( distY/dx ); distZ_level.push_back( distZ/dx ); @@ -106,4 +106,14 @@ void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* p para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ, para->getParD(level)->rho, para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, probeStruct->quantitiesD, probeStruct->arrayOffsetsD, probeStruct->quantitiesArrayD); +} + +void PlaneProbe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) +{ + for(int level=0; level<=para->getMaxLevel(); level++) + { + SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level); + std::vector<uint> probeIndices( probeStruct->pointIndicesH, probeStruct->pointIndicesH+probeStruct->nIndices); + gridProvider->tagFluidNodeIndices( probeIndices, CollisionTemplate::WriteMacroVars, level); + } } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h index 3440c01020f9b3505be7148024e47373b76648ff..180169707a6d7f3f7975f6a2bc4009f7c0aba527 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h @@ -54,7 +54,7 @@ public: ): Probe(_probeName, _outputPath, _tStartAvg, - 0, + _tStartAvg+1, _tAvg, _tStartOut, _tOut, @@ -72,6 +72,8 @@ public: this->deltaZ = _deltaZ; } + void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; + private: bool 
isAvailableStatistic(Statistic _variable) override; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu index e78a98f02ac2093fc46b4daa4a2485ed1395275b..89e1f6b87687ed42c079415a5340f1d385c8d62c 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu @@ -75,20 +75,20 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve { real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]); - for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ ) { for(uint point=0; point<this->pointCoordsX.size(); point++) { real pointCoordX = this->pointCoordsX[point]; real pointCoordY = this->pointCoordsY[point]; real pointCoordZ = this->pointCoordsZ[point]; - real distX = pointCoordX-para->getParH(level)->coordinateX[j]; - real distY = pointCoordY-para->getParH(level)->coordinateY[j]; - real distZ = pointCoordZ-para->getParH(level)->coordinateZ[j]; + real distX = pointCoordX-para->getParH(level)->coordinateX[pos]; + real distY = pointCoordY-para->getParH(level)->coordinateY[pos]; + real distZ = pointCoordZ-para->getParH(level)->coordinateZ[pos]; if( distX <=dx && distY <=dx && distZ <=dx && distX >0.f && distY >0.f && distZ >0.f) { - probeIndices_level.push_back(j); + probeIndices_level.push_back((int)pos); distX_level.push_back( distX/dx ); distY_level.push_back( distY/dx ); distZ_level.push_back( distZ/dx ); @@ -140,4 +140,14 @@ void PointProbe::addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real po } printf("Added %u points \n", n_y*n_z); +} + +void PointProbe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) +{ + for(int level=0; level<=para->getMaxLevel(); level++) + { + SPtr<ProbeStruct> 
probeStruct = this->getProbeStruct(level); + std::vector<uint> probeIndices( probeStruct->pointIndicesH, probeStruct->pointIndicesH+probeStruct->nIndices); + gridProvider->tagFluidNodeIndices( probeIndices, CollisionTemplate::WriteMacroVars, level); + } } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h index 6a6fbe76f089acfafc22672dd3e9d71bd193a3b3..08c359705f03b20fbd3276fe209b6ff4d782a5e5 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h @@ -64,6 +64,7 @@ public: void addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ); void addProbePointsFromXNormalPlane(real pos_x, real pos0_y, real pos0_z, real pos1_y, real pos1_z, uint n_y, uint n_z); + void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; private: bool isAvailableStatistic(Statistic _variable) override; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu index cc027b07bded01455437e65e08ccdcd51bcf7dc0..03c18f5a9a2133bec244053113209abc70469a2a 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu @@ -187,7 +187,7 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* this->velocityRatio = std::bind(&Parameter::getScaledVelocityRatio, para, _1); this->densityRatio = std::bind(&Parameter::getScaledDensityRatio, para, _1); this->forceRatio = std::bind(&Parameter::getScaledForceRatio, para, _1); - this->stressRatio = std::bind(&Parameter::getScaledPressureRatio, para, _1); + this->stressRatio = std::bind(&Parameter::getScaledStressRatio, para, _1); this->viscosityRatio 
= std::bind(&Parameter::getScaledViscosityRatio, para, _1); this->nondimensional = std::bind(&Probe::getNondimensionalConversionFactor, this, _1); @@ -315,6 +315,12 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaMemoryManager) } } +void Probe::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) +{ + // Do nothing +}; + + void Probe::addStatistic(Statistic variable) { if (!this->isAvailableStatistic(variable)) throw std::runtime_error("Probe::addStatistic(): Statistic not available for this probe type!"); @@ -329,6 +335,22 @@ void Probe::addStatistic(Statistic variable) } } +std::string Probe::makeParallelFileName(int id, int t) +{ + return this->probeName + "_bin_ID_" + StringUtil::toString<int>(id) + + "_t_" + StringUtil::toString<int>(t) + + ".vtk"; +} + +std::string Probe::makeGridFileName(int level, int id, int t, uint part) +{ + return this->probeName + "_bin_lev_" + StringUtil::toString<int>(level) + + "_ID_" + StringUtil::toString<int>(id) + + "_Part_" + StringUtil::toString<int>(part) + + "_t_" + StringUtil::toString<int>(t) + + ".vtk"; +} + void Probe::addAllAvailableStatistics() { for( int var=0; var < int(Statistic::LAST); var++) @@ -347,119 +369,76 @@ void Probe::write(Parameter* para, int level, int t) std::vector<std::string> fnames; for (uint i = 1; i <= numberOfParts; i++) { - std::string fname = this->probeName + "_bin_lev_" + StringUtil::toString<int>(level) - + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) - + "_Part_" + StringUtil::toString<int>(i); - if(!this->outputTimeSeries) fname += "_t_" + StringUtil::toString<int>(t_write); - fname += ".vtk"; - fnames.push_back(fname); - this->fileNamesForCollectionFile.push_back(fname); + this->writeGridFile(para, level, t_write, i); } - this->writeGridFiles(para, level, fnames, t); - - if(level == 0 && !this->outputTimeSeries) this->writeCollectionFile(para, t); + if(level == 0&& !this->outputTimeSeries) this->writeParallelFile(para, t); } -void 
Probe::writeCollectionFile(Parameter* para, int t) +void Probe::writeParallelFile(Parameter* para, int t) { int t_write = this->fileNameLU ? t: t/this->tOut; - std::string filename = this->probeName + "_bin_ID_" + StringUtil::toString<int>(para->getMyProcessID()) - + "_t_" + StringUtil::toString<int>(t_write) - + ".vtk"; - - std::ofstream file; - - file.open(this->outputPath + "/" + filename + ".pvtu" ); - - ////////////////////////////////////////////////////////////////////////// - - file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl; - file << " <PUnstructuredGrid GhostLevel=\"1\">" << std::endl; - - file << " <PPointData>" << std::endl; - - for(std::string varName: this->getVarNames()) //TODO - { - file << " <DataArray type=\"Float64\" Name=\""<< varName << "\" /> " << std::endl; - } - file << " </PPointData>" << std::endl; - - file << " <PPoints>" << std::endl; - file << " <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl; - file << " </PPoints>" << std::endl; - - for( auto& fname : this->fileNamesForCollectionFile ) - { - const auto filenameWithoutPath=fname.substr( fname.find_last_of('/') + 1 ); - file << " <Piece Source=\"" << filenameWithoutPath << ".bin.vtu\"/>" << std::endl; - } - - file << " </PUnstructuredGrid>" << std::endl; - file << "</VTKFile>" << std::endl; + std::string filename = this->outputPath + "/" + this->makeParallelFileName(para->getMyProcessID(), t_write); - ////////////////////////////////////////////////////////////////////////// + std::vector<std::string> nodedatanames = this->getVarNames(); + std::vector<std::string> cellNames; - file.close(); + getWriter()->writeParallelFile(filename, fileNamesForCollectionFile, nodedatanames, cellNames); this->fileNamesForCollectionFile.clear(); } -void Probe::writeGridFiles(Parameter* para, int level, std::vector<std::string>& fnames, int t) +void Probe::writeGridFile(Parameter* para, 
int level, int t, uint part) { + std::string fname = this->outputPath + "/" + this->makeGridFileName(level, para->getMyProcessID(), t, part); + std::vector< UbTupleFloat3 > nodes; std::vector< std::string > nodedatanames = this->getVarNames(); - uint startpos = 0; - uint endpos = 0; - uint sizeOfNodes = 0; std::vector< std::vector< double > > nodedata(nodedatanames.size()); SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level); - for (uint part = 0; part < fnames.size(); part++) - { - startpos = part * para->getlimitOfNodesForVTK(); - uint nDataPoints = this->outputTimeSeries? this->tProbe: probeStruct->nPoints; - sizeOfNodes = min(para->getlimitOfNodesForVTK(), nDataPoints - startpos); - endpos = startpos + sizeOfNodes; + uint startpos = (part-1) * para->getlimitOfNodesForVTK(); + uint sizeOfNodes = min(para->getlimitOfNodesForVTK(), probeStruct->nPoints - startpos); + uint endpos = startpos + sizeOfNodes; - ////////////////////////////////////////////////////////////////////////// - nodes.resize(sizeOfNodes); + ////////////////////////////////////////////////////////////////////////// + nodes.resize(sizeOfNodes); - for (uint pos = startpos; pos < endpos; pos++) - { - nodes[pos-startpos] = makeUbTuple( float(probeStruct->pointCoordsX[pos]), - float(probeStruct->pointCoordsY[pos]), - float(probeStruct->pointCoordsZ[pos])); - } + for (uint pos = startpos; pos < endpos; pos++) + { + nodes[pos-startpos] = makeUbTuple( float(probeStruct->pointCoordsX[pos]), + float(probeStruct->pointCoordsY[pos]), + float(probeStruct->pointCoordsZ[pos])); + } - for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); + for( auto it=nodedata.begin(); it!=nodedata.end(); it++) it->resize(sizeOfNodes); - for( int var=0; var < int(Statistic::LAST); var++){ - if(this->quantities[var]) - { - Statistic statistic = static_cast<Statistic>(var); - real coeff; + for( int var=0; var < int(Statistic::LAST); var++){ + if(this->quantities[var]) + { + Statistic statistic 
= static_cast<Statistic>(var); + real coeff; - std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic); - uint n_arrs = uint(postProcessingVariables.size()); + std::vector<PostProcessingVariable> postProcessingVariables = this->getPostProcessingVariables(statistic); + uint n_arrs = uint(postProcessingVariables.size()); - uint arrOff = probeStruct->arrayOffsetsH[var]; - uint arrLen = probeStruct->nPoints; + uint arrOff = probeStruct->arrayOffsetsH[var]; + uint arrLen = probeStruct->nPoints; + + for(uint arr=0; arr<n_arrs; arr++) + { + coeff = postProcessingVariables[arr].conversionFactor(level); - for(uint arr=0; arr<n_arrs; arr++) + for (uint pos = startpos; pos < endpos; pos++) { - coeff = postProcessingVariables[arr].conversionFactor(level); - - for (uint pos = startpos; pos < endpos; pos++) - { - nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); - } + nodedata[arrOff+arr][pos-startpos] = double(probeStruct->quantitiesArrayH[(arrOff+arr)*arrLen+pos]*coeff); } } } - WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(this->outputPath + "/" + fnames[part], nodes, nodedatanames, nodedata); } + std::string fullName = getWriter()->writeNodesWithNodeData(fname, nodes, nodedatanames, nodedata); + this->fileNamesForCollectionFile.push_back(fullName.substr(fullName.find_last_of('/') + 1)); } std::vector<std::string> Probe::getVarNames() diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h index 9cb0bd43e27fb7a28cae9c363ce245fbd9cc5677..aaf294e87d23c64707a16692b9337d6e9ff9c896 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h @@ -49,6 +49,7 @@ #include "PreCollisionInteractor/PreCollisionInteractor.h" #include "PointerDefinitions.h" +#include "WbWriterVtkXmlBinary.h" 
//======================================================================================= //! \note How to add new Statistics @@ -153,12 +154,12 @@ public: PreCollisionInteractor() { if (_tStartOut<_tStartAvg) throw std::runtime_error("Probe: tStartOut must be larger than tStartAvg!"); - if (_tStartTmpAvg<_tStartAvg) throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!"); } void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override; void interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t) override; void free(Parameter* para, CudaMemoryManager* cudaMemoryManager) override; + virtual void getTaggedFluidNodes(Parameter *para, GridProvider* gridProvider) override; SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; } @@ -171,6 +172,8 @@ public: void setFileNameToNOut(){this->fileNameLU = false;} void setTStartTmpAveraging(uint _tStartTmpAveraging){this->tStartTmpAveraging = _tStartTmpAveraging;} +protected: + virtual WbWriterVtkXmlBinary* getWriter(){ return WbWriterVtkXmlBinary::getInstance(); }; real getNondimensionalConversionFactor(int level); private: @@ -188,12 +191,15 @@ private: int level); virtual void calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level) = 0; - void write(Parameter* para, int level, int t); - void writeCollectionFile(Parameter* para, int t); - void writeGridFiles(Parameter* para, int level, std::vector<std::string >& fnames, int t); + virtual void write(Parameter* para, int level, int t); + virtual void writeParallelFile(Parameter* para, int t); + virtual void writeGridFile(Parameter* para, int level, int t, uint part); + std::vector<std::string> getVarNames(); - -private: + std::string makeGridFileName(int level, int id, int t, uint part); + std::string makeParallelFileName(int id, int t); + +protected: const std::string probeName; const std::string outputPath; @@ -202,7 +208,6 @@ 
private: bool hasDeviceQuantityArray; //!> flag initiating memCopy in Point and PlaneProbe. Other probes are only based on thrust reduce functions and therefore dont need explict memCopy in interact() bool outputTimeSeries; //!> flag initiating overwrite of output vtk files, skipping collection files and limiting the length of the written data to the current time step (currently only used for WallModelProbe) std::vector<std::string> fileNamesForCollectionFile; - std::vector<std::string> varNames; bool fileNameLU = true; //!> if true, written file name contains time step in LU, else is the number of the written probe files @@ -215,7 +220,6 @@ protected: uint tProbe = 0; //!> counter for number of probe evaluations. Only used when outputting timeseries - std::function<real(int)> velocityRatio; std::function<real(int)> densityRatio; std::function<real(int)> forceRatio; diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu index 81da15595baae55aa562bc77e24442a9258d992f..3341111c134ace7ca6ff64eeb7f87b38f8014656 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu @@ -171,11 +171,11 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std { if (!para->getIsBodyForce()) throw std::runtime_error("WallModelProbe::findPoints(): bodyforce not allocated!"); // Find all fluid nodes - for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ ) + for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ ) { - if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID) + if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { - probeIndices_level.push_back(j); + probeIndices_level.push_back((int)pos); } } } diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h 
b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h index d6464c5ca2aa60310cc6bb7ca0a210bc12e755ff..4ea90f74c7a0d57af4995e1b5874234967f1e901 100644 --- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h +++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.h @@ -55,14 +55,17 @@ public: uint _tStartOut, uint _tOut ): Probe(_probeName, - _outputPath, - _tStartAvg, - _tStartTmpAvg, - _tAvg, - _tStartOut, - _tOut, - false, - true){} + _outputPath, + _tStartAvg, + _tStartTmpAvg, + _tAvg, + _tStartOut, + _tOut, + false, + true) + { + if (_tStartTmpAvg<_tStartAvg) throw std::runtime_error("Probe: tStartTmpAvg must be larger than tStartAvg!"); + } void setForceOutputToStress(bool _outputStress){ this->outputStress = _outputStress; } diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu index e43fb54a6b56b4d9a501269544cea000df31cdb7..60dbb2228e6d01fdabf7a6e1bfca786e2104d5b0 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu @@ -2,6 +2,7 @@ #include "InitCompAD27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_p void InitCompAD27::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 
1); - - LB_Init_Comp_AD_27 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->Conc, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->distributionsAD27.f[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("InitAD27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_Comp_AD_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->Conc, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->distributionsAD27.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_Comp_AD_27 execution failed"); } bool InitCompAD27::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu index 8a53dff5c14adef69aa012bdf1d870d62a9749b2..8097ee13d9064c4104ead8cd8eb5ba529d8972fc 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu @@ -2,6 +2,7 @@ #include "InitCompAD7_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Paramet void InitCompAD7::init(int 
level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Init_Comp_AD_7 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->Conc, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->distributionsAD7.f[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("InitAD7 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_Comp_AD_7 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->Conc, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->distributionsAD7.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_Comp_AD_7 execution failed"); } bool InitCompAD7::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu index 23ec3e5293ec3a49bf632a720ab554d156dc9674..c4676f28f969e2db8ff7f1910ac784a1c0dab351 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu +++ 
b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu @@ -2,6 +2,7 @@ #include "InitCompSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,27 +11,12 @@ std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_p void InitCompSP27::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); if( ! para->getUseInitNeq() ) { - LB_Init_Comp_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX, + LB_Init_Comp_SP_27 <<< grid.grid, grid.threads >>> ( + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->typeOfGridNode, @@ -41,11 +27,12 @@ void InitCompSP27::init(int level) para->getParD(level)->numberOfNodes, para->getParD(level)->distributions.f[0], para->getParD(level)->isEvenTimestep); - getLastCudaError("LBInitSP27 execution failed"); + getLastCudaError("LB_Init_Comp_SP_27 execution failed"); } else { - LB_Init_Comp_Neq_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX, + LB_Init_Comp_Neq_SP_27 <<< grid.grid, grid.threads >>> ( + para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ, para->getParD(level)->neighborInverse, @@ -59,7 +46,7 @@ void InitCompSP27::init(int level) para->getParD(level)->omega, para->getParD(level)->isEvenTimestep); cudaDeviceSynchronize(); - getLastCudaError("LBInitNeqSP27 execution 
failed"); + getLastCudaError("LB_Init_Comp_Neq_SP_27 execution failed"); } diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu index cb6b40b4371a206c6d1e031822338621c4907be1..14d6b725337aa8b9af279bf794ff1c0912516b64 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu @@ -2,6 +2,7 @@ #include "InitF3_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Par void InitF3::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Init_F3 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->rho, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->g6.g[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("LBInitF3 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_F3 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + 
para->getParD(level)->typeOfGridNode, + para->getParD(level)->rho, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->g6.g[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_F3 execution failed"); } bool InitF3::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu index 419ae80b96be57f8dc9c4ebecaccac0d435f00e0..ea700010960b11a1facdda18c35f220f43eb6a66 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu @@ -2,6 +2,7 @@ #include "InitIncompAD27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared void InitIncompAD27::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Init_Incomp_AD_27 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->Conc, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - 
para->getParD(level)->distributionsAD27.f[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("LBInitIncompAD27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_Incomp_AD_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->Conc, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->distributionsAD27.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_Incomp_AD_27 execution failed"); } bool InitIncompAD27::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu index 795cd0496a207e0861e35e4f310481950a037caf..d7c08e6932cacf2fb5a946010c1855212f1631fc 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu @@ -2,6 +2,7 @@ #include "InitIncompAD7_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_ void InitIncompAD7::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, 
Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Init_Incomp_AD_7 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->Conc, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->distributionsAD27.f[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("LBInitIncompAD7 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_Incomp_AD_7 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->Conc, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->distributionsAD27.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_Incomp_AD_7 execution failed"); } bool InitIncompAD7::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu index 0538c7ab89eb750a40cfc47486dc0891d4493976..078ad24f24659bf10a3dc9ed90bfd62b5e021187 100644 --- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu +++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu @@ -2,6 +2,7 @@ #include "InitSP27_Device.cuh" #include "Parameter/Parameter.h" +#include "cuda/CudaGrid.h" std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<Parameter> para) { @@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<P 
void InitSP27::init(int level) { - int numberOfThreads = para->getParD(level)->numberofthreads; - int size_Mat = para->getParD(level)->numberOfNodes; - - int Grid = (size_Mat / numberOfThreads) + 1; - int Grid1, Grid2; - if (Grid>512) - { - Grid1 = 512; - Grid2 = (Grid / Grid1) + 1; - } - else - { - Grid1 = 1; - Grid2 = Grid; - } - dim3 grid(Grid1, Grid2); - dim3 threads(numberOfThreads, 1, 1); - - LB_Init_SP_27 << < grid, threads >> >( para->getParD(level)->neighborX, - para->getParD(level)->neighborY, - para->getParD(level)->neighborZ, - para->getParD(level)->typeOfGridNode, - para->getParD(level)->rho, - para->getParD(level)->velocityX, - para->getParD(level)->velocityY, - para->getParD(level)->velocityZ, - para->getParD(level)->numberOfNodes, - para->getParD(level)->distributions.f[0], - para->getParD(level)->isEvenTimestep); - getLastCudaError("LBInitSP27 execution failed"); + vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); + + LB_Init_SP_27 <<< grid.grid, grid.threads >>>( + para->getParD(level)->neighborX, + para->getParD(level)->neighborY, + para->getParD(level)->neighborZ, + para->getParD(level)->typeOfGridNode, + para->getParD(level)->rho, + para->getParD(level)->velocityX, + para->getParD(level)->velocityY, + para->getParD(level)->velocityZ, + para->getParD(level)->numberOfNodes, + para->getParD(level)->distributions.f[0], + para->getParD(level)->isEvenTimestep); + getLastCudaError("LB_Init_SP_27 execution failed"); } bool InitSP27::checkParameter() diff --git a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp index a38535f7bdff0d87a5af74a69f0ed8255c647382..15813b7967a84e45f44eb4d286c41aa99c4ff343 100644 --- a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp +++ b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp @@ -9,12 +9,12 @@ void RestartObject::deserialize(const std::string &filename, std::shared_ptr<Par { 
deserialize_internal(filename); - for (int j = para->getCoarse(); j <= para->getFine(); j++) { + for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) { std::vector<real> vec; fs.push_back(vec); - for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) { - para->getParH(j)->distributions.f[0][i] = fs[j][i]; + for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) { + para->getParH(index1)->distributions.f[0][index2] = fs[index1][index2]; } } } @@ -24,15 +24,15 @@ void RestartObject::serialize(const std::string &filename, const std::shared_ptr if (fs.size() > 0) { clear(para); } - for (int j = para->getCoarse(); j <= para->getFine(); j++) { + for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) { std::vector<real> vec; fs.push_back(vec); - for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) { - if (UbMath::isNaN(para->getParH(j)->distributions.f[0][i])) { - fs[j].push_back((real)0.0); + for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) { + if (UbMath::isNaN(para->getParH(index1)->distributions.f[0][index2])) { + fs[index1].push_back((real)0.0); } else { - fs[j].push_back(para->getParH(j)->distributions.f[0][i]); + fs[index1].push_back(para->getParH(index1)->distributions.f[0][index2]); } } } diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt index afa90bdd3f95bb71cf7f1eda6407f9b38766072a..7a9a96ace1c7377b7ad0c67937464d1f2c00cce6 100644 --- a/src/lbm/CMakeLists.txt +++ b/src/lbm/CMakeLists.txt @@ -1,12 +1,12 @@ -if(BUILD_VF_CPU) - project(lbm LANGUAGES CXX) - vf_add_library(NAME lbm PUBLIC_LINK basics) - target_link_libraries(lbm PRIVATE project_warnings) - - vf_add_tests() -endif() +vf_add_library(PUBLIC_LINK basics) if(BUILD_VF_GPU OR BUILD_VF_GKS) - add_subdirectory(cuda) + set_target_properties(lbm PROPERTIES CUDA_SEPARABLE_COMPILATION ON 
POSITION_INDEPENDENT_CODE ON) + + set_source_files_properties(KernelParameter.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(CumulantChimera.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(BGK.cpp PROPERTIES LANGUAGE CUDA) endif() + +vf_add_tests() \ No newline at end of file diff --git a/src/lbm/KernelParameter.cpp b/src/lbm/KernelParameter.cpp index e039214d218ef19f35e8adf927f36d3a6f1aa355..7bf5a369d0e5d4e673d79dcb30bc22fc2c330e68 100644 --- a/src/lbm/KernelParameter.cpp +++ b/src/lbm/KernelParameter.cpp @@ -5,11 +5,8 @@ #include "MacroscopicQuantities.h" -namespace vf +namespace vf::lbm { -namespace lbm -{ - inline __host__ __device__ real Distribution27::getDensity_() const @@ -17,8 +14,6 @@ inline __host__ __device__ real Distribution27::getDensity_() const return getDensity(f); } - - __host__ __device__ real abs_internal(real value) { #ifdef __CUDA_ARCH__ @@ -30,4 +25,3 @@ __host__ __device__ real abs_internal(real value) } -} diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h index 95226628110637f3794c8a1f7e6f6c1f6dda937b..18c4f2a4b20b84d9d519993f3ddb54cf612d4306 100644 --- a/src/lbm/KernelParameter.h +++ b/src/lbm/KernelParameter.h @@ -11,9 +11,7 @@ #include <basics/Core/DataTypes.h> -namespace vf -{ -namespace lbm +namespace vf::lbm { struct Distribution27 @@ -35,9 +33,6 @@ struct KernelParameter }; - - -} } #endif diff --git a/src/lbm/constants/D3Q27.h b/src/lbm/constants/D3Q27.h index f923695f3756712748e638aee59121a5537456c5..c799331815ff92b41b3daf8433bcc10d026a8738 100644 --- a/src/lbm/constants/D3Q27.h +++ b/src/lbm/constants/D3Q27.h @@ -6,92 +6,92 @@ namespace vf::lbm::dir { - //real, double und float auf real -static constexpr int STARTDIR = 0; -static constexpr int ENDDIR = 26; + +static constexpr size_t STARTDIR = 0; +static constexpr size_t ENDDIR = 26; // used in the CPU and the GPU version -static constexpr int DIR_000 = 0; -static constexpr int DIR_P00 = 1; -static constexpr int DIR_M00 = 2; -static 
constexpr int DIR_0P0 = 3; -static constexpr int DIR_0M0 = 4; -static constexpr int DIR_00P = 5; -static constexpr int DIR_00M = 6; -static constexpr int DIR_PP0 = 7; -static constexpr int DIR_MM0 = 8; -static constexpr int DIR_PM0 = 9; -static constexpr int DIR_MP0 = 10; -static constexpr int DIR_P0P = 11; -static constexpr int DIR_M0M = 12; -static constexpr int DIR_P0M = 13; -static constexpr int DIR_M0P = 14; -static constexpr int DIR_0PP = 15; -static constexpr int DIR_0MM = 16; -static constexpr int DIR_0PM = 17; -static constexpr int DIR_0MP = 18; -static constexpr int DIR_PPP = 19; -static constexpr int DIR_MPP = 20; -static constexpr int DIR_PMP = 21; -static constexpr int DIR_MMP = 22; -static constexpr int DIR_PPM = 23; -static constexpr int DIR_MPM = 24; -static constexpr int DIR_PMM = 25; -static constexpr int DIR_MMM = 26; - -static constexpr int INV_P00 = DIR_M00; -static constexpr int INV_M00 = DIR_P00; -static constexpr int INV_0P0 = DIR_0M0; -static constexpr int INV_0M0 = DIR_0P0; -static constexpr int INV_00P = DIR_00M; -static constexpr int INV_00M = DIR_00P; -static constexpr int INV_PP0 = DIR_MM0; -static constexpr int INV_MM0 = DIR_PP0; -static constexpr int INV_PM0 = DIR_MP0; -static constexpr int INV_MP0 = DIR_PM0; -static constexpr int INV_P0P = DIR_M0M; -static constexpr int INV_M0M = DIR_P0P; -static constexpr int INV_P0M = DIR_M0P; -static constexpr int INV_M0P = DIR_P0M; -static constexpr int INV_0PP = DIR_0MM; -static constexpr int INV_0MM = DIR_0PP; -static constexpr int INV_0PM = DIR_0MP; -static constexpr int INV_0MP = DIR_0PM; -static constexpr int INV_PPP = DIR_MMM; -static constexpr int INV_MPP = DIR_PMM; -static constexpr int INV_PMP = DIR_MPM; -static constexpr int INV_MMP = DIR_PPM; -static constexpr int INV_PPM = DIR_MMP; -static constexpr int INV_MPM = DIR_PMP; -static constexpr int INV_PMM = DIR_MPP; -static constexpr int INV_MMM = DIR_PPP; - -static constexpr int SGD_P00 = 0; -static constexpr int SGD_M00 = 1; -static 
constexpr int SGD_0P0 = 2; -static constexpr int SGD_0M0 = 3; -static constexpr int SGD_00P = 4; -static constexpr int SGD_00M = 5; -static constexpr int SGD_PP0 = 6; -static constexpr int SGD_MM0 = 7; -static constexpr int SGD_PM0 = 8; -static constexpr int SGD_MP0 = 9; -static constexpr int SGD_P0P = 10; -static constexpr int SGD_M0M = 11; -static constexpr int SGD_P0M = 12; -static constexpr int SGD_M0P = 13; -static constexpr int SGD_0PP = 14; -static constexpr int SGD_0MM = 15; -static constexpr int SGD_0PM = 16; -static constexpr int SGD_0MP = 17; -static constexpr int SGD_PPP = 18; -static constexpr int SGD_MPP = 19; -static constexpr int SGD_PMP = 20; -static constexpr int SGD_MMP = 21; -static constexpr int SGD_PPM = 22; -static constexpr int SGD_MPM = 23; -static constexpr int SGD_PMM = 24; -static constexpr int SGD_MMM = 25; +static constexpr size_t DIR_000 = 0; +static constexpr size_t DIR_P00 = 1; +static constexpr size_t DIR_M00 = 2; +static constexpr size_t DIR_0P0 = 3; +static constexpr size_t DIR_0M0 = 4; +static constexpr size_t DIR_00P = 5; +static constexpr size_t DIR_00M = 6; +static constexpr size_t DIR_PP0 = 7; +static constexpr size_t DIR_MM0 = 8; +static constexpr size_t DIR_PM0 = 9; +static constexpr size_t DIR_MP0 = 10; +static constexpr size_t DIR_P0P = 11; +static constexpr size_t DIR_M0M = 12; +static constexpr size_t DIR_P0M = 13; +static constexpr size_t DIR_M0P = 14; +static constexpr size_t DIR_0PP = 15; +static constexpr size_t DIR_0MM = 16; +static constexpr size_t DIR_0PM = 17; +static constexpr size_t DIR_0MP = 18; +static constexpr size_t DIR_PPP = 19; +static constexpr size_t DIR_MPP = 20; +static constexpr size_t DIR_PMP = 21; +static constexpr size_t DIR_MMP = 22; +static constexpr size_t DIR_PPM = 23; +static constexpr size_t DIR_MPM = 24; +static constexpr size_t DIR_PMM = 25; +static constexpr size_t DIR_MMM = 26; + +static constexpr size_t INV_P00 = DIR_M00; +static constexpr size_t INV_M00 = DIR_P00; +static constexpr 
size_t INV_0P0 = DIR_0M0; +static constexpr size_t INV_0M0 = DIR_0P0; +static constexpr size_t INV_00P = DIR_00M; +static constexpr size_t INV_00M = DIR_00P; +static constexpr size_t INV_PP0 = DIR_MM0; +static constexpr size_t INV_MM0 = DIR_PP0; +static constexpr size_t INV_PM0 = DIR_MP0; +static constexpr size_t INV_MP0 = DIR_PM0; +static constexpr size_t INV_P0P = DIR_M0M; +static constexpr size_t INV_M0M = DIR_P0P; +static constexpr size_t INV_P0M = DIR_M0P; +static constexpr size_t INV_M0P = DIR_P0M; +static constexpr size_t INV_0PP = DIR_0MM; +static constexpr size_t INV_0MM = DIR_0PP; +static constexpr size_t INV_0PM = DIR_0MP; +static constexpr size_t INV_0MP = DIR_0PM; +static constexpr size_t INV_PPP = DIR_MMM; +static constexpr size_t INV_MPP = DIR_PMM; +static constexpr size_t INV_PMP = DIR_MPM; +static constexpr size_t INV_MMP = DIR_PPM; +static constexpr size_t INV_PPM = DIR_MMP; +static constexpr size_t INV_MPM = DIR_PMP; +static constexpr size_t INV_PMM = DIR_MPP; +static constexpr size_t INV_MMM = DIR_PPP; + +static constexpr size_t SGD_P00 = 0; +static constexpr size_t SGD_M00 = 1; +static constexpr size_t SGD_0P0 = 2; +static constexpr size_t SGD_0M0 = 3; +static constexpr size_t SGD_00P = 4; +static constexpr size_t SGD_00M = 5; +static constexpr size_t SGD_PP0 = 6; +static constexpr size_t SGD_MM0 = 7; +static constexpr size_t SGD_PM0 = 8; +static constexpr size_t SGD_MP0 = 9; +static constexpr size_t SGD_P0P = 10; +static constexpr size_t SGD_M0M = 11; +static constexpr size_t SGD_P0M = 12; +static constexpr size_t SGD_M0P = 13; +static constexpr size_t SGD_0PP = 14; +static constexpr size_t SGD_0MM = 15; +static constexpr size_t SGD_0PM = 16; +static constexpr size_t SGD_0MP = 17; +static constexpr size_t SGD_PPP = 18; +static constexpr size_t SGD_MPP = 19; +static constexpr size_t SGD_PMP = 20; +static constexpr size_t SGD_MMP = 21; +static constexpr size_t SGD_PPM = 22; +static constexpr size_t SGD_MPM = 23; +static constexpr size_t SGD_PMM 
= 24; +static constexpr size_t SGD_MMM = 25; struct countersForPointerChasing{ uint counterInverse; @@ -100,7 +100,7 @@ struct countersForPointerChasing{ uint counterZ; }; -const std::map<const int, const countersForPointerChasing> mapForPointerChasing = +const std::map<const size_t, const countersForPointerChasing> mapForPointerChasing = { {DIR_000, countersForPointerChasing{0, 0, 0, 0}}, {DIR_P00, countersForPointerChasing{0, 1, 0, 0}}, diff --git a/src/lbm/constants/NumericConstants.h b/src/lbm/constants/NumericConstants.h index fb7764255201dbd31ac1134e756fc4bfd6e3d982..e642c2c95171927a7f8dc8f1a911d98117af66a6 100644 --- a/src/lbm/constants/NumericConstants.h +++ b/src/lbm/constants/NumericConstants.h @@ -18,6 +18,7 @@ static constexpr double c1o8 = 0.125; static constexpr double c1o9 = 0.111111111111111; static constexpr double c2o9 = 0.222222222222222; static constexpr double c4o9 = 0.444444444444444; +static constexpr double c4o10 = 0.4; static constexpr double c1o10 = 0.1; static constexpr double c1o12 = 0.083333333333333; static constexpr double c1o16 = 0.0625; @@ -48,6 +49,7 @@ static constexpr double c99o100 = 0.99; static constexpr double c1o126 = 0.007936507936508; static constexpr double c1o216 = 0.004629629629630; static constexpr double c5o4 = 1.25; +static constexpr double c4o3 = 1.333333333333333; static constexpr double c9o4 = 2.25; static constexpr double c5o2 = 2.5; static constexpr double c9o2 = 4.5; @@ -99,15 +101,15 @@ static constexpr double c72o1 = 72.; static constexpr double c84o1 = 84.; static constexpr double c88o1 = 88.; static constexpr double c96o1 = 96.; -static constexpr double c100o1 = 10.; -static constexpr double c130o1 = 13.; -static constexpr double c152o1 = 15.; -static constexpr double c166o1 = 16.; -static constexpr double c195o1 = 19.; -static constexpr double c216o1 = 21.; -static constexpr double c264o1 = 26.; -static constexpr double c290o1 = 29.; -static constexpr double c367o1 = 36.; +static constexpr double c100o1 = 
100.; +static constexpr double c130o1 = 130.; +static constexpr double c152o1 = 152.; +static constexpr double c166o1 = 166.; +static constexpr double c195o1 = 195.; +static constexpr double c216o1 = 216.; +static constexpr double c264o1 = 264.; +static constexpr double c290o1 = 290.; +static constexpr double c367o1 = 367.; static constexpr double Op0000002 = 0.0000002; static constexpr double c10eM30 = 1e-30; @@ -137,6 +139,7 @@ static constexpr float c1o8 = 0.125f; static constexpr float c1o9 = (1.0f / 9.0f); static constexpr float c2o9 = (2.0f / 9.0f); static constexpr float c4o9 = (4.0f / 9.0f); +static constexpr float c4o10 = 0.4f; static constexpr float c1o10 = 0.1f; static constexpr float c1o12 = (1.0f / 12.0f); static constexpr float c1o16 = 0.0625f; @@ -167,6 +170,7 @@ static constexpr float c99o100 = 0.99f; static constexpr float c1o126 = (1.0f / 126.0f); static constexpr float c1o216 = (1.0f / 216.0f); static constexpr float c5o4 = 1.25f; +static constexpr float c4o3 = (4.0f / 3.0f); static constexpr float c9o4 = 2.25f; static constexpr float c5o2 = 2.5f; static constexpr float c9o2 = 4.5f; diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt deleted file mode 100644 index 4142b7c3b1c46275c3257e3dfd657cc6b30c841d..0000000000000000000000000000000000000000 --- a/src/lbm/cuda/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -project(lbmCuda LANGUAGES CUDA CXX) - - -vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm) - - -set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON) - - -set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA) -set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA) -set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA) diff --git a/utilities/setup_builder.py b/utilities/setup_builder.py new file mode 100644 index 
"""Build-backend wrapper around ``setuptools.build_meta``.

Exposes module-level ``build_wheel`` and ``build_sdist`` hooks (the PEP 517
hook names). Each hook remembers the ``config_settings`` it was called with
and makes them visible to the executed setup script as the global name
``config_args``.
"""
from setuptools import build_meta


class builder(build_meta._BuildMetaBackend):
    """setuptools backend that forwards ``config_settings`` to ``setup.py``."""

    def run_setup(self, setup_script='setup.py'):
        """Execute *setup_script* as ``__main__`` with ``config_args`` injected.

        Args:
            setup_script: Path of the setup script to execute.
        """
        with build_meta._open_setup_script(setup_script) as f:
            # NOTE(review): both arguments are raw strings, so this replaces
            # the literal characters backslash-r-backslash-n rather than real
            # CRLF line endings. Kept unchanged; confirm the intent.
            code = f.read().replace(r'\r\n', r'\n')

        # Explicit globals for the script: only the names it is meant to see,
        # instead of every local of this method (self, file handle, ...).
        namespace = {
            "__file__": setup_script,
            "__name__": "__main__",
            # Fall back to an empty dict when run_setup is reached without a
            # prior add_settings() call, so the attribute lookup cannot fail.
            "config_args": getattr(self, "extra_args", None) or {},
        }
        # Compiling with the script's path makes tracebacks point at the
        # real file instead of "<string>".
        exec(compile(code, setup_script, "exec"), namespace)

    def add_settings(self, config_settings):
        """Store *config_settings* for the next ``run_setup`` call.

        Args:
            config_settings: Mapping supplied by the build frontend, or a
                falsy value (``None`` / empty) when no settings were given.
        """
        self.extra_args = config_settings or {}

    def build_wheel(self, wheel_directory, config_settings=None,
                    metadata_directory=None):
        """Record *config_settings*, then delegate to setuptools' ``build_wheel``.

        Returns:
            Whatever the parent ``build_wheel`` returns.
        """
        self.add_settings(config_settings)
        return super().build_wheel(
            wheel_directory, config_settings, metadata_directory
        )

    def build_sdist(self, sdist_directory, config_settings=None):
        """Record *config_settings*, then delegate to setuptools' ``build_sdist``.

        Returns:
            Whatever the parent ``build_sdist`` returns.
        """
        self.add_settings(config_settings)
        # Fixed: this previously called super().build_wheel(), so a request
        # for a source distribution produced a wheel instead.
        return super().build_sdist(sdist_directory, config_settings)


# Module-level hook objects looked up by the build frontend.
build = builder()
build_wheel = build.build_wheel
build_sdist = build.build_sdist