diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7534033e923c90154cbc872c71e9ffd0057398a1..b84d22038b3159fd5aedb951fb48c35b99babe94 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -image: irmb/virtualfluids-python-deps +image: irmb/virtualfluids-python-deps:latest stages: - build @@ -11,7 +11,6 @@ stages: ############################################################################### ## Builds ## ############################################################################### - .gnu_build_template: stage: build @@ -42,8 +41,8 @@ stages: - mkdir -p $CI_PROJECT_DIR/$BUILD_FOLDER - cd $CI_PROJECT_DIR/$BUILD_FOLDER - cmake .. - -DBUILD_VF_CPU=ON - -DBUILD_VF_GPU=ON + --preset=all_make + -DCMAKE_CUDA_ARCHITECTURES=60 - cmake . -LAH - make -j4 @@ -55,6 +54,7 @@ gcc_9: extends: .gnu_build_template ############################################################################### + clang_10: extends: .gnu_build_template @@ -83,11 +83,8 @@ gcc_9_rebuild: - cd $CI_PROJECT_DIR/build - rm -r -f ./* - cmake .. - -DBUILD_VF_CPU=ON - -DBUILD_VF_GPU=ON - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache - -DCMAKE_C_COMPILER_LAUNCHER=ccache + --preset=all_make_ccache + -DCMAKE_CUDA_ARCHITECTURES=60 - make -j4 2>&1 | tee gcc_warnings.txt - ccache -s @@ -100,6 +97,37 @@ gcc_9_rebuild: paths: - $CI_PROJECT_DIR/cache + +############################################################################### +gcc_9_cpu_warning_like_errors: + stage: build + + image: irmb/virtualfluids-deps-ubuntu20.04 + + tags: + - gpu + - linux + + before_script: + - export CCACHE_BASEDIR=$CI_PROJECT_DIR + - export CCACHE_DIR=$CI_PROJECT_DIR/cache + - ccache -s + + script: + - mkdir -p $CI_PROJECT_DIR/build + - cd $CI_PROJECT_DIR/build + - rm -r -f ./* + - cmake .. 
+ --preset=cpu_make_ccache + -DBUILD_WARNINGS_AS_ERRORS=ON + - make -j4 + - ccache -s + + cache: + key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + paths: + - $CI_PROJECT_DIR/cache + ############################################################################### msvc_16: stage: build @@ -109,12 +137,8 @@ msvc_16: - gpu variables: - CMAKE_GENERATOR: "Visual Studio 16 2019" BUILD_CONFIGURATION: "Release" BUILD_FOLDER: "build" - BUILD_VF_CPU: "ON" - BUILD_VF_GPU: "ON" - BUILD_VF_UNIT_TESTS: "ON" # add cmake and MSBuild.exe to the path. # This Needs to be adapted when moved to a new build machine. @@ -129,7 +153,7 @@ msvc_16: - cd $CI_PROJECT_DIR - md -force $env:BUILD_FOLDER - cd $env:BUILD_FOLDER - - cmake .. -DBUILD_VF_CPU=$env:BUILD_VF_CPU -DBUILD_VF_GPU=$env:BUILD_VF_GPU -DBUILD_VF_UNIT_TESTS=$env:BUILD_VF_UNIT_TESTS -G "$env:CMAKE_GENERATOR" .. + - cmake .. --preset=all_msvc - MSBuild.exe VirtualFluids.sln /property:Configuration=$env:BUILD_CONFIGURATION /verbosity:minimal /maxcpucount:4 cache: @@ -144,6 +168,11 @@ msvc_16: ############################################################################### build_singularity_image: stage: build + + tags: + - priviliged + - linux + rules: - if: $CI_COMMIT_TAG @@ -152,13 +181,24 @@ build_singularity_image: - Containers/VirtualFluidsOpenMPI.sif script: - - singularity build Containers/VirtualFluidsOpenMPI.sif Containers/VirtualFluidsOpenMPI.def + - singularity build --fakeroot Containers/VirtualFluidsOpenMPI.sif Containers/VirtualFluidsOpenMPI.def - ls -sh Containers/VirtualFluidsOpenMPI.sif ############################################################################### ## Tests ## ############################################################################### +gcc_9_unit_tests: + stage: test + + needs: ["gcc_9"] + before_script: + - cd $CI_PROJECT_DIR/build + + script: + - ./bin/basicsTests + +############################################################################### msvc_16_unit_tests: stage: test @@ -203,7 +243,7 @@ 
clang_build_analyzer_clang_10: - cmake .. -DBUILD_VF_CPU=ON -DBUILD_VF_GPU=ON - -DUSE_OPENMP=OFF + -DCMAKE_CUDA_ARCHITECTURES=60 -DCMAKE_CXX_FLAGS=-ftime-trace - ClangBuildAnalyzer --start . - make @@ -238,7 +278,7 @@ include_what_you_use_clang_10: - cmake .. -DBUILD_VF_CPU=ON -DBUILD_VF_GPU=ON - -DUSE_OPENMP=OFF + -DCMAKE_CUDA_ARCHITECTURES=60 -DBUILD_VF_INCLUDE_WHAT_YOU_USE=ON - make @@ -246,6 +286,9 @@ include_what_you_use_clang_10: cppcheck: stage: analyze + only: + - open_source@irmb/VirtualFluids_dev + image: irmb/virtualfluids-deps-ubuntu20.04 needs: [] @@ -272,6 +315,9 @@ cppcheck: lizard: stage: analyze + only: + - open_source@irmb/VirtualFluids_dev + image: irmb/virtualfluids-python-deps-ubuntu20.04 needs: [] @@ -285,7 +331,7 @@ lizard: script: - cd $CI_PROJECT_DIR - - lizard -l cpp src/ > lizard.txt --ignore_warnings 191 + - lizard -l cpp src/ > lizard.txt --warnings_only --ignore_warnings 400 artifacts: paths: @@ -296,6 +342,9 @@ lizard: gcov_gcc_9: stage: analyze + only: + - open_source@irmb/VirtualFluids_dev + image: irmb/virtualfluids-python-deps-ubuntu20.04 needs: [] @@ -310,10 +359,9 @@ gcov_gcc_9: - mkdir -p $CI_PROJECT_DIR/build - cd $CI_PROJECT_DIR/build - cmake .. - -DBUILD_VF_CPU=ON - -DBUILD_VF_GPU=ON + --preset=all_make + -DCMAKE_CUDA_ARCHITECTURES=60 -DBUILD_VF_COVERAGE=ON - -DBUILD_VF_UNIT_TESTS=ON - make -j4 - ./bin/basicsTests - cd .. @@ -338,6 +386,9 @@ gcov_gcc_9: clang-tidy: stage: analyze + only: + - open_source@irmb/VirtualFluids_dev + image: irmb/virtualfluids-python-deps-ubuntu20.04 needs: [] @@ -369,6 +420,8 @@ clang-tidy: # the reports in this file needs to match the artifacts. 
sonar-scanner: stage: deploy + tags: + - linux only: - open_source@irmb/VirtualFluids_dev @@ -412,4 +465,4 @@ create_release: --ref "$CI_COMMIT_SHA" \ --job-token "$CI_JOB_TOKEN" \ --assets-link="{'name':'VirtualFluidsSingularityImage_OpenMPI','url':'','type':'other','filepath':'Containers/VirtualFluidsOpenMPI.sif'}" - - build/bin/basicsTests \ No newline at end of file + - build/bin/basicsTests diff --git a/3rdParty/MuParser/CMakeLists.txt b/3rdParty/MuParser/CMakeLists.txt index 49aa3ac25552f6050de4eb5ae08928fddb8a6cf4..1634a2f15a71aeec53bd0ffb5f14c22aec7893aa 100644 --- a/3rdParty/MuParser/CMakeLists.txt +++ b/3rdParty/MuParser/CMakeLists.txt @@ -14,15 +14,9 @@ set(MUPARSER_VERSION ${MUPARSER_VERSION_MAJOR}.${MUPARSER_VERSION_MINOR}.${MUPAR # Build options option(ENABLE_SAMPLES "Build the samples" OFF) -option(ENABLE_OPENMP "Enable OpenMP for multithreading" ON) +#option(ENABLE_OPENMP "Enable OpenMP for multithreading" ON) #option(BUILD_SHARED_LIBS "Build shared/static libs" ON) -if(ENABLE_OPENMP) - find_package(OpenMP REQUIRED) - set(CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_SHARED_LIBRARY_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}") -endif() - # Credit: https://stackoverflow.com/questions/2368811/how-to-set-warning-level-in-cmake/3818084 if(MSVC) @@ -50,7 +44,9 @@ add_library(muparser src/muParserTokenReader.cpp ) -target_link_libraries(muparser PUBLIC OpenMP::OpenMP_CXX) +if(BUILD_USE_OPENMP) + target_link_libraries(muparser PUBLIC OpenMP::OpenMP_CXX) +endif() target_include_directories(muparser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) # this compiles the "DLL" interface (C API) @@ -66,7 +62,7 @@ if (CMAKE_BUILD_TYPE STREQUAL Debug) target_compile_definitions(muparser PRIVATE _DEBUG) endif () -if(ENABLE_OPENMP) +if(BUILD_USE_OPENMP) target_compile_definitions(muparser PRIVATE MUP_USE_OPENMP) endif() set_target_properties(muparser PROPERTIES diff --git a/3rdParty/MuParser/include/muParser.h 
b/3rdParty/MuParser/include/muParser.h index ab1e21e5b2a7506593de90d4845b6c2e5e39904a..3a2bedc39180c678a63cfa81b893a339b85324f7 100644 --- a/3rdParty/MuParser/include/muParser.h +++ b/3rdParty/MuParser/include/muParser.h @@ -29,7 +29,9 @@ #ifndef MU_PARSER_H #define MU_PARSER_H +#ifdef __clang__ #pragma clang system_header +#endif //--- Standard includes ------------------------------------------------------------------------ #include <vector> diff --git a/3rdParty/MuParser/include/muParserBase.h b/3rdParty/MuParser/include/muParserBase.h index e42aca8b121c95b3d61e4ef2414f2f0bea76cab1..5ccadd99d1d33b6e69a123480a31b82079234945 100644 --- a/3rdParty/MuParser/include/muParserBase.h +++ b/3rdParty/MuParser/include/muParserBase.h @@ -29,7 +29,9 @@ #ifndef MU_PARSER_BASE_H #define MU_PARSER_BASE_H +#ifdef __clang__ #pragma clang system_header +#endif //--- Standard includes ------------------------------------------------------------------------ #include <cmath> diff --git a/3rdParty/MuParser/include/muParserDLL.h b/3rdParty/MuParser/include/muParserDLL.h index 18051c4d7835b266a463bd36f974d66d5729b13d..14c65b48a463c6bfb52fb92cca2daf62988df937 100644 --- a/3rdParty/MuParser/include/muParserDLL.h +++ b/3rdParty/MuParser/include/muParserDLL.h @@ -29,6 +29,10 @@ #ifndef MU_PARSER_DLL_H #define MU_PARSER_DLL_H +#ifdef __clang__ +#pragma clang system_header +#endif + #include "muParserFixes.h" #ifdef __cplusplus diff --git a/3rdParty/cuda_samples/README b/3rdParty/cuda_samples/README new file mode 100644 index 0000000000000000000000000000000000000000..5db13e7bda8365d792e3e68840d02c442348596e --- /dev/null +++ b/3rdParty/cuda_samples/README @@ -0,0 +1,2 @@ +# 3rd party cuda +The files in this folder are added from here https://github.com/NVIDIA/cuda-samples/blob/v11.2/Common/. 
\ No newline at end of file diff --git a/3rdParty/cuda_samples/exception.h b/3rdParty/cuda_samples/exception.h new file mode 100644 index 0000000000000000000000000000000000000000..84e348b59fb6892439e8057b03093bac48cadcd4 --- /dev/null +++ b/3rdParty/cuda_samples/exception.h @@ -0,0 +1,151 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* CUda UTility Library */ +#ifndef COMMON_EXCEPTION_H_ +#define COMMON_EXCEPTION_H_ + +// includes, system +#include <stdlib.h> +#include <exception> +#include <iostream> +#include <stdexcept> +#include <string> + +//! Exception wrapper. +//! @param Std_Exception Exception out of namespace std for easy typing. +template <class Std_Exception> +class Exception : public Std_Exception { +public: + //! @brief Static construction interface + //! @return Alwayss throws ( Located_Exception<Exception>) + //! @param file file in which the Exception occurs + //! @param line line in which the Exception occurs + //! @param detailed details on the code fragment causing the Exception + static void throw_it(const char *file, const int line, + const char *detailed = "-"); + + //! Static construction interface + //! @return Alwayss throws ( Located_Exception<Exception>) + //! @param file file in which the Exception occurs + //! @param line line in which the Exception occurs + //! @param detailed details on the code fragment causing the Exception + static void throw_it(const char *file, const int line, + const std::string &detailed); + + //! Destructor + virtual ~Exception() throw(); + +private: + //! Constructor, default (private) + Exception(); + + //! Constructor, standard + //! @param str string returned by what() + explicit Exception(const std::string &str); +}; + +//////////////////////////////////////////////////////////////////////////////// +//! Exception handler function for arbitrary exceptions +//! @param ex exception to handle +//////////////////////////////////////////////////////////////////////////////// +template <class Exception_Typ> +inline void handleException(const Exception_Typ &ex) { + std::cerr << ex.what() << std::endl; + + exit(EXIT_FAILURE); +} + +//! Convenience macros + +//! Exception caused by dynamic program behavior, e.g. 
file does not exist +#define RUNTIME_EXCEPTION(msg) \ + Exception<std::runtime_error>::throw_it(__FILE__, __LINE__, msg) + +//! Logic exception in program, e.g. an assert failed +#define LOGIC_EXCEPTION(msg) \ + Exception<std::logic_error>::throw_it(__FILE__, __LINE__, msg) + +//! Out of range exception +#define RANGE_EXCEPTION(msg) \ + Exception<std::range_error>::throw_it(__FILE__, __LINE__, msg) + +//////////////////////////////////////////////////////////////////////////////// +//! Implementation + +// includes, system +#include <sstream> + +//////////////////////////////////////////////////////////////////////////////// +//! Static construction interface. +//! @param Exception causing code fragment (file and line) and detailed infos. +//////////////////////////////////////////////////////////////////////////////// +/*static*/ template <class Std_Exception> +void Exception<Std_Exception>::throw_it(const char *file, const int line, + const char *detailed) { + std::stringstream s; + + // Quiet heavy-weight but exceptions are not for + // performance / release versions + s << "Exception in file '" << file << "' in line " << line << "\n" + << "Detailed description: " << detailed << "\n"; + + throw Exception(s.str()); +} + +//////////////////////////////////////////////////////////////////////////////// +//! Static construction interface. +//! @param Exception causing code fragment (file and line) and detailed infos. +//////////////////////////////////////////////////////////////////////////////// +/*static*/ template <class Std_Exception> +void Exception<Std_Exception>::throw_it(const char *file, const int line, + const std::string &msg) { + throw_it(file, line, msg.c_str()); +} + +//////////////////////////////////////////////////////////////////////////////// +//! Constructor, default (private). 
+//////////////////////////////////////////////////////////////////////////////// +template <class Std_Exception> +Exception<Std_Exception>::Exception() : Std_Exception("Unknown Exception.\n") {} + +//////////////////////////////////////////////////////////////////////////////// +//! Constructor, standard (private). +//! String returned by what(). +//////////////////////////////////////////////////////////////////////////////// +template <class Std_Exception> +Exception<Std_Exception>::Exception(const std::string &s) : Std_Exception(s) {} + +//////////////////////////////////////////////////////////////////////////////// +//! Destructor +//////////////////////////////////////////////////////////////////////////////// +template <class Std_Exception> +Exception<Std_Exception>::~Exception() throw() {} + +// functions, exported + +#endif // COMMON_EXCEPTION_H_ \ No newline at end of file diff --git a/3rdParty/cuda_samples/helper_cuda.h b/3rdParty/cuda_samples/helper_cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..81d3f9e76983a32daff4c2649ab0880e29f9881a --- /dev/null +++ b/3rdParty/cuda_samples/helper_cuda.h @@ -0,0 +1,967 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +//////////////////////////////////////////////////////////////////////////////// +// These are CUDA Helper functions for initialization and error checking + +#ifndef COMMON_HELPER_CUDA_H_ +#define COMMON_HELPER_CUDA_H_ + +#pragma once + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <helper_string.h> + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +// Note, it is required that your SDK sample to include the proper header +// files, please refer the CUDA examples for examples of the needed CUDA +// headers, which may change depending on which CUDA functions are used. + +// CUDA Runtime error messages +#ifdef __DRIVER_TYPES_H__ +static const char *_cudaGetErrorEnum(cudaError_t error) { + return cudaGetErrorName(error); +} +#endif + +#ifdef CUDA_DRIVER_API +// CUDA Driver API errors +static const char *_cudaGetErrorEnum(CUresult error) { + static char unknown[] = "<unknown>"; + const char *ret = NULL; + cuGetErrorName(error, &ret); + return ret ? 
ret : unknown; +} +#endif + +#ifdef CUBLAS_API_H_ +// cuBLAS API errors +static const char *_cudaGetErrorEnum(cublasStatus_t error) { + switch (error) { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + + case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; + } + + return "<unknown>"; +} +#endif + +#ifdef _CUFFT_H_ +// cuFFT API errors +static const char *_cudaGetErrorEnum(cufftResult error) { + switch (error) { + case CUFFT_SUCCESS: + return "CUFFT_SUCCESS"; + + case CUFFT_INVALID_PLAN: + return "CUFFT_INVALID_PLAN"; + + case CUFFT_ALLOC_FAILED: + return "CUFFT_ALLOC_FAILED"; + + case CUFFT_INVALID_TYPE: + return "CUFFT_INVALID_TYPE"; + + case CUFFT_INVALID_VALUE: + return "CUFFT_INVALID_VALUE"; + + case CUFFT_INTERNAL_ERROR: + return "CUFFT_INTERNAL_ERROR"; + + case CUFFT_EXEC_FAILED: + return "CUFFT_EXEC_FAILED"; + + case CUFFT_SETUP_FAILED: + return "CUFFT_SETUP_FAILED"; + + case CUFFT_INVALID_SIZE: + return "CUFFT_INVALID_SIZE"; + + case CUFFT_UNALIGNED_DATA: + return "CUFFT_UNALIGNED_DATA"; + + case CUFFT_INCOMPLETE_PARAMETER_LIST: + return "CUFFT_INCOMPLETE_PARAMETER_LIST"; + + case CUFFT_INVALID_DEVICE: + return "CUFFT_INVALID_DEVICE"; + + case CUFFT_PARSE_ERROR: + return "CUFFT_PARSE_ERROR"; + + case CUFFT_NO_WORKSPACE: + return "CUFFT_NO_WORKSPACE"; + + case CUFFT_NOT_IMPLEMENTED: + return 
"CUFFT_NOT_IMPLEMENTED"; + + case CUFFT_LICENSE_ERROR: + return "CUFFT_LICENSE_ERROR"; + + case CUFFT_NOT_SUPPORTED: + return "CUFFT_NOT_SUPPORTED"; + } + + return "<unknown>"; +} +#endif + +#ifdef CUSPARSEAPI +// cuSPARSE API errors +static const char *_cudaGetErrorEnum(cusparseStatus_t error) { + switch (error) { + case CUSPARSE_STATUS_SUCCESS: + return "CUSPARSE_STATUS_SUCCESS"; + + case CUSPARSE_STATUS_NOT_INITIALIZED: + return "CUSPARSE_STATUS_NOT_INITIALIZED"; + + case CUSPARSE_STATUS_ALLOC_FAILED: + return "CUSPARSE_STATUS_ALLOC_FAILED"; + + case CUSPARSE_STATUS_INVALID_VALUE: + return "CUSPARSE_STATUS_INVALID_VALUE"; + + case CUSPARSE_STATUS_ARCH_MISMATCH: + return "CUSPARSE_STATUS_ARCH_MISMATCH"; + + case CUSPARSE_STATUS_MAPPING_ERROR: + return "CUSPARSE_STATUS_MAPPING_ERROR"; + + case CUSPARSE_STATUS_EXECUTION_FAILED: + return "CUSPARSE_STATUS_EXECUTION_FAILED"; + + case CUSPARSE_STATUS_INTERNAL_ERROR: + return "CUSPARSE_STATUS_INTERNAL_ERROR"; + + case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + } + + return "<unknown>"; +} +#endif + +#ifdef CUSOLVER_COMMON_H_ +// cuSOLVER API errors +static const char *_cudaGetErrorEnum(cusolverStatus_t error) { + switch (error) { + case CUSOLVER_STATUS_SUCCESS: + return "CUSOLVER_STATUS_SUCCESS"; + case CUSOLVER_STATUS_NOT_INITIALIZED: + return "CUSOLVER_STATUS_NOT_INITIALIZED"; + case CUSOLVER_STATUS_ALLOC_FAILED: + return "CUSOLVER_STATUS_ALLOC_FAILED"; + case CUSOLVER_STATUS_INVALID_VALUE: + return "CUSOLVER_STATUS_INVALID_VALUE"; + case CUSOLVER_STATUS_ARCH_MISMATCH: + return "CUSOLVER_STATUS_ARCH_MISMATCH"; + case CUSOLVER_STATUS_MAPPING_ERROR: + return "CUSOLVER_STATUS_MAPPING_ERROR"; + case CUSOLVER_STATUS_EXECUTION_FAILED: + return "CUSOLVER_STATUS_EXECUTION_FAILED"; + case CUSOLVER_STATUS_INTERNAL_ERROR: + return "CUSOLVER_STATUS_INTERNAL_ERROR"; + case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + case 
CUSOLVER_STATUS_NOT_SUPPORTED: + return "CUSOLVER_STATUS_NOT_SUPPORTED "; + case CUSOLVER_STATUS_ZERO_PIVOT: + return "CUSOLVER_STATUS_ZERO_PIVOT"; + case CUSOLVER_STATUS_INVALID_LICENSE: + return "CUSOLVER_STATUS_INVALID_LICENSE"; + } + + return "<unknown>"; +} +#endif + +#ifdef CURAND_H_ +// cuRAND API errors +static const char *_cudaGetErrorEnum(curandStatus_t error) { + switch (error) { + case CURAND_STATUS_SUCCESS: + return "CURAND_STATUS_SUCCESS"; + + case CURAND_STATUS_VERSION_MISMATCH: + return "CURAND_STATUS_VERSION_MISMATCH"; + + case CURAND_STATUS_NOT_INITIALIZED: + return "CURAND_STATUS_NOT_INITIALIZED"; + + case CURAND_STATUS_ALLOCATION_FAILED: + return "CURAND_STATUS_ALLOCATION_FAILED"; + + case CURAND_STATUS_TYPE_ERROR: + return "CURAND_STATUS_TYPE_ERROR"; + + case CURAND_STATUS_OUT_OF_RANGE: + return "CURAND_STATUS_OUT_OF_RANGE"; + + case CURAND_STATUS_LENGTH_NOT_MULTIPLE: + return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; + + case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: + return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; + + case CURAND_STATUS_LAUNCH_FAILURE: + return "CURAND_STATUS_LAUNCH_FAILURE"; + + case CURAND_STATUS_PREEXISTING_FAILURE: + return "CURAND_STATUS_PREEXISTING_FAILURE"; + + case CURAND_STATUS_INITIALIZATION_FAILED: + return "CURAND_STATUS_INITIALIZATION_FAILED"; + + case CURAND_STATUS_ARCH_MISMATCH: + return "CURAND_STATUS_ARCH_MISMATCH"; + + case CURAND_STATUS_INTERNAL_ERROR: + return "CURAND_STATUS_INTERNAL_ERROR"; + } + + return "<unknown>"; +} +#endif + +#ifdef NVJPEGAPI +// nvJPEG API errors +static const char *_cudaGetErrorEnum(nvjpegStatus_t error) { + switch (error) { + case NVJPEG_STATUS_SUCCESS: + return "NVJPEG_STATUS_SUCCESS"; + + case NVJPEG_STATUS_NOT_INITIALIZED: + return "NVJPEG_STATUS_NOT_INITIALIZED"; + + case NVJPEG_STATUS_INVALID_PARAMETER: + return "NVJPEG_STATUS_INVALID_PARAMETER"; + + case NVJPEG_STATUS_BAD_JPEG: + return "NVJPEG_STATUS_BAD_JPEG"; + + case NVJPEG_STATUS_JPEG_NOT_SUPPORTED: + return 
"NVJPEG_STATUS_JPEG_NOT_SUPPORTED"; + + case NVJPEG_STATUS_ALLOCATOR_FAILURE: + return "NVJPEG_STATUS_ALLOCATOR_FAILURE"; + + case NVJPEG_STATUS_EXECUTION_FAILED: + return "NVJPEG_STATUS_EXECUTION_FAILED"; + + case NVJPEG_STATUS_ARCH_MISMATCH: + return "NVJPEG_STATUS_ARCH_MISMATCH"; + + case NVJPEG_STATUS_INTERNAL_ERROR: + return "NVJPEG_STATUS_INTERNAL_ERROR"; + } + + return "<unknown>"; +} +#endif + +#ifdef NV_NPPIDEFS_H +// NPP API errors +static const char *_cudaGetErrorEnum(NppStatus error) { + switch (error) { + case NPP_NOT_SUPPORTED_MODE_ERROR: + return "NPP_NOT_SUPPORTED_MODE_ERROR"; + + case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: + return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; + + case NPP_RESIZE_NO_OPERATION_ERROR: + return "NPP_RESIZE_NO_OPERATION_ERROR"; + + case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: + return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 + + case NPP_BAD_ARG_ERROR: + return "NPP_BAD_ARGUMENT_ERROR"; + + case NPP_COEFF_ERROR: + return "NPP_COEFFICIENT_ERROR"; + + case NPP_RECT_ERROR: + return "NPP_RECTANGLE_ERROR"; + + case NPP_QUAD_ERROR: + return "NPP_QUADRANGLE_ERROR"; + + case NPP_MEM_ALLOC_ERR: + return "NPP_MEMORY_ALLOCATION_ERROR"; + + case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: + return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; + + case NPP_INVALID_INPUT: + return "NPP_INVALID_INPUT"; + + case NPP_POINTER_ERROR: + return "NPP_POINTER_ERROR"; + + case NPP_WARNING: + return "NPP_WARNING"; + + case NPP_ODD_ROI_WARNING: + return "NPP_ODD_ROI_WARNING"; +#else + + // These are for CUDA 5.5 or higher + case NPP_BAD_ARGUMENT_ERROR: + return "NPP_BAD_ARGUMENT_ERROR"; + + case NPP_COEFFICIENT_ERROR: + return "NPP_COEFFICIENT_ERROR"; + + case NPP_RECTANGLE_ERROR: + return "NPP_RECTANGLE_ERROR"; + + case NPP_QUADRANGLE_ERROR: + return "NPP_QUADRANGLE_ERROR"; + + case NPP_MEMORY_ALLOCATION_ERR: + return "NPP_MEMORY_ALLOCATION_ERROR"; + + case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: + 
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; + + case NPP_INVALID_HOST_POINTER_ERROR: + return "NPP_INVALID_HOST_POINTER_ERROR"; + + case NPP_INVALID_DEVICE_POINTER_ERROR: + return "NPP_INVALID_DEVICE_POINTER_ERROR"; +#endif + + case NPP_LUT_NUMBER_OF_LEVELS_ERROR: + return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; + + case NPP_TEXTURE_BIND_ERROR: + return "NPP_TEXTURE_BIND_ERROR"; + + case NPP_WRONG_INTERSECTION_ROI_ERROR: + return "NPP_WRONG_INTERSECTION_ROI_ERROR"; + + case NPP_NOT_EVEN_STEP_ERROR: + return "NPP_NOT_EVEN_STEP_ERROR"; + + case NPP_INTERPOLATION_ERROR: + return "NPP_INTERPOLATION_ERROR"; + + case NPP_RESIZE_FACTOR_ERROR: + return "NPP_RESIZE_FACTOR_ERROR"; + + case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: + return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 + + case NPP_MEMFREE_ERR: + return "NPP_MEMFREE_ERR"; + + case NPP_MEMSET_ERR: + return "NPP_MEMSET_ERR"; + + case NPP_MEMCPY_ERR: + return "NPP_MEMCPY_ERROR"; + + case NPP_MIRROR_FLIP_ERR: + return "NPP_MIRROR_FLIP_ERR"; +#else + + case NPP_MEMFREE_ERROR: + return "NPP_MEMFREE_ERROR"; + + case NPP_MEMSET_ERROR: + return "NPP_MEMSET_ERROR"; + + case NPP_MEMCPY_ERROR: + return "NPP_MEMCPY_ERROR"; + + case NPP_MIRROR_FLIP_ERROR: + return "NPP_MIRROR_FLIP_ERROR"; +#endif + + case NPP_ALIGNMENT_ERROR: + return "NPP_ALIGNMENT_ERROR"; + + case NPP_STEP_ERROR: + return "NPP_STEP_ERROR"; + + case NPP_SIZE_ERROR: + return "NPP_SIZE_ERROR"; + + case NPP_NULL_POINTER_ERROR: + return "NPP_NULL_POINTER_ERROR"; + + case NPP_CUDA_KERNEL_EXECUTION_ERROR: + return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; + + case NPP_NOT_IMPLEMENTED_ERROR: + return "NPP_NOT_IMPLEMENTED_ERROR"; + + case NPP_ERROR: + return "NPP_ERROR"; + + case NPP_SUCCESS: + return "NPP_SUCCESS"; + + case NPP_WRONG_INTERSECTION_QUAD_WARNING: + return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; + + case NPP_MISALIGNED_DST_ROI_WARNING: + return "NPP_MISALIGNED_DST_ROI_WARNING"; + + case 
NPP_AFFINE_QUAD_INCORRECT_WARNING: + return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; + + case NPP_DOUBLE_SIZE_WARNING: + return "NPP_DOUBLE_SIZE_WARNING"; + + case NPP_WRONG_INTERSECTION_ROI_WARNING: + return "NPP_WRONG_INTERSECTION_ROI_WARNING"; + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 + /* These are 6.0 or higher */ + case NPP_LUT_PALETTE_BITSIZE_ERROR: + return "NPP_LUT_PALETTE_BITSIZE_ERROR"; + + case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: + return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; + + case NPP_QUALITY_INDEX_ERROR: + return "NPP_QUALITY_INDEX_ERROR"; + + case NPP_CHANNEL_ORDER_ERROR: + return "NPP_CHANNEL_ORDER_ERROR"; + + case NPP_ZERO_MASK_VALUE_ERROR: + return "NPP_ZERO_MASK_VALUE_ERROR"; + + case NPP_NUMBER_OF_CHANNELS_ERROR: + return "NPP_NUMBER_OF_CHANNELS_ERROR"; + + case NPP_COI_ERROR: + return "NPP_COI_ERROR"; + + case NPP_DIVISOR_ERROR: + return "NPP_DIVISOR_ERROR"; + + case NPP_CHANNEL_ERROR: + return "NPP_CHANNEL_ERROR"; + + case NPP_STRIDE_ERROR: + return "NPP_STRIDE_ERROR"; + + case NPP_ANCHOR_ERROR: + return "NPP_ANCHOR_ERROR"; + + case NPP_MASK_SIZE_ERROR: + return "NPP_MASK_SIZE_ERROR"; + + case NPP_MOMENT_00_ZERO_ERROR: + return "NPP_MOMENT_00_ZERO_ERROR"; + + case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: + return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; + + case NPP_THRESHOLD_ERROR: + return "NPP_THRESHOLD_ERROR"; + + case NPP_CONTEXT_MATCH_ERROR: + return "NPP_CONTEXT_MATCH_ERROR"; + + case NPP_FFT_FLAG_ERROR: + return "NPP_FFT_FLAG_ERROR"; + + case NPP_FFT_ORDER_ERROR: + return "NPP_FFT_ORDER_ERROR"; + + case NPP_SCALE_RANGE_ERROR: + return "NPP_SCALE_RANGE_ERROR"; + + case NPP_DATA_TYPE_ERROR: + return "NPP_DATA_TYPE_ERROR"; + + case NPP_OUT_OFF_RANGE_ERROR: + return "NPP_OUT_OFF_RANGE_ERROR"; + + case NPP_DIVIDE_BY_ZERO_ERROR: + return "NPP_DIVIDE_BY_ZERO_ERROR"; + + case NPP_RANGE_ERROR: + return "NPP_RANGE_ERROR"; + + case NPP_NO_MEMORY_ERROR: + return "NPP_NO_MEMORY_ERROR"; + + case NPP_ERROR_RESERVED: + return 
"NPP_ERROR_RESERVED"; + + case NPP_NO_OPERATION_WARNING: + return "NPP_NO_OPERATION_WARNING"; + + case NPP_DIVIDE_BY_ZERO_WARNING: + return "NPP_DIVIDE_BY_ZERO_WARNING"; +#endif + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000 + /* These are 7.0 or higher */ + case NPP_OVERFLOW_ERROR: + return "NPP_OVERFLOW_ERROR"; + + case NPP_CORRUPTED_DATA_ERROR: + return "NPP_CORRUPTED_DATA_ERROR"; +#endif + } + + return "<unknown>"; +} +#endif + +template <typename T> +void check(T result, char const *const func, const char *const file, + int const line) { + if (result) { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line, + static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); + exit(EXIT_FAILURE); + } +} + +#ifdef __DRIVER_TYPES_H__ +// This will output the proper CUDA error strings in the event +// that a CUDA host call returns an error +#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) + +// This will output the proper error string when calling cudaGetLastError +#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__) + +inline void __getLastCudaError(const char *errorMessage, const char *file, + const int line) { + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) { + fprintf(stderr, + "%s(%i) : getLastCudaError() CUDA error :" + " %s : (%d) %s.\n", + file, line, errorMessage, static_cast<int>(err), + cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +// This will only print the proper error string when calling cudaGetLastError +// but not exit program incase error detected. 
+#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__) + +inline void __printLastCudaError(const char *errorMessage, const char *file, + const int line) { + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) { + fprintf(stderr, + "%s(%i) : getLastCudaError() CUDA error :" + " %s : (%d) %s.\n", + file, line, errorMessage, static_cast<int>(err), + cudaGetErrorString(err)); + } +} +#endif + +#ifndef MAX +#define MAX(a, b) (a > b ? a : b) +#endif + +// Float To Int conversion +inline int ftoi(float value) { + return (value >= 0 ? static_cast<int>(value + 0.5) + : static_cast<int>(value - 0.5)); +} + +// Beginning of GPU Architecture definitions +inline int _ConvertSMVer2Cores(int major, int minor) { + // Defines for GPU Architecture types (using the SM version to determine + // the # of cores per SM + typedef struct { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, + // and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = { + {0x30, 192}, + {0x32, 192}, + {0x35, 192}, + {0x37, 192}, + {0x50, 128}, + {0x52, 128}, + {0x53, 128}, + {0x60, 64}, + {0x61, 128}, + {0x62, 128}, + {0x70, 64}, + {0x72, 64}, + {0x75, 64}, + {0x80, 64}, + {0x86, 128}, + {-1, -1}}; + + int index = 0; + + while (nGpuArchCoresPerSM[index].SM != -1) { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { + return nGpuArchCoresPerSM[index].Cores; + } + + index++; + } + + // If we don't find the values, we default use the previous one + // to run properly + printf( + "MapSMtoCores for SM %d.%d is undefined." 
+ " Default to use %d Cores/SM\n", + major, minor, nGpuArchCoresPerSM[index - 1].Cores); + return nGpuArchCoresPerSM[index - 1].Cores; +} + +inline const char* _ConvertSMVer2ArchName(int major, int minor) { + // Defines for GPU Architecture types (using the SM version to determine + // the GPU Arch name) + typedef struct { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, + // and m = SM minor version + const char* name; + } sSMtoArchName; + + sSMtoArchName nGpuArchNameSM[] = { + {0x30, "Kepler"}, + {0x32, "Kepler"}, + {0x35, "Kepler"}, + {0x37, "Kepler"}, + {0x50, "Maxwell"}, + {0x52, "Maxwell"}, + {0x53, "Maxwell"}, + {0x60, "Pascal"}, + {0x61, "Pascal"}, + {0x62, "Pascal"}, + {0x70, "Volta"}, + {0x72, "Xavier"}, + {0x75, "Turing"}, + {0x80, "Ampere"}, + {0x86, "Ampere"}, + {-1, "Graphics Device"}}; + + int index = 0; + + while (nGpuArchNameSM[index].SM != -1) { + if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) { + return nGpuArchNameSM[index].name; + } + + index++; + } + + // If we don't find the values, we default use the previous one + // to run properly + printf( + "MapSMtoArchName for SM %d.%d is undefined." + " Default to use %s\n", + major, minor, nGpuArchNameSM[index - 1].name); + return nGpuArchNameSM[index - 1].name; +} +// end of GPU Architecture definitions + +#ifdef __CUDA_RUNTIME_H__ +// General GPU Device CUDA Initialization +inline int gpuDeviceInit(int devID) { + int device_count; + checkCudaErrors(cudaGetDeviceCount(&device_count)); + + if (device_count == 0) { + fprintf(stderr, + "gpuDeviceInit() CUDA error: " + "no devices supporting CUDA.\n"); + exit(EXIT_FAILURE); + } + + if (devID < 0) { + devID = 0; + } + + if (devID > device_count - 1) { + fprintf(stderr, "\n"); + fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", + device_count); + fprintf(stderr, + ">> gpuDeviceInit (-device=%d) is not a valid" + " GPU device. 
<<\n", + devID); + fprintf(stderr, "\n"); + return -devID; + } + + int computeMode = -1, major = 0, minor = 0; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID)); + checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID)); + checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID)); + if (computeMode == cudaComputeModeProhibited) { + fprintf(stderr, + "Error: device is running in <Compute Mode " + "Prohibited>, no threads can use cudaSetDevice().\n"); + return -1; + } + + if (major < 1) { + fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); + exit(EXIT_FAILURE); + } + + checkCudaErrors(cudaSetDevice(devID)); + printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor)); + + return devID; +} + +// This function returns the best GPU (with maximum GFLOPS) +inline int gpuGetMaxGflopsDeviceId() { + int current_device = 0, sm_per_multiproc = 0; + int max_perf_device = 0; + int device_count = 0; + int devices_prohibited = 0; + + uint64_t max_compute_perf = 0; + checkCudaErrors(cudaGetDeviceCount(&device_count)); + + if (device_count == 0) { + fprintf(stderr, + "gpuGetMaxGflopsDeviceId() CUDA error:" + " no devices supporting CUDA.\n"); + exit(EXIT_FAILURE); + } + + // Find the best CUDA capable GPU device + current_device = 0; + + while (current_device < device_count) { + int computeMode = -1, major = 0, minor = 0; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); + checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device)); + checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device)); + + // If this GPU is not running on Compute Mode prohibited, + // then we can add it to the list + if (computeMode != cudaComputeModeProhibited) { + if (major == 9999 && minor == 9999) { + sm_per_multiproc = 
1; + } else { + sm_per_multiproc = + _ConvertSMVer2Cores(major, minor); + } + int multiProcessorCount = 0, clockRate = 0; + checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device)); + cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device); + if (result != cudaSuccess) { + // If cudaDevAttrClockRate attribute is not supported we + // set clockRate as 1, to consider GPU with most SMs and CUDA Cores. + if(result == cudaErrorInvalidValue) { + clockRate = 1; + } + else { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__, + static_cast<unsigned int>(result), _cudaGetErrorEnum(result)); + exit(EXIT_FAILURE); + } + } + uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate; + + if (compute_perf > max_compute_perf) { + max_compute_perf = compute_perf; + max_perf_device = current_device; + } + } else { + devices_prohibited++; + } + + ++current_device; + } + + if (devices_prohibited == device_count) { + fprintf(stderr, + "gpuGetMaxGflopsDeviceId() CUDA error:" + " all devices have compute mode prohibited.\n"); + exit(EXIT_FAILURE); + } + + return max_perf_device; +} + +// Initialization code to find the best CUDA Device +inline int findCudaDevice(int argc, const char **argv) { + int devID = 0; + + // If the command-line has a device number specified, use it + if (checkCmdLineFlag(argc, argv, "device")) { + devID = getCmdLineArgumentInt(argc, argv, "device="); + + if (devID < 0) { + printf("Invalid command line parameter\n "); + exit(EXIT_FAILURE); + } else { + devID = gpuDeviceInit(devID); + + if (devID < 0) { + printf("exiting...\n"); + exit(EXIT_FAILURE); + } + } + } else { + // Otherwise pick the device with highest Gflops/s + devID = gpuGetMaxGflopsDeviceId(); + checkCudaErrors(cudaSetDevice(devID)); + int major = 0, minor = 0; + checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID)); + 
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID)); + printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", + devID, _ConvertSMVer2ArchName(major, minor), major, minor); + + } + + return devID; +} + +inline int findIntegratedGPU() { + int current_device = 0; + int device_count = 0; + int devices_prohibited = 0; + + checkCudaErrors(cudaGetDeviceCount(&device_count)); + + if (device_count == 0) { + fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); + exit(EXIT_FAILURE); + } + + // Find the integrated GPU which is compute capable + while (current_device < device_count) { + int computeMode = -1, integrated = -1; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); + checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device)); + // If GPU is integrated and is not running on Compute Mode prohibited, + // then cuda can map to GLES resource + if (integrated && (computeMode != cudaComputeModeProhibited)) { + checkCudaErrors(cudaSetDevice(current_device)); + + int major = 0, minor = 0; + checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device)); + checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device)); + printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", + current_device, _ConvertSMVer2ArchName(major, minor), major, minor); + + return current_device; + } else { + devices_prohibited++; + } + + current_device++; + } + + if (devices_prohibited == device_count) { + fprintf(stderr, + "CUDA error:" + " No GLES-CUDA Interop capable GPU found.\n"); + exit(EXIT_FAILURE); + } + + return -1; +} + +// General check for CUDA GPU SM Capabilities +inline bool checkCudaCapabilities(int major_version, int minor_version) { + int dev; + int major = 0, minor = 0; + + checkCudaErrors(cudaGetDevice(&dev)); + checkCudaErrors(cudaDeviceGetAttribute(&major, 
cudaDevAttrComputeCapabilityMajor, dev)); + checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev)); + + if ((major > major_version) || + (major == major_version && + minor >= minor_version)) { + printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, + _ConvertSMVer2ArchName(major, minor), major, minor); + return true; + } else { + printf( + " No GPU device was found that can support " + "CUDA compute capability %d.%d.\n", + major_version, minor_version); + return false; + } +} +#endif + +// end of CUDA Helper Functions + +#endif // COMMON_HELPER_CUDA_H_ \ No newline at end of file diff --git a/3rdParty/cuda_samples/helper_functions.h b/3rdParty/cuda_samples/helper_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..3fc2ea47ba7d39a4bf6a882f65b16779de6de0ac --- /dev/null +++ b/3rdParty/cuda_samples/helper_functions.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// These are helper functions for the SDK samples (string parsing, +// timers, image helpers, etc) +#ifndef COMMON_HELPER_FUNCTIONS_H_ +#define COMMON_HELPER_FUNCTIONS_H_ + +#ifdef WIN32 +#pragma warning(disable : 4996) +#endif + +// includes, project +#include <assert.h> +#include <exception.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +// includes, timer, string parsing, image helpers +#include <helper_image.h> // helper functions for image compare, dump, data comparisons +#include <helper_string.h> // helper functions for string parsing +#include <helper_timer.h> // helper functions for timers + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +#endif // COMMON_HELPER_FUNCTIONS_H_ \ No newline at end of file diff --git a/3rdParty/cuda_samples/helper_image.h b/3rdParty/cuda_samples/helper_image.h new file mode 100644 index 0000000000000000000000000000000000000000..eb7190c21b39463222beb579337d14c1a54b0000 --- /dev/null +++ b/3rdParty/cuda_samples/helper_image.h @@ -0,0 +1,1009 @@ +// +// Created by Soeren Peters on 05.02.21. +// + +#ifndef VIRTUALFLUIDS_HELPER_IMAGE_H +#define VIRTUALFLUIDS_HELPER_IMAGE_H + +#endif // VIRTUALFLUIDS_HELPER_IMAGE_H +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// These are helper functions for the SDK samples (image,bitmap) +#ifndef COMMON_HELPER_IMAGE_H_ +#define COMMON_HELPER_IMAGE_H_ + +#include <assert.h> +#include <exception.h> +#include <math.h> +#include <stdint.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#ifndef MIN +#define MIN(a, b) ((a < b) ? a : b) +#endif +#ifndef MAX +#define MAX(a, b) ((a > b) ? 
a : b) +#endif + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +#include <helper_string.h> + +// namespace unnamed (internal) +namespace helper_image_internal { +//! size of PGM file header +const unsigned int PGMHeaderSize = 0x40; + +// types + +//! Data converter from unsigned char / unsigned byte to type T +template <class T> +struct ConverterFromUByte; + +//! Data converter from unsigned char / unsigned byte +template <> +struct ConverterFromUByte<unsigned char> { + //! Conversion operator + //! @return converted value + //! @param val value to convert + float operator()(const unsigned char &val) { + return static_cast<unsigned char>(val); + } +}; + +//! Data converter from unsigned char / unsigned byte to float +template <> +struct ConverterFromUByte<float> { + //! Conversion operator + //! @return converted value + //! @param val value to convert + float operator()(const unsigned char &val) { + return static_cast<float>(val) / 255.0f; + } +}; + +//! Data converter from unsigned char / unsigned byte to type T +template <class T> +struct ConverterToUByte; + +//! Data converter from unsigned char / unsigned byte to unsigned int +template <> +struct ConverterToUByte<unsigned char> { + //! Conversion operator (essentially a passthru + //! @return converted value + //! @param val value to convert + unsigned char operator()(const unsigned char &val) { return val; } +}; + +//! Data converter from unsigned char / unsigned byte to unsigned int +template <> +struct ConverterToUByte<float> { + //! Conversion operator + //! @return converted value + //! 
@param val value to convert + unsigned char operator()(const float &val) { + return static_cast<unsigned char>(val * 255.0f); + } +}; +} // namespace helper_image_internal + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +#ifndef FOPEN +#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result != 0) +#endif +#ifndef SSCANF +#define SSCANF sscanf_s +#endif +#else +#ifndef FOPEN +#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode)) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result == NULL) +#endif +#ifndef SSCANF +#define SSCANF sscanf +#endif +#endif + +inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, + unsigned int *h, unsigned int *channels) { + FILE *fp = NULL; + + if (FOPEN_FAIL(FOPEN(fp, file, "rb"))) { + std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl; + return false; + } + + // check header + char header[helper_image_internal::PGMHeaderSize]; + + if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) { + std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl; + return false; + } + + if (strncmp(header, "P5", 2) == 0) { + *channels = 1; + } else if (strncmp(header, "P6", 2) == 0) { + *channels = 3; + } else { + std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl; + *channels = 0; + return false; + } + + // parse header, read maxval, width and height + unsigned int width = 0; + unsigned int height = 0; + unsigned int maxval = 0; + unsigned int i = 0; + + while (i < 3) { + if (fgets(header, helper_image_internal::PGMHeaderSize, fp) == NULL) { + std::cerr << "__LoadPPM() : reading PGM header returned NULL" + << std::endl; + return false; + } + + if (header[0] == '#') { + continue; + } + + if (i == 0) { + i += SSCANF(header, "%u %u %u", &width, &height, &maxval); + } else if (i == 1) { + i += SSCANF(header, "%u %u", 
&height, &maxval); + } else if (i == 2) { + i += SSCANF(header, "%u", &maxval); + } + } + + // check if given handle for the data is initialized + if (NULL != *data) { + if (*w != width || *h != height) { + std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl; + } + } else { + *data = (unsigned char *)malloc(sizeof(unsigned char) * width * height * + *channels); + *w = width; + *h = height; + } + + // read and close file + if (fread(*data, sizeof(unsigned char), width * height * *channels, fp) == + 0) { + std::cerr << "__LoadPPM() read data returned error." << std::endl; + } + + fclose(fp); + + return true; +} + +template <class T> +inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, + unsigned int *h) { + unsigned char *idata = NULL; + unsigned int channels; + + if (true != __loadPPM(file, &idata, w, h, &channels)) { + return false; + } + + unsigned int size = *w * *h * channels; + + // initialize mem if necessary + // the correct size is checked / set in loadPGMc() + if (NULL == *data) { + *data = reinterpret_cast<T *>(malloc(sizeof(T) * size)); + } + + // copy and cast data + std::transform(idata, idata + size, *data, + helper_image_internal::ConverterFromUByte<T>()); + + free(idata); + + return true; +} + +template <class T> +inline bool sdkLoadPPM4(const char *file, T **data, unsigned int *w, + unsigned int *h) { + unsigned char *idata = 0; + unsigned int channels; + + if (__loadPPM(file, &idata, w, h, &channels)) { + // pad 4th component + int size = *w * *h; + // keep the original pointer + unsigned char *idata_orig = idata; + *data = reinterpret_cast<T *>(malloc(sizeof(T) * size * 4)); + unsigned char *ptr = *data; + + for (int i = 0; i < size; i++) { + *ptr++ = *idata++; + *ptr++ = *idata++; + *ptr++ = *idata++; + *ptr++ = 0; + } + + free(idata_orig); + return true; + } else { + free(idata); + return false; + } +} + +inline bool __savePPM(const char *file, unsigned char *data, unsigned int w, + unsigned int h, unsigned int 
channels) { + assert(NULL != data); + assert(w > 0); + assert(h > 0); + + std::fstream fh(file, std::fstream::out | std::fstream::binary); + + if (fh.bad()) { + std::cerr << "__savePPM() : Opening file failed." << std::endl; + return false; + } + + if (channels == 1) { + fh << "P5\n"; + } else if (channels == 3) { + fh << "P6\n"; + } else { + std::cerr << "__savePPM() : Invalid number of channels." << std::endl; + return false; + } + + fh << w << "\n" << h << "\n" << 0xff << std::endl; + + for (unsigned int i = 0; (i < (w * h * channels)) && fh.good(); ++i) { + fh << data[i]; + } + + fh.flush(); + + if (fh.bad()) { + std::cerr << "__savePPM() : Writing data failed." << std::endl; + return false; + } + + fh.close(); + + return true; +} + +template <class T> +inline bool sdkSavePGM(const char *file, T *data, unsigned int w, + unsigned int h) { + unsigned int size = w * h; + unsigned char *idata = (unsigned char *)malloc(sizeof(unsigned char) * size); + + std::transform(data, data + size, idata, + helper_image_internal::ConverterToUByte<T>()); + + // write file + bool result = __savePPM(file, idata, w, h, 1); + + // cleanup + free(idata); + + return result; +} + +inline bool sdkSavePPM4ub(const char *file, unsigned char *data, unsigned int w, + unsigned int h) { + // strip 4th component + int size = w * h; + unsigned char *ndata = + (unsigned char *)malloc(sizeof(unsigned char) * size * 3); + unsigned char *ptr = ndata; + + for (int i = 0; i < size; i++) { + *ptr++ = *data++; + *ptr++ = *data++; + *ptr++ = *data++; + data++; + } + + bool result = __savePPM(file, ndata, w, h, 3); + free(ndata); + return result; +} + +////////////////////////////////////////////////////////////////////////////// +//! Read file \filename and return the data +//! @return bool if reading the file succeeded, otherwise false +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! 
@param len number of data elements in data, -1 on error +////////////////////////////////////////////////////////////////////////////// +template <class T> +inline bool sdkReadFile(const char *filename, T **data, unsigned int *len, + bool verbose) { + // check input arguments + assert(NULL != filename); + assert(NULL != len); + + // intermediate storage for the data read + std::vector<T> data_read; + + // open file for reading + FILE *fh = NULL; + + // check if filestream is valid + if (FOPEN_FAIL(FOPEN(fh, filename, "r"))) { + printf("Unable to open input file: %s\n", filename); + return false; + } + + // read all data elements + T token; + + while (!feof(fh)) { + fscanf(fh, "%f", &token); + data_read.push_back(token); + } + + // the last element is read twice + data_read.pop_back(); + fclose(fh); + + // check if the given handle is already initialized + if (NULL != *data) { + if (*len != data_read.size()) { + std::cerr << "sdkReadFile() : Initialized memory given but " + << "size mismatch with signal read " + << "(data read / data init = " << (unsigned int)data_read.size() + << " / " << *len << ")" << std::endl; + + return false; + } + } else { + // allocate storage for the data read + *data = reinterpret_cast<T *>(malloc(sizeof(T) * data_read.size())); + // store signal size + *len = static_cast<unsigned int>(data_read.size()); + } + + // copy data + memcpy(*data, &data_read.front(), sizeof(T) * data_read.size()); + + return true; +} + +////////////////////////////////////////////////////////////////////////////// +//! Read file \filename and return the data +//! @return bool if reading the file succeeded, otherwise false +//! @param filename name of the source file +//! @param data uninitialized pointer, returned initialized and pointing to +//! the data read +//! 
@param len number of data elements in data, -1 on error +////////////////////////////////////////////////////////////////////////////// +template <class T> +inline bool sdkReadFileBlocks(const char *filename, T **data, unsigned int *len, + unsigned int block_num, unsigned int block_size, + bool verbose) { + // check input arguments + assert(NULL != filename); + assert(NULL != len); + + // open file for reading + FILE *fh = fopen(filename, "rb"); + + if (fh == NULL && verbose) { + std::cerr << "sdkReadFile() : Opening file failed." << std::endl; + return false; + } + + // check if the given handle is already initialized + // allocate storage for the data read + data[block_num] = reinterpret_cast<T *>(malloc(block_size)); + + // read all data elements + fseek(fh, block_num * block_size, SEEK_SET); + *len = fread(data[block_num], sizeof(T), block_size / sizeof(T), fh); + + fclose(fh); + + return true; +} + +////////////////////////////////////////////////////////////////////////////// +//! Write a data file \filename +//! @return true if writing the file succeeded, otherwise false +//! @param filename name of the source file +//! @param data data to write +//! @param len number of data elements in data, -1 on error +//! @param epsilon epsilon for comparison +////////////////////////////////////////////////////////////////////////////// +template <class T, class S> +inline bool sdkWriteFile(const char *filename, const T *data, unsigned int len, + const S epsilon, bool verbose, bool append = false) { + assert(NULL != filename); + assert(NULL != data); + + // open file for writing + // if (append) { + std::fstream fh(filename, std::fstream::out | std::fstream::ate); + + if (verbose) { + std::cerr << "sdkWriteFile() : Open file " << filename + << " for write/append." << std::endl; + } + + /* } else { + std::fstream fh(filename, std::fstream::out); + if (verbose) { + std::cerr << "sdkWriteFile() : Open file " << filename << " for + write." 
<< std::endl; + } + } + */ + + // check if filestream is valid + if (!fh.good()) { + if (verbose) { + std::cerr << "sdkWriteFile() : Opening file failed." << std::endl; + } + + return false; + } + + // first write epsilon + fh << "# " << epsilon << "\n"; + + // write data + for (unsigned int i = 0; (i < len) && (fh.good()); ++i) { + fh << data[i] << ' '; + } + + // Check if writing succeeded + if (!fh.good()) { + if (verbose) { + std::cerr << "sdkWriteFile() : Writing file failed." << std::endl; + } + + return false; + } + + // file ends with nl + fh << std::endl; + + return true; +} + +////////////////////////////////////////////////////////////////////////////// +//! Compare two arrays of arbitrary type +//! @return true if \a reference and \a data are identical, otherwise false +//! @param reference timer_interface to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +////////////////////////////////////////////////////////////////////////////// +template <class T, class S> +inline bool compareData(const T *reference, const T *data, + const unsigned int len, const S epsilon, + const float threshold) { + assert(epsilon >= 0); + + bool result = true; + unsigned int error_count = 0; + + for (unsigned int i = 0; i < len; ++i) { + float diff = static_cast<float>(reference[i]) - static_cast<float>(data[i]); + bool comp = (diff <= epsilon) && (diff >= -epsilon); + result &= comp; + + error_count += !comp; + +#if 0 + + if (!comp) { + std::cerr << "ERROR, i = " << i << ",\t " + << reference[i] << " / " + << data[i] + << " (reference / data)\n"; + } + +#endif + } + + if (threshold == 0.0f) { + return (result) ? 
true : false; + } else { + if (error_count) { + printf("%4.2f(%%) of bytes mismatched (count=%d)\n", + static_cast<float>(error_count) * 100 / static_cast<float>(len), + error_count); + } + + return (len * threshold > error_count) ? true : false; + } +} + +#ifndef __MIN_EPSILON_ERROR +#define __MIN_EPSILON_ERROR 1e-3f +#endif + +////////////////////////////////////////////////////////////////////////////// +//! Compare two arrays of arbitrary type +//! @return true if \a reference and \a data are identical, otherwise false +//! @param reference handle to the reference data / gold image +//! @param data handle to the computed data +//! @param len number of elements in reference and data +//! @param epsilon epsilon to use for the comparison +//! @param epsilon threshold % of (# of bytes) for pass/fail +////////////////////////////////////////////////////////////////////////////// +template <class T, class S> +inline bool compareDataAsFloatThreshold(const T *reference, const T *data, + const unsigned int len, const S epsilon, + const float threshold) { + assert(epsilon >= 0); + + // If we set epsilon to be 0, let's set a minimum threshold + float max_error = MAX((float)epsilon, __MIN_EPSILON_ERROR); + int error_count = 0; + bool result = true; + + for (unsigned int i = 0; i < len; ++i) { + float diff = + fabs(static_cast<float>(reference[i]) - static_cast<float>(data[i])); + bool comp = (diff < max_error); + result &= comp; + + if (!comp) { + error_count++; + } + } + + if (threshold == 0.0f) { + if (error_count) { + printf("total # of errors = %d\n", error_count); + } + + return (error_count == 0) ? true : false; + } else { + if (error_count) { + printf("%4.2f(%%) of bytes mismatched (count=%d)\n", + static_cast<float>(error_count) * 100 / static_cast<float>(len), + error_count); + } + + return ((len * threshold > error_count) ? 
true : false); + } +} + +inline void sdkDumpBin(void *data, unsigned int bytes, const char *filename) { + printf("sdkDumpBin: <%s>\n", filename); + FILE *fp; + FOPEN(fp, filename, "wb"); + fwrite(data, bytes, 1, fp); + fflush(fp); + fclose(fp); +} + +inline bool sdkCompareBin2BinUint(const char *src_file, const char *ref_file, + unsigned int nelements, const float epsilon, + const float threshold, char *exec_path) { + unsigned int *src_buffer, *ref_buffer; + FILE *src_fp = NULL, *ref_fp = NULL; + + uint64_t error_count = 0; + size_t fsize = 0; + + if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) { + printf("compareBin2Bin <unsigned int> unable to open src_file: %s\n", + src_file); + error_count++; + } + + char *ref_file_path = sdkFindFilePath(ref_file, exec_path); + + if (ref_file_path == NULL) { + printf("compareBin2Bin <unsigned int> unable to find <%s> in <%s>\n", + ref_file, exec_path); + printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", + ref_file); + printf("Aborting comparison!\n"); + printf(" FAILED\n"); + error_count++; + + if (src_fp) { + fclose(src_fp); + } + + if (ref_fp) { + fclose(ref_fp); + } + } else { + if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) { + printf( + "compareBin2Bin <unsigned int>" + " unable to open ref_file: %s\n", + ref_file_path); + error_count++; + } + + if (src_fp && ref_fp) { + src_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int)); + ref_buffer = (unsigned int *)malloc(nelements * sizeof(unsigned int)); + + fsize = fread(src_buffer, nelements, sizeof(unsigned int), src_fp); + fsize = fread(ref_buffer, nelements, sizeof(unsigned int), ref_fp); + + printf( + "> compareBin2Bin <unsigned int> nelements=%d," + " epsilon=%4.2f, threshold=%4.2f\n", + nelements, epsilon, threshold); + printf(" src_file <%s>, size=%d bytes\n", src_file, + static_cast<int>(fsize)); + printf(" ref_file <%s>, size=%d bytes\n", ref_file_path, + static_cast<int>(fsize)); + + if (!compareData<unsigned int, float>(ref_buffer, 
src_buffer, nelements, + epsilon, threshold)) { + error_count++; + } + + fclose(src_fp); + fclose(ref_fp); + + free(src_buffer); + free(ref_buffer); + } else { + if (src_fp) { + fclose(src_fp); + } + + if (ref_fp) { + fclose(ref_fp); + } + } + } + + if (error_count == 0) { + printf(" OK\n"); + } else { + printf(" FAILURE: %d errors...\n", (unsigned int)error_count); + } + + return (error_count == 0); // returns true if all pixels pass +} + +inline bool sdkCompareBin2BinFloat(const char *src_file, const char *ref_file, + unsigned int nelements, const float epsilon, + const float threshold, char *exec_path) { + float *src_buffer = NULL, *ref_buffer = NULL; + FILE *src_fp = NULL, *ref_fp = NULL; + size_t fsize = 0; + + uint64_t error_count = 0; + + if (FOPEN_FAIL(FOPEN(src_fp, src_file, "rb"))) { + printf("compareBin2Bin <float> unable to open src_file: %s\n", src_file); + error_count = 1; + } + + char *ref_file_path = sdkFindFilePath(ref_file, exec_path); + + if (ref_file_path == NULL) { + printf("compareBin2Bin <float> unable to find <%s> in <%s>\n", ref_file, + exec_path); + printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", + exec_path); + printf("Aborting comparison!\n"); + printf(" FAILED\n"); + error_count++; + + if (src_fp) { + fclose(src_fp); + } + + if (ref_fp) { + fclose(ref_fp); + } + } else { + if (FOPEN_FAIL(FOPEN(ref_fp, ref_file_path, "rb"))) { + printf("compareBin2Bin <float> unable to open ref_file: %s\n", + ref_file_path); + error_count = 1; + } + + if (src_fp && ref_fp) { + src_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float))); + ref_buffer = reinterpret_cast<float *>(malloc(nelements * sizeof(float))); + + printf( + "> compareBin2Bin <float> nelements=%d, epsilon=%4.2f," + " threshold=%4.2f\n", + nelements, epsilon, threshold); + fsize = fread(src_buffer, sizeof(float), nelements, src_fp); + printf(" src_file <%s>, size=%d bytes\n", src_file, + static_cast<int>(fsize * sizeof(float))); + fsize = 
fread(ref_buffer, sizeof(float), nelements, ref_fp); + printf(" ref_file <%s>, size=%d bytes\n", ref_file_path, + static_cast<int>(fsize * sizeof(float))); + + if (!compareDataAsFloatThreshold<float, float>( + ref_buffer, src_buffer, nelements, epsilon, threshold)) { + error_count++; + } + + fclose(src_fp); + fclose(ref_fp); + + free(src_buffer); + free(ref_buffer); + } else { + if (src_fp) { + fclose(src_fp); + } + + if (ref_fp) { + fclose(ref_fp); + } + } + } + + if (error_count == 0) { + printf(" OK\n"); + } else { + printf(" FAILURE: %d errors...\n", (unsigned int)error_count); + } + + return (error_count == 0); // returns true if all pixels pass +} + +inline bool sdkCompareL2fe(const float *reference, const float *data, + const unsigned int len, const float epsilon) { + assert(epsilon >= 0); + + float error = 0; + float ref = 0; + + for (unsigned int i = 0; i < len; ++i) { + float diff = reference[i] - data[i]; + error += diff * diff; + ref += reference[i] * reference[i]; + } + + float normRef = sqrtf(ref); + + if (fabs(ref) < 1e-7) { +#ifdef _DEBUG + std::cerr << "ERROR, reference l2-norm is 0\n"; +#endif + return false; + } + + float normError = sqrtf(error); + error = normError / normRef; + bool result = error < epsilon; +#ifdef _DEBUG + + if (!result) { + std::cerr << "ERROR, l2-norm error " << error << " is greater than epsilon " + << epsilon << "\n"; + } + +#endif + + return result; +} + +inline bool sdkLoadPPMub(const char *file, unsigned char **data, + unsigned int *w, unsigned int *h) { + unsigned int channels; + return __loadPPM(file, data, w, h, &channels); +} + +inline bool sdkLoadPPM4ub(const char *file, unsigned char **data, + unsigned int *w, unsigned int *h) { + unsigned char *idata = 0; + unsigned int channels; + + if (__loadPPM(file, &idata, w, h, &channels)) { + // pad 4th component + int size = *w * *h; + // keep the original pointer + unsigned char *idata_orig = idata; + *data = (unsigned char *)malloc(sizeof(unsigned char) * size * 4); + 
unsigned char *ptr = *data; + + for (int i = 0; i < size; i++) { + *ptr++ = *idata++; + *ptr++ = *idata++; + *ptr++ = *idata++; + *ptr++ = 0; + } + + free(idata_orig); + return true; + } else { + free(idata); + return false; + } +} + +inline bool sdkComparePPM(const char *src_file, const char *ref_file, + const float epsilon, const float threshold, + bool verboseErrors) { + unsigned char *src_data, *ref_data; + uint64_t error_count = 0; + unsigned int ref_width, ref_height; + unsigned int src_width, src_height; + + if (src_file == NULL || ref_file == NULL) { + if (verboseErrors) { + std::cerr << "PPMvsPPM: src_file or ref_file is NULL." + " Aborting comparison\n"; + } + + return false; + } + + if (verboseErrors) { + std::cerr << "> Compare (a)rendered: <" << src_file << ">\n"; + std::cerr << "> (b)reference: <" << ref_file << ">\n"; + } + + if (sdkLoadPPM4ub(ref_file, &ref_data, &ref_width, &ref_height) != true) { + if (verboseErrors) { + std::cerr << "PPMvsPPM: unable to load ref image file: " << ref_file + << "\n"; + } + + return false; + } + + if (sdkLoadPPM4ub(src_file, &src_data, &src_width, &src_height) != true) { + std::cerr << "PPMvsPPM: unable to load src image file: " << src_file + << "\n"; + return false; + } + + if (src_height != ref_height || src_width != ref_width) { + if (verboseErrors) { + std::cerr << "PPMvsPPM: source and ref size mismatch (" << src_width + << "," << src_height << ")vs(" << ref_width << "," << ref_height + << ")\n"; + } + } + + if (verboseErrors) { + std::cerr << "PPMvsPPM: comparing images size (" << src_width << "," + << src_height << ") epsilon(" << epsilon << "), threshold(" + << threshold * 100 << "%)\n"; + } + + if (compareData(ref_data, src_data, src_width * src_height * 4, epsilon, + threshold) == false) { + error_count = 1; + } + + if (error_count == 0) { + if (verboseErrors) { + std::cerr << " OK\n\n"; + } + } else { + if (verboseErrors) { + std::cerr << " FAILURE! 
" << error_count << " errors...\n\n"; + } + } + + // returns true if all pixels pass + return (error_count == 0) ? true : false; +} + +inline bool sdkComparePGM(const char *src_file, const char *ref_file, + const float epsilon, const float threshold, + bool verboseErrors) { + unsigned char *src_data = 0, *ref_data = 0; + uint64_t error_count = 0; + unsigned int ref_width, ref_height; + unsigned int src_width, src_height; + + if (src_file == NULL || ref_file == NULL) { + if (verboseErrors) { + std::cerr << "PGMvsPGM: src_file or ref_file is NULL." + " Aborting comparison\n"; + } + + return false; + } + + if (verboseErrors) { + std::cerr << "> Compare (a)rendered: <" << src_file << ">\n"; + std::cerr << "> (b)reference: <" << ref_file << ">\n"; + } + + if (sdkLoadPPMub(ref_file, &ref_data, &ref_width, &ref_height) != true) { + if (verboseErrors) { + std::cerr << "PGMvsPGM: unable to load ref image file: " << ref_file + << "\n"; + } + + return false; + } + + if (sdkLoadPPMub(src_file, &src_data, &src_width, &src_height) != true) { + std::cerr << "PGMvsPGM: unable to load src image file: " << src_file + << "\n"; + return false; + } + + if (src_height != ref_height || src_width != ref_width) { + if (verboseErrors) { + std::cerr << "PGMvsPGM: source and ref size mismatch (" << src_width + << "," << src_height << ")vs(" << ref_width << "," << ref_height + << ")\n"; + } + } + + if (verboseErrors) + std::cerr << "PGMvsPGM: comparing images size (" << src_width << "," + << src_height << ") epsilon(" << epsilon << "), threshold(" + << threshold * 100 << "%)\n"; + + if (compareData(ref_data, src_data, src_width * src_height, epsilon, + threshold) == false) { + error_count = 1; + } + + if (error_count == 0) { + if (verboseErrors) { + std::cerr << " OK\n\n"; + } + } else { + if (verboseErrors) { + std::cerr << " FAILURE! " << error_count << " errors...\n\n"; + } + } + + // returns true if all pixels pass + return (error_count == 0) ? 
true : false; +} + +#endif // COMMON_HELPER_IMAGE_H_ \ No newline at end of file diff --git a/3rdParty/cuda_samples/helper_string.h b/3rdParty/cuda_samples/helper_string.h new file mode 100644 index 0000000000000000000000000000000000000000..c09935174ec057757c20263df080cb8d77b53f52 --- /dev/null +++ b/3rdParty/cuda_samples/helper_string.h @@ -0,0 +1,368 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +// These are helper functions for the SDK samples (string parsing, timers, etc) +#ifndef COMMON_HELPER_STRING_H_ +#define COMMON_HELPER_STRING_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <fstream> +#include <string> + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +#ifndef _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_DEPRECATE +#endif +#ifndef STRCASECMP +#define STRCASECMP _stricmp +#endif +#ifndef STRNCASECMP +#define STRNCASECMP _strnicmp +#endif +#ifndef STRCPY +#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath) +#endif + +#ifndef FOPEN +#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result != 0) +#endif +#ifndef SSCANF +#define SSCANF sscanf_s +#endif +#ifndef SPRINTF +#define SPRINTF sprintf_s +#endif +#else // Linux Includes +#include <string.h> +#include <strings.h> + +#ifndef STRCASECMP +#define STRCASECMP strcasecmp +#endif +#ifndef STRNCASECMP +#define STRNCASECMP strncasecmp +#endif +#ifndef STRCPY +#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath) +#endif + +#ifndef FOPEN +#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode)) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result == NULL) +#endif +#ifndef SSCANF +#define SSCANF sscanf +#endif +#ifndef SPRINTF +#define SPRINTF sprintf +#endif +#endif + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +// CUDA Utility Helper Functions +inline int stringRemoveDelimiter(char delimiter, const char *string) { + int string_start = 0; + + while (string[string_start] == delimiter) { + string_start++; + } + + if (string_start >= static_cast<int>(strlen(string) - 1)) { + return 0; + } + + return string_start; +} + +inline int getFileExtension(char *filename, char **extension) { + int string_length = static_cast<int>(strlen(filename)); + + while (filename[string_length--] != '.') { + if (string_length == 
0) break; + } + + if (string_length > 0) string_length += 2; + + if (string_length == 0) + *extension = NULL; + else + *extension = &filename[string_length]; + + return string_length; +} + +inline bool checkCmdLineFlag(const int argc, const char **argv, + const char *string_ref) { + bool bFound = false; + + if (argc >= 1) { + for (int i = 1; i < argc; i++) { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + + const char *equal_pos = strchr(string_argv, '='); + int argv_length = static_cast<int>( + equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); + + int length = static_cast<int>(strlen(string_ref)); + + if (length == argv_length && + !STRNCASECMP(string_argv, string_ref, length)) { + bFound = true; + continue; + } + } + } + + return bFound; +} + +// This function wraps the CUDA Driver API into a template function +template <class T> +inline bool getCmdLineArgumentValue(const int argc, const char **argv, + const char *string_ref, T *value) { + bool bFound = false; + + if (argc >= 1) { + for (int i = 1; i < argc; i++) { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = static_cast<int>(strlen(string_ref)); + + if (!STRNCASECMP(string_argv, string_ref, length)) { + if (length + 1 <= static_cast<int>(strlen(string_argv))) { + int auto_inc = (string_argv[length] == '=') ? 
1 : 0; + *value = (T)atoi(&string_argv[length + auto_inc]); + } + + bFound = true; + i = argc; + } + } + } + + return bFound; +} + +inline int getCmdLineArgumentInt(const int argc, const char **argv, + const char *string_ref) { + bool bFound = false; + int value = -1; + + if (argc >= 1) { + for (int i = 1; i < argc; i++) { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = static_cast<int>(strlen(string_ref)); + + if (!STRNCASECMP(string_argv, string_ref, length)) { + if (length + 1 <= static_cast<int>(strlen(string_argv))) { + int auto_inc = (string_argv[length] == '=') ? 1 : 0; + value = atoi(&string_argv[length + auto_inc]); + } else { + value = 0; + } + + bFound = true; + continue; + } + } + } + + if (bFound) { + return value; + } else { + return 0; + } +} + +inline float getCmdLineArgumentFloat(const int argc, const char **argv, + const char *string_ref) { + bool bFound = false; + float value = -1; + + if (argc >= 1) { + for (int i = 1; i < argc; i++) { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = static_cast<int>(strlen(string_ref)); + + if (!STRNCASECMP(string_argv, string_ref, length)) { + if (length + 1 <= static_cast<int>(strlen(string_argv))) { + int auto_inc = (string_argv[length] == '=') ? 
1 : 0; + value = static_cast<float>(atof(&string_argv[length + auto_inc])); + } else { + value = 0.f; + } + + bFound = true; + continue; + } + } + } + + if (bFound) { + return value; + } else { + return 0; + } +} + +inline bool getCmdLineArgumentString(const int argc, const char **argv, + const char *string_ref, + char **string_retval) { + bool bFound = false; + + if (argc >= 1) { + for (int i = 1; i < argc; i++) { + int string_start = stringRemoveDelimiter('-', argv[i]); + char *string_argv = const_cast<char *>(&argv[i][string_start]); + int length = static_cast<int>(strlen(string_ref)); + + if (!STRNCASECMP(string_argv, string_ref, length)) { + *string_retval = &string_argv[length + 1]; + bFound = true; + continue; + } + } + } + + if (!bFound) { + *string_retval = NULL; + } + + return bFound; +} + +////////////////////////////////////////////////////////////////////////////// +//! Find the path for a file assuming that +//! files are found in the searchPath. +//! +//! @return the path if succeeded, otherwise 0 +//! @param filename name of the file +//! @param executable_path optional absolute path of the executable +////////////////////////////////////////////////////////////////////////////// +inline char *sdkFindFilePath(const char *filename, + const char *executable_path) { + // <executable_name> defines a variable that is replaced with the name of the + // executable + + // Typical relative search paths to locate needed companion files (e.g. 
sample + // input data, or JIT source files) The origin for the relative search may be + // the .exe file, a .bat file launching an .exe, a browser .exe launching the + // .exe or .bat, etc + const char *searchPath[] = { + "./", // same dir + "./data/", // same dir + "../../../../Samples/<executable_name>/", // up 4 in tree + "../../../Samples/<executable_name>/", // up 3 in tree + "../../Samples/<executable_name>/", // up 2 in tree + "../../../../Samples/<executable_name>/data/", // up 4 in tree + "../../../Samples/<executable_name>/data/", // up 3 in tree + "../../Samples/<executable_name>/data/", // up 2 in tree + "../../../../Common/data/", // up 4 in tree + "../../../Common/data/", // up 3 in tree + "../../Common/data/" // up 2 in tree + }; + + // Extract the executable name + std::string executable_name; + + if (executable_path != 0) { + executable_name = std::string(executable_path); + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + // Windows path delimiter + size_t delimiter_pos = executable_name.find_last_of('\\'); + executable_name.erase(0, delimiter_pos + 1); + + if (executable_name.rfind(".exe") != std::string::npos) { + // we strip .exe, only if the .exe is found + executable_name.resize(executable_name.size() - 4); + } + +#else + // Linux & OSX path delimiter + size_t delimiter_pos = executable_name.find_last_of('/'); + executable_name.erase(0, delimiter_pos + 1); +#endif + } + + // Loop over all search paths and return the first hit + for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) { + std::string path(searchPath[i]); + size_t executable_name_pos = path.find("<executable_name>"); + + // If there is executable_name variable in the searchPath + // replace it with the value + if (executable_name_pos != std::string::npos) { + if (executable_path != 0) { + path.replace(executable_name_pos, strlen("<executable_name>"), + executable_name); + } else { + // Skip this path entry if no executable argument is 
given + continue; + } + } + +#ifdef _DEBUG + printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); +#endif + + // Test if the file exists + path.append(filename); + FILE *fp; + FOPEN(fp, path.c_str(), "rb"); + + if (fp != NULL) { + fclose(fp); + // File found + // returning an allocated array here for backwards compatibility reasons + char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1)); + STRCPY(file_path, path.length() + 1, path.c_str()); + return file_path; + } + + if (fp) { + fclose(fp); + } + } + + // File not found + return 0; +} + +#endif // COMMON_HELPER_STRING_H_ \ No newline at end of file diff --git a/3rdParty/cuda_samples/helper_timer.h b/3rdParty/cuda_samples/helper_timer.h new file mode 100644 index 0000000000000000000000000000000000000000..51efd720993057092323ea224377e87c95d70ff2 --- /dev/null +++ b/3rdParty/cuda_samples/helper_timer.h @@ -0,0 +1,465 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Helper Timing Functions +#ifndef COMMON_HELPER_TIMER_H_ +#define COMMON_HELPER_TIMER_H_ + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +// includes, system +#include <vector> + +// includes, project +#include <exception.h> + +// Definition of the StopWatch Interface, this is used if we don't want to use +// the CUT functions But rather in a self contained class interface +class StopWatchInterface { +public: + StopWatchInterface() {} + virtual ~StopWatchInterface() {} + +public: + //! Start time measurement + virtual void start() = 0; + + //! Stop time measurement + virtual void stop() = 0; + + //! Reset time counters to zero + virtual void reset() = 0; + + //! Time in msec. after start. If the stop watch is still running (i.e. there + //! was no call to stop()) then the elapsed time is returned, otherwise the + //! time between the last start() and stop call is returned + virtual float getTime() = 0; + + //! Mean time to date based on the number of times the stopwatch has been + //! 
_stopped_ (ie finished sessions) and the current total time + virtual float getAverageTime() = 0; +}; + +////////////////////////////////////////////////////////////////// +// Begin Stopwatch timer class definitions for all OS platforms // +////////////////////////////////////////////////////////////////// +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +// includes, system +#define WINDOWS_LEAN_AND_MEAN +#include <windows.h> +#undef min +#undef max + +//! Windows specific implementation of StopWatch +class StopWatchWin : public StopWatchInterface { + public: + //! Constructor, default + StopWatchWin() + : start_time(), + end_time(), + diff_time(0.0f), + total_time(0.0f), + running(false), + clock_sessions(0), + freq(0), + freq_set(false) { + if (!freq_set) { + // helper variable + LARGE_INTEGER temp; + + // get the tick frequency from the OS + QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&temp)); + + // convert to type in which it is needed + freq = (static_cast<double>(temp.QuadPart)) / 1000.0; + + // rememeber query + freq_set = true; + } + } + + // Destructor + ~StopWatchWin() {} + + public: + //! Start time measurement + inline void start(); + + //! Stop time measurement + inline void stop(); + + //! Reset time counters to zero + inline void reset(); + + //! Time in msec. after start. If the stop watch is still running (i.e. there + //! was no call to stop()) then the elapsed time is returned, otherwise the + //! time between the last start() and stop call is returned + inline float getTime(); + + //! Mean time to date based on the number of times the stopwatch has been + //! _stopped_ (ie finished sessions) and the current total time + inline float getAverageTime(); + + private: + // member variables + + //! Start of measurement + LARGE_INTEGER start_time; + //! End of measurement + LARGE_INTEGER end_time; + + //! Time difference between the last start and stop + float diff_time; + + //! 
TOTAL time difference between starts and stops + float total_time; + + //! flag if the stop watch is running + bool running; + + //! Number of times clock has been started + //! and stopped to allow averaging + int clock_sessions; + + //! tick frequency + double freq; + + //! flag if the frequency has been set + bool freq_set; +}; + +// functions, inlined + +//////////////////////////////////////////////////////////////////////////////// +//! Start time measurement +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchWin::start() { + QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time)); + running = true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Stop time measurement and increment add to the current diff_time summation +//! variable. Also increment the number of times this clock has been run. +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchWin::stop() { + QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&end_time)); + diff_time = static_cast<float>(((static_cast<double>(end_time.QuadPart) - + static_cast<double>(start_time.QuadPart)) / + freq)); + + total_time += diff_time; + clock_sessions++; + running = false; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Reset the timer to 0. Does not change the timer running state but does +//! recapture this point in time as the current start time if it is running. +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchWin::reset() { + diff_time = 0; + total_time = 0; + clock_sessions = 0; + + if (running) { + QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&start_time)); + } +} + +//////////////////////////////////////////////////////////////////////////////// +//! Time in msec. after start. If the stop watch is still running (i.e. 
there +//! was no call to stop()) then the elapsed time is returned added to the +//! current diff_time sum, otherwise the current summed time difference alone +//! is returned. +//////////////////////////////////////////////////////////////////////////////// +inline float StopWatchWin::getTime() { + // Return the TOTAL time to date + float retval = total_time; + + if (running) { + LARGE_INTEGER temp; + QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&temp)); + retval += static_cast<float>(((static_cast<double>(temp.QuadPart) - + static_cast<double>(start_time.QuadPart)) / + freq)); + } + + return retval; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Time in msec. for a single run based on the total number of COMPLETED runs +//! and the total time. +//////////////////////////////////////////////////////////////////////////////// +inline float StopWatchWin::getAverageTime() { + return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f; +} +#else +// Declarations for Stopwatch on Linux and Mac OSX +// includes, system +#include <sys/time.h> +#include <ctime> + +//! Windows specific implementation of StopWatch +class StopWatchLinux : public StopWatchInterface { +public: + //! Constructor, default + StopWatchLinux() + : start_time(), + diff_time(0.0), + total_time(0.0), + running(false), + clock_sessions(0) {} + + // Destructor + virtual ~StopWatchLinux() {} + +public: + //! Start time measurement + inline void start(); + + //! Stop time measurement + inline void stop(); + + //! Reset time counters to zero + inline void reset(); + + //! Time in msec. after start. If the stop watch is still running (i.e. there + //! was no call to stop()) then the elapsed time is returned, otherwise the + //! time between the last start() and stop call is returned + inline float getTime(); + + //! Mean time to date based on the number of times the stopwatch has been + //! 
_stopped_ (ie finished sessions) and the current total time + inline float getAverageTime(); + +private: + // helper functions + + //! Get difference between start time and current time + inline float getDiffTime(); + +private: + // member variables + + //! Start of measurement + struct timeval start_time; + + //! Time difference between the last start and stop + float diff_time; + + //! TOTAL time difference between starts and stops + float total_time; + + //! flag if the stop watch is running + bool running; + + //! Number of times clock has been started + //! and stopped to allow averaging + int clock_sessions; +}; + +// functions, inlined + +//////////////////////////////////////////////////////////////////////////////// +//! Start time measurement +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchLinux::start() { + gettimeofday(&start_time, 0); + running = true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Stop time measurement and increment add to the current diff_time summation +//! variable. Also increment the number of times this clock has been run. +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchLinux::stop() { + diff_time = getDiffTime(); + total_time += diff_time; + running = false; + clock_sessions++; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Reset the timer to 0. Does not change the timer running state but does +//! recapture this point in time as the current start time if it is running. +//////////////////////////////////////////////////////////////////////////////// +inline void StopWatchLinux::reset() { + diff_time = 0; + total_time = 0; + clock_sessions = 0; + + if (running) { + gettimeofday(&start_time, 0); + } +} + +//////////////////////////////////////////////////////////////////////////////// +//! Time in msec. after start. 
If the stop watch is still running (i.e. there +//! was no call to stop()) then the elapsed time is returned added to the +//! current diff_time sum, otherwise the current summed time difference alone +//! is returned. +//////////////////////////////////////////////////////////////////////////////// +inline float StopWatchLinux::getTime() { + // Return the TOTAL time to date + float retval = total_time; + + if (running) { + retval += getDiffTime(); + } + + return retval; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Time in msec. for a single run based on the total number of COMPLETED runs +//! and the total time. +//////////////////////////////////////////////////////////////////////////////// +inline float StopWatchLinux::getAverageTime() { + return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f; +} +//////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +inline float StopWatchLinux::getDiffTime() { + struct timeval t_time; + gettimeofday(&t_time, 0); + + // time difference in milli-seconds + return static_cast<float>(1000.0 * (t_time.tv_sec - start_time.tv_sec) + + (0.001 * (t_time.tv_usec - start_time.tv_usec))); +} +#endif // WIN32 + +//////////////////////////////////////////////////////////////////////////////// +//! Timer functionality exported + +//////////////////////////////////////////////////////////////////////////////// +//! Create a new timer +//! @return true if a time has been created, otherwise false +//! 
@param name of the new timer, 0 if the creation failed +//////////////////////////////////////////////////////////////////////////////// +inline bool sdkCreateTimer(StopWatchInterface **timer_interface) { +// printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface); +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + *timer_interface = reinterpret_cast<StopWatchInterface *>(new StopWatchWin()); +#else + *timer_interface = + reinterpret_cast<StopWatchInterface *>(new StopWatchLinux()); +#endif + return (*timer_interface != NULL) ? true : false; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Delete a timer +//! @return true if a time has been deleted, otherwise false +//! @param name of the timer to delete +//////////////////////////////////////////////////////////////////////////////// +inline bool sdkDeleteTimer(StopWatchInterface **timer_interface) { + // printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface); + if (*timer_interface) { + delete *timer_interface; + *timer_interface = NULL; + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Start the time with name \a name +//! @param name name of the timer to start +//////////////////////////////////////////////////////////////////////////////// +inline bool sdkStartTimer(StopWatchInterface **timer_interface) { + // printf("sdkStartTimer called object %08x\n", (void *)*timer_interface); + if (*timer_interface) { + (*timer_interface)->start(); + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Stop the time with name \a name. Does not reset. +//! 
@param name name of the timer to stop +//////////////////////////////////////////////////////////////////////////////// +inline bool sdkStopTimer(StopWatchInterface **timer_interface) { + // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface); + if (*timer_interface) { + (*timer_interface)->stop(); + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Resets the timer's counter. +//! @param name name of the timer to reset. +//////////////////////////////////////////////////////////////////////////////// +inline bool sdkResetTimer(StopWatchInterface **timer_interface) { + // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface); + if (*timer_interface) { + (*timer_interface)->reset(); + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Return the average time for timer execution as the total time +//! for the timer dividied by the number of completed (stopped) runs the timer +//! has made. +//! Excludes the current running time if the timer is currently running. +//! @param name name of the timer to return the time of +//////////////////////////////////////////////////////////////////////////////// +inline float sdkGetAverageTimerValue(StopWatchInterface **timer_interface) { + // printf("sdkGetAverageTimerValue called object %08x\n", (void + // *)*timer_interface); + if (*timer_interface) { + return (*timer_interface)->getAverageTime(); + } else { + return 0.0f; + } +} + +//////////////////////////////////////////////////////////////////////////////// +//! Total execution time for the timer over all runs since the last reset +//! or timer creation. +//! @param name name of the timer to obtain the value of. 
+//////////////////////////////////////////////////////////////////////////////// +inline float sdkGetTimerValue(StopWatchInterface **timer_interface) { + // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface); + if (*timer_interface) { + return (*timer_interface)->getTime(); + } else { + return 0.0f; + } +} + +#endif // COMMON_HELPER_TIMER_H_ \ No newline at end of file diff --git a/3rdParty/googletest/CMakeLists.txt b/3rdParty/googletest/CMakeLists.txt index acc5fb1ed8d37bcc89e9e60aceb28c7400e7cfca..781b60be44fc76ec640fea0444a527c10125e141 100644 --- a/3rdParty/googletest/CMakeLists.txt +++ b/3rdParty/googletest/CMakeLists.txt @@ -1,7 +1,7 @@ # Note: CMake support is community-based. The maintainers do not use CMake # internally. -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 2.8.12) if (POLICY CMP0048) cmake_policy(SET CMP0048 NEW) diff --git a/3rdParty/googletest/googlemock/CMakeLists.txt b/3rdParty/googletest/googlemock/CMakeLists.txt index d32b70b5be0e0ae74f5376fb03a2226065ad599a..63cd3c61441ee2ba9177b6b366c069013b0ff4ad 100644 --- a/3rdParty/googletest/googlemock/CMakeLists.txt +++ b/3rdParty/googletest/googlemock/CMakeLists.txt @@ -42,7 +42,7 @@ else() cmake_policy(SET CMP0048 NEW) project(gmock VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C) endif() -cmake_minimum_required(VERSION 2.6.4) +cmake_minimum_required(VERSION 2.8.12) if (COMMAND set_up_hermetic_build) set_up_hermetic_build() diff --git a/3rdParty/googletest/googletest/CMakeLists.txt b/3rdParty/googletest/googletest/CMakeLists.txt index db292946a59453e09929229c1fbdb3701f2bd6ab..0ef01d22e7216988d86f728a27309d6494e36b1c 100644 --- a/3rdParty/googletest/googletest/CMakeLists.txt +++ b/3rdParty/googletest/googletest/CMakeLists.txt @@ -53,7 +53,7 @@ else() cmake_policy(SET CMP0048 NEW) project(gtest VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C) endif() -cmake_minimum_required(VERSION 2.6.4) +cmake_minimum_required(VERSION 2.8.12) if (POLICY CMP0063) # 
Visibility cmake_policy(SET CMP0063 NEW) diff --git a/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake b/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake index d83b2083c176a3addb3ddb951fd0e44923b18aa6..b8478c5d06fd5f7f1347ef8da75073a90fa5faa1 100644 --- a/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake +++ b/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake @@ -61,7 +61,9 @@ if(GDB) set(GKlib_COPTS "${GKlib_COPTS} -g") set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror") else() - set(GKlib_COPTS "-O3") + if(NOT MSVC) + set(GKlib_COPTS "-O3") + endif() endif(GDB) diff --git a/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt index 4732b645ea354ada4a61540e12f73bff90540cb5..802241df51949274173b3f647cf6a54615947eb5 100644 --- a/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt +++ b/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt @@ -6,10 +6,12 @@ file(GLOB metis_sources *.c) add_library(metis ${METIS_LIBRARY_TYPE} ${GKlib_sources} ${metis_sources}) if(UNIX) target_link_libraries(metis m) + + target_compile_options(metis PRIVATE "-Wno-format") endif() if(MSVC) - target_compile_options(metis PRIVATE "/w") + target_compile_options(metis PRIVATE "/W0") endif() if(METIS_INSTALL) diff --git a/CMake/3rd.cmake b/CMake/3rd.cmake index 6cc488f94716f7cec973874b0930ed8f9b719d08..781146111d48739671b35c98bb96ebff358809b4 100644 --- a/CMake/3rd.cmake +++ b/CMake/3rd.cmake @@ -1,5 +1,2 @@ include(${VF_CMAKE_DIR}/3rd/boost.cmake) -include(${VF_CMAKE_DIR}/3rd/cuda.cmake) include(${VF_CMAKE_DIR}/3rd/gmock.cmake) -include(${VF_CMAKE_DIR}/3rd/mpi.cmake) -include(${VF_CMAKE_DIR}/3rd/OpenMP.cmake) \ No newline at end of file diff --git a/CMake/3rd/OpenMP.cmake b/CMake/3rd/OpenMP.cmake deleted file mode 100644 index 45465ba4bf43dd5cf7687b83ac1d368332614582..0000000000000000000000000000000000000000 --- a/CMake/3rd/OpenMP.cmake +++ /dev/null @@ -1,13 +0,0 @@ -function (linkOpenMP targetName) - - if(NOT USE_OPENMP) - return() - 
endif() - - find_package(OpenMP REQUIRED) - - if(OpenMP_CXX_FOUND) - target_link_libraries(${targetName} PUBLIC OpenMP::OpenMP_CXX) - endif() - -endfunction() \ No newline at end of file diff --git a/CMake/3rd/boost.cmake b/CMake/3rd/boost.cmake index 544ae2d97b3b8ef0277445cf252149986c8dfb3a..74f6f165cda1ef93f64dd326c961b2663bc6ea67 100644 --- a/CMake/3rd/boost.cmake +++ b/CMake/3rd/boost.cmake @@ -1,4 +1,10 @@ -function(linkBoost components) +function(linkBoost) + + set( options ) + set( oneValueArgs ) + set( multiValueArgs COMPONENTS) + cmake_parse_arguments( ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + if(BUILD_SHARED_LIBS) if (WIN32) set(Boost_USE_STATIC_LIBS ON) @@ -8,7 +14,9 @@ function(linkBoost components) set(Boost_USE_STATIC_RUNTIME OFF) else() set(Boost_USE_STATIC_LIBS ON) - set(Boost_USE_STATIC_RUNTIME ON) + if(WIN32) + set(Boost_USE_STATIC_RUNTIME ON) + endif() endif() set(Boost_USE_MULTITHREADED ON) @@ -18,8 +26,16 @@ function(linkBoost components) # add_definitions( -DBOOST_ALL_DYN_LINK ) endif() - vf_get_library_name(library_name) - find_package( Boost REQUIRED COMPONENTS ${components}) + vf_get_library_name(library_name) + if(DEFINED ARG_COMPONENTS) + find_package( Boost REQUIRED COMPONENTS ${ARG_COMPONENTS}) + target_link_libraries(${library_name} PRIVATE ${Boost_LIBRARIES}) + message("here") + else() + find_package( Boost REQUIRED) + message("or here") + endif() + + target_include_directories(${library_name} PRIVATE ${Boost_INCLUDE_DIR}) - target_link_libraries(${library_name} PRIVATE ${Boost_LIBRARIES}) endfunction() diff --git a/CMake/3rd/cuda.cmake b/CMake/3rd/cuda.cmake deleted file mode 100644 index 83acbdcc858e87e04d46d5a33e9b64b11c0e20e4..0000000000000000000000000000000000000000 --- a/CMake/3rd/cuda.cmake +++ /dev/null @@ -1,21 +0,0 @@ - -function(linkCUDA) - - find_path(CUDA_CUT_INCLUDE_DIR - helper_cuda.h - PATHS "$ENV{NVCUDASAMPLES_ROOT}" "${NVCUDASAMPLES_ROOT}" - PATH_SUFFIXES "common/inc" "Common" - DOC "Location 
of helper_cuda.h" - NO_DEFAULT_PATH - ) - - vf_get_library_name(library_name) - target_include_directories(${library_name} PRIVATE ${CUDA_CUT_INCLUDE_DIR}) - target_include_directories(${library_name} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - - message(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - - # set the following properties only for specific targets - # set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON) - # set_property(TARGET ${targetName} PROPERTY CUDA_64_BIT_DEVICE_CODE ON) -endfunction() \ No newline at end of file diff --git a/CMake/3rd/mpi.cmake b/CMake/3rd/mpi.cmake deleted file mode 100644 index 93b3ee386cc46623ed02bbbffb88996328f03e52..0000000000000000000000000000000000000000 --- a/CMake/3rd/mpi.cmake +++ /dev/null @@ -1,9 +0,0 @@ -function (linkMPI) - - find_package(MPI REQUIRED) - - vf_get_library_name(library_name) - target_include_directories(${library_name} PUBLIC ${MPI_CXX_INCLUDE_PATH}) - target_link_libraries(${library_name} PRIVATE MPI::MPI_CXX) - -endfunction() \ No newline at end of file diff --git a/CMake/CMakeSetCompilerFlags.cmake b/CMake/CMakeSetCompilerFlags.cmake index 4165eeff8b8cce1e89d19beffefe7507496fee49..784f3f24a7cde518e113363aa92ce90fec0e9c2d 100644 --- a/CMake/CMakeSetCompilerFlags.cmake +++ b/CMake/CMakeSetCompilerFlags.cmake @@ -1,29 +1,21 @@ -############################################################### -# set hostname -> CAB_MACHINE and load an optional config file -############################################################### +######################################################################################### +## Access the hostname and loads a optional machine file hostname.cmake +######################################################################################### macro(loadMachineFile) - IF(NOT CAB_MACHINE) - SET(CAB_MACHINE $ENV{CAB_MACHINE}) + site_name(MACHINE_NAME) + string(TOUPPER "${MACHINE_NAME}" MACHINE_NAME) - IF( CAB_MACHINE ) - STRING(TOUPPER "${CAB_MACHINE}" 
CAB_MACHINE) - ELSE() - EXECUTE_PROCESS( COMMAND hostname OUTPUT_VARIABLE CAB_MACHINE) - STRING(REGEX REPLACE "[ ]*([A-Za-z0-9]+).*[\\\\n]*" "\\1" CAB_MACHINE "${CAB_MACHINE}" ) - STRING(TOUPPER "${CAB_MACHINE}" CAB_MACHINE) - ENDIF() - ENDIF() + set(BUILD_MACHINE_FILE_PATH "${VF_CMAKE_DIR}/cmake_config_files") - LIST(APPEND VF_COMPILER_DEFINITION CAB_MACHINE=${CAB_MACHINE}) - SET(CMAKE_CONFIG_FILE "${VF_CMAKE_DIR}/cmake_config_files/${CAB_MACHINE}.config.cmake") + set(MACHINE_FILE "${BUILD_MACHINE_FILE_PATH}/${MACHINE_NAME}.config.cmake") - IF(NOT EXISTS ${CMAKE_CONFIG_FILE}) - status("No configuration file found for machine: ${CAB_MACHINE}.") + IF(NOT EXISTS ${MACHINE_FILE}) + status("No configuration file found: ${MACHINE_FILE}.") ELSE() - status("Load configuration file ${CAB_MACHINE}.config.cmake") - include(${CMAKE_CONFIG_FILE}) + status("Load configuration file: ${MACHINE_FILE}") + include(${MACHINE_FILE}) ENDIF() endmacro() @@ -35,9 +27,9 @@ endmacro() ################################################################ macro(loadCompilerFlags) - SET(CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "") - SET(CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "") - SET(CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE "") + SET(CS_COMPILER_FLAGS_CXX "") + SET(CS_COMPILER_FLAGS_CXX_DEBUG "") + SET(CS_COMPILER_FLAGS_CXX_RELEASE "") # https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html#variable:CMAKE_<LANG>_COMPILER_ID @@ -57,9 +49,9 @@ endmacro() ################################################################ function(addAdditionalFlags project_name) - status_lib("additional compiler flags CXX: ${CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS}") - status_lib("additional compiler flags CXX debug: ${CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG}") - status_lib("additional compiler flags CXX release: ${CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE}") + status_lib("additional compiler flags CXX: ${CS_COMPILER_FLAGS_CXX}") + status_lib("additional compiler 
flags CXX debug: ${CS_COMPILER_FLAGS_CXX_DEBUG}") + status_lib("additional compiler flags CXX release: ${CS_COMPILER_FLAGS_CXX_RELEASE}") status_lib("additional compiler definitions: ${VF_COMPILER_DEFINITION}") status_lib("additional linker flags: ${VF_LINK_OPTIONS}") @@ -74,15 +66,15 @@ function(addAdditionalFlags project_name) endforeach() # compile options - foreach(flag IN LISTS CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS) + foreach(flag IN LISTS CS_COMPILER_FLAGS_CXX) target_compile_options(${project_name} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${flag}>") endforeach() - foreach(flag IN LISTS CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG) + foreach(flag IN LISTS CS_COMPILER_FLAGS_CXX_DEBUG) target_compile_options(${project_name} PRIVATE "$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:${flag}>") endforeach() - foreach(flag IN LISTS CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE) + foreach(flag IN LISTS CS_COMPILER_FLAGS_CXX_RELEASE) target_compile_options(${project_name} PRIVATE "$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:${flag}>") endforeach() diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake index 69e56fa63d4f25adb84e539678055e7f46b62d3d..debb5ee7826d7dc3d6499b3813070dcab4b94bd4 100644 --- a/CMake/VirtualFluidsMacros.cmake +++ b/CMake/VirtualFluidsMacros.cmake @@ -112,6 +112,15 @@ function(vf_add_library) else() vf_get_library_name (library_name) endif() + + if(NOT DEFINED ARG_BUILDTYPE) + if(BUILD_SHARED_LIBS) + set(ARG_BUILDTYPE "shared") + else() + set(ARG_BUILDTYPE "static") + endif() + endif() + status("Configuring the target: ${library_name} (type=${ARG_BUILDTYPE})...") @@ -216,6 +225,10 @@ function(vf_add_library) target_include_directories(${library_name} PRIVATE ${VF_SRC_DIR}/gpu) target_include_directories(${library_name} PRIVATE ${VF_SRC_DIR}/cpu) + if(BUILD_VF_GPU) + target_include_directories(${library_name} PRIVATE "${VF_THIRD_DIR}/cuda_samples/") + target_include_directories(${library_name} PRIVATE 
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + endif() status("... configuring target: ${library_name} (type=${ARG_BUILDTYPE}) done") diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake index 1d69df88bc174c206fb3639b7d67e703dd41d5eb..2f576538c106a6a4d83509a49a1408a8d63efbdb 100644 --- a/CMake/cmake_config_files/PHOENIX.config.cmake +++ b/CMake/cmake_config_files/PHOENIX.config.cmake @@ -43,4 +43,5 @@ SET(BOOST_LIBRARYDIR "/cluster/lib/boost/1.63.0/gcc/lib" CACHE PATH "BOOST_LIB #SET(VTK_DIR "/home/irmb/tools/VTK/build/VTK-8.2.0" CACHE PATH "VTK directory override" FORCE) #SET(VTK_DIR "/home/stelenz/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE) -set(NVCUDASAMPLES_ROOT "/cluster/cuda/11.0/samples") +## nvidia +set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100 \ No newline at end of file diff --git a/CMake/compilerflags/AppleClang.cmake b/CMake/compilerflags/AppleClang.cmake index aecc48fa5398c6418c2d47f5af243659128050bc..6f52ad35956a967b3892aed26257fa1c41cb0c46 100644 --- a/CMake/compilerflags/AppleClang.cmake +++ b/CMake/compilerflags/AppleClang.cmake @@ -3,20 +3,26 @@ ############################################################################################################# # debug -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-g") # generates debug information. Works best with -O0. -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-O0") +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-g") # generates debug information. Works best with -O0. 
+list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-O0") # release -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) +list(APPEND CS_COMPILER_FLAGS_CXX_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) # all -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-fPIC") # position independent code for shared libraries +list(APPEND CS_COMPILER_FLAGS_CXX "-fPIC") # position independent code for shared libraries ############################################################################################################# # warnings ############################################################################################################# -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wall") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wunreachable-code") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wall") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wextra") +list(APPEND CS_COMPILER_FLAGS_CXX "-pedantic") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-unused-function") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-reorder") +if(BUILD_WARNINGS_AS_ERRORS) + list(APPEND CS_COMPILER_FLAGS_CXX -Werror) +endif() + +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-function") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-parameter") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-reorder") diff --git a/CMake/compilerflags/Clang.cmake b/CMake/compilerflags/Clang.cmake index 434be42697165ea347b2c2728e9cf836b66650af..2eb4eec5ee89715a1668ef078c84f4e720bd04e6 100644 --- a/CMake/compilerflags/Clang.cmake +++ b/CMake/compilerflags/Clang.cmake @@ -3,22 +3,31 @@ ############################################################################################################# # debug -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-g") # generates debug information. Works best with -O0. 
-list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-O0") +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-g") # generates debug information. Works best with -O0. +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-O0") # release -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) +list(APPEND CS_COMPILER_FLAGS_CXX_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) # all -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-fPIC") # position independent code for shared libraries +list(APPEND CS_COMPILER_FLAGS_CXX "-fPIC") # position independent code for shared libraries ############################################################################################################# # warnings ############################################################################################################# -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wall") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-unused-function") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-reorder-ctor") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wall") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wextra") +list(APPEND CS_COMPILER_FLAGS_CXX "-pedantic") + +if(BUILD_WARNINGS_AS_ERRORS) + list(APPEND CS_COMPILER_FLAGS_CXX -Werror) +endif() + +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-function") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-parameter") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-reorder-ctor") + ############################################################################################################# diff --git a/CMake/compilerflags/GNU.cmake b/CMake/compilerflags/GNU.cmake index 211f2a040b9c6c71d632fbee8621377d1df63350..3e67b79e9c84d29b51b2881b17e2b74f5510bbd2 100644 --- a/CMake/compilerflags/GNU.cmake +++ b/CMake/compilerflags/GNU.cmake @@ -3,33 +3,41 @@ 
############################################################################################################# # debug -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-g") # generates debug information. Works best with -O0. -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-O0") # no optimization +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-g") # generates debug information. Works best with -O0. +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-O0") # no optimization # release -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) +list(APPEND CS_COMPILER_FLAGS_CXX_RELEASE "-O3") # optimization level (-O3: most optimization which also could result in larger binaries) # all -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-fPIC") # position independent code for shared libraries +list(APPEND CS_COMPILER_FLAGS_CXX "-fPIC") # position independent code for shared libraries if(NOT BUILD_VF_INCLUDE_WHAT_YOU_USE) # optimization flag '-funroll-all-loops' is not supported for IWYU - LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-funroll-all-loops") + LIST(APPEND CS_COMPILER_FLAGS_CXX "-funroll-all-loops") endif() # gcov if (BUILD_VF_COVERAGE) - list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "--coverage") + list(APPEND CS_COMPILER_FLAGS_CXX "--coverage") set(CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} " --coverage") endif() ############################################################################################################# # warnings ############################################################################################################# -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wall") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-unused-function") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-reorder") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS 
"-Wno-sign-compare") -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-Wno-unknown-pragmas") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wall") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wextra") +list(APPEND CS_COMPILER_FLAGS_CXX "-pedantic") + +if(BUILD_WARNINGS_AS_ERRORS) + list(APPEND CS_COMPILER_FLAGS_CXX -Werror) +endif() + +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-function") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unused-parameter") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-reorder") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-unknown-pragmas") +list(APPEND CS_COMPILER_FLAGS_CXX "-Wno-cast-function-type") ############################################################################################################# # linker options diff --git a/CMake/compilerflags/Intel.cmake b/CMake/compilerflags/Intel.cmake index a53998d93eec146de6158d80bad2302536b2c252..c7177d4c246402d8b865e7059273cb18d0938e44 100644 --- a/CMake/compilerflags/Intel.cmake +++ b/CMake/compilerflags/Intel.cmake @@ -2,22 +2,22 @@ # compiler flags ############################################################################################################# -#LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-O") -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd654") -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd1125") #virtual function override intended -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd1224") #warning directive: This file includes at least one deprecated or antiquated header -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd377") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd327") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor -#~ LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-wd327") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor -#~ -#~ LIST(APPEND 
CAB_COMPILER_ADDTIONAL_C_COMPILER_FLAGS "-wd266") #function "__GKfree" declared implicitly -#LIST(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-xHOST -O3 -ip -ipo -fno-alias -mcmodel=medium -qopt-streaming-stores=always") +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-O") +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd654") +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd1125") #virtual function override intended +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd1224") #warning directive: This file includes at least one deprecated or antiquated header +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd377") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd327") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-wd327") #class "std::auto_ptr<RCF::I_ClientTransport>" has no suitable copy constructor +# +#LIST(APPEND CAB_COMPILER_ADDTIONAL_C_COMPILER_FLAGS "-wd266") #function "__GKfree" declared implicitly +#LIST(APPEND CS_COMPILER_FLAGS_CXX "-xHOST -O3 -ip -ipo -fno-alias -mcmodel=medium -qopt-streaming-stores=always") # all -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-xHOST;-O3;-ip;-fno-alias;-mcmodel=medium;-qopt-streaming-stores=always;-xCORE-AVX512;-qopt-zmm-usage=high") +list(APPEND CS_COMPILER_FLAGS_CXX "-xHOST;-O3;-ip;-fno-alias;-mcmodel=medium;-qopt-streaming-stores=always;-xCORE-AVX512;-qopt-zmm-usage=high") # debug -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS_DEBUG "-g -traceback") +list(APPEND CS_COMPILER_FLAGS_CXX_DEBUG "-g -traceback") ############################################################################################################# diff --git a/CMake/compilerflags/MSVC.cmake b/CMake/compilerflags/MSVC.cmake index 937d9f4d5d1fead78f35e2442845b27f13bc9a54..2af38d98e63cf04c4da476fb02754ce47510e4f6 100644 --- a/CMake/compilerflags/MSVC.cmake +++ b/CMake/compilerflags/MSVC.cmake @@ -1,29 +1,29 @@ 
############################################################################################################# # compiler flags ############################################################################################################# -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/bigobj") # increases that address capacity to 4,294,967,296 (2^32). -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "-MP") # enable multi-threaded compiling +list(APPEND CS_COMPILER_FLAGS_CXX "/bigobj") # increases that address capacity to 4,294,967,296 (2^32). +list(APPEND CS_COMPILER_FLAGS_CXX "-MP") # enable multi-threaded compiling ############################################################################################################# # warnings ############################################################################################################# -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/W4") # highest warning level +list(APPEND CS_COMPILER_FLAGS_CXX "/W4") # highest warning level # With W4 the following warnings appear many times. As long they are not eliminated they are suppressed: -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4458") # C4458: declaration of 'XXX' hides class member -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4100") # C4100: 'XXX': unreferenced formal parameter -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4505") # C4505: 'XXX': unreferenced local function has been removed -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4244") # C4244: '=': conversion from 'int' to 'char', possible loss of data, triggered by algorithm(2216,24) -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4310") # C4310: cast truncates constant value, triggerd by muParserbase.h -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4127") # C4127: conditional expression is constant: e.g. 
sizeof(int) +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4458") # C4458: declaration of 'XXX' hides class member +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4100") # C4100: 'XXX': unreferenced formal parameter +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4505") # C4505: 'XXX': unreferenced local function has been removed +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4244") # C4244: '=': conversion from 'int' to 'char', possible loss of data, triggered by algorithm(2216,24) +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4310") # C4310: cast truncates constant value, triggerd by muParserbase.h +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4127") # C4127: conditional expression is constant: e.g. sizeof(int) # Urgent FIXME: This warning should be activated and fixed: -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4701") # C4701: potentially uninitialized local variable 'lMaxX3' used +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4701") # C4701: potentially uninitialized local variable 'lMaxX3' used -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4251") # disable needs to have dll interface -list(APPEND CAB_COMPILER_ADDTIONAL_CXX_COMPILER_FLAGS "/wd4005") # disable macro redefinition (triggered by metis.h) +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4251") # disable needs to have dll interface +list(APPEND CS_COMPILER_FLAGS_CXX "/wd4005") # disable macro redefinition (triggered by metis.h) ############################################################################################################# # preprocessor definitions diff --git a/CMakeLists.txt b/CMakeLists.txt index 50073d8a0016c0345e9ae7ae3cfc5507afb6ad14..efce16caaf8d40ae3f5a9ca9bcedd5c5a6767bf5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,17 +8,15 @@ ################################################################################# # required cmake versions # CMAKE 3.13: target_link_options +# CMAKE 3.15: CMAKE_MSVC_RUNTIME_LIBRARY ################################################################################# 
-cmake_minimum_required(VERSION 3.13..3.18 FATAL_ERROR) +cmake_minimum_required(VERSION 3.15..3.19 FATAL_ERROR) project(VirtualFluids CXX) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_STANDARD 14) -set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) - set_property(GLOBAL PROPERTY USE_FOLDERS ON) set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER ".cmake") set(libraryFolder "libs") @@ -37,30 +35,36 @@ set (VF_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}) option(BUILD_VF_CPU "Build VirtualFluids cpu variant" OFF) option(BUILD_VF_GPU "Build VirtualFluids gpu variant" OFF) +option(BUILD_USE_OPENMP "Build VirtualFluids with openmp" ON) + + +# vf gpu +option(BUILD_VF_GPU "Build VirtualFluids GPU" ON ) +option(BUILD_VF_GKS "Build VirtualFluids GKS" OFF ) +option(BUILD_VF_TRAFFIC "Build VirtualFluids Traffic" OFF) +option(BUILD_JSONCPP "Builds json cpp " OFF) +option(BUILD_NUMERIC_TESTS "Build numeric tests" OFF) + option(BUILD_VF_UNIT_TESTS "Build VirtualFluids unit tests" OFF) option(BUILD_VF_CLANG_TIDY "Add the clang tidy checks to the targets" OFF) option(BUILD_VF_INCLUDE_WHAT_YOU_USE "Add IWYU to the targets" OFF) option(BUILD_VF_CPPCHECK "Add cppcheck to the targets" OFF) option(BUILD_VF_COVERAGE "Add the -coverage compiler flag." 
OFF) -option(BUILD_SHARED_LIBS "" ON) +option(BUILD_SHARED_LIBS "" OFF) +option(BUILD_WARNINGS_AS_ERRORS "" OFF) -option(USE_OPENMP "Include OpenMP support" ON) +# windows: use multi-threaded dynamically-linked runtime library +if(BUILD_SHARED_LIBS) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL") +else() + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") +endif() option(BUILD_VF_PYTHON_BINDINGS "" OFF) option(BUILD_VF_DOUBLE_ACCURACY "Use double accuracy" OFF) -################################################################################# -# CMAKE POLICIES -################################################################################# -# CMAKE_CUDA_ARCHITECTURES -# https://cmake.org/cmake/help/git-stage/policy/CMP0104.htmls -if(POLICY CMP0104) - cmake_policy(SET CMP0104 NEW) - set(CMAKE_CUDA_ARCHITECTURES 30) - # with cuda 11 the minimum architecture is 52 -endif() ################################################################################# # MACROS @@ -75,6 +79,13 @@ ENDIF() ################################################################################# # COMMON LIBRARIES ################################################################################# +if(BUILD_USE_OPENMP) + find_package(OpenMP REQUIRED) +endif() + +find_package(MPI REQUIRED) + + add_subdirectory(src/basics) ################################################################################# @@ -84,6 +95,24 @@ if (BUILD_VF_CPU) include (cpu.cmake) endif() if(BUILD_VF_GPU) + + include(CheckLanguage) + check_language(CUDA) + + if(NOT CMAKE_CUDA_COMPILER) + message(FATAL_ERROR "CUDA Compiler was requested but is not found on the system.") + endif() + + set(CMAKE_CUDA_STANDARD 11) + set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) + + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + message(WARNING "CMAKE_CUDA_ARCHITECTURES was not defined and is set to 30 (CUDA support until 10.1 only).") + set(CMAKE_CUDA_ARCHITECTURES 30) + endif() + + message("CUDA 
Architecture: ${CMAKE_CUDA_ARCHITECTURES}") + include (gpu.cmake) endif() @@ -91,5 +120,7 @@ endif() # 3rd Party Libraries ################################################################################# if(BUILD_VF_UNIT_TESTS) - add_subdirectory(${VF_THIRD_DIR}/googletest) + if(NOT BUILD_NUMERIC_TESTS) # in this case googletest is already included. + add_subdirectory(${VF_THIRD_DIR}/googletest) + endif() endif() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000000000000000000000000000000000000..6863446af85b177bb4dc99eed475aa52f4d50269 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,133 @@ +{ + "version": 1, + "cmakeMinimumRequired": { + "major": 3, + "minor": 19, + "patch": 0 + }, + "configurePresets": [ + { + "name": "default", + "hidden": true, + "binaryDir": "${sourceDir}/build/", + "cacheVariables": { + "BUILD_VF_UNIT_TESTS": "ON" + } + }, + { + "name": "default_make", + "inherits": "default", + "hidden": true, + "generator": "Unix Makefiles" + }, + { + "name": "default_ccache_make", + "inherits": "default_make", + "hidden": true, + "cacheVariables": { + "CMAKE_CXX_COMPILER_LAUNCHER": "ccache", + "CMAKE_CUDA_COMPILER_LAUNCHER": "ccache", + "CMAKE_C_COMPILER_LAUNCHER": "ccache" + } + }, + { + "name": "default_msvc", + "inherits": "default", + "hidden": true, + "generator": "Visual Studio 16 2019", + "architecture": "x64" + }, + { + "name": "default_cpu", + "hidden": true, + "description": "CPU build of VirtualFluids", + "cacheVariables": { + "BUILD_VF_CPU": "ON" + } + }, + { + "name": "default_gpu", + "hidden": true, + "description": "GPU build of VirtualFluids", + "cacheVariables": { + "BUILD_VF_GPU": "ON" + } + }, + { + "name": "default_gpu_numerical_tests", + "inherits": ["default_gpu"], + "hidden": true, + "description": "GPU numerical tests of VirtualFluids", + "cacheVariables": { + "BUILD_VF_DOUBLE_ACCURACY": "ON", + "BUILD_NUMERIC_TESTS": "ON" + } + }, + { + "name": "default_all", + "hidden": true, + "description": 
"All build of VirtualFluids", + "inherits": ["default_cpu", "default_gpu"] + }, + { + "name": "cpu_make", + "inherits": ["default_make", "default_cpu"], + "displayName": "cpu make configuration" + }, + { + "name": "cpu_make_ccache", + "inherits": ["default_ccache_make", "default_cpu"], + "displayName": "cpu ccache make configuration" + }, + { + "name": "cpu_msvc", + "inherits": ["default_msvc", "default_cpu"], + "displayName": "cpu msvc configuration" + }, + { + "name": "gpu_make", + "inherits": ["default_make", "default_gpu"], + "displayName": "gpu make configuration" + }, + { + "name": "gpu_make_ccache", + "inherits": ["default_ccache_make", "default_gpu"], + "displayName": "gpu ccache make configuration" + }, + { + "name": "gpu_msvc", + "inherits": ["default_msvc", "default_gpu"], + "displayName": "gpu msvc configuration" + }, + { + "name": "all_make", + "inherits": ["default_make", "default_all"], + "displayName": "all make configuration" + }, + { + "name": "all_make_ccache", + "inherits": ["default_ccache_make", "default_all"], + "displayName": "all ccache make configuration" + }, + { + "name": "all_msvc", + "inherits": ["default_msvc", "default_all"], + "displayName": "all msvc configuration" + }, + { + "name": "gpu_numerical_tests_make", + "inherits": ["default_make", "default_gpu_numerical_tests"], + "displayName": "gpu numerical tests make configuration" + }, + { + "name": "gpu_numerical_tests_ccache_make", + "inherits": ["default_ccache_make", "default_gpu_numerical_tests"], + "displayName": "gpu numerical tests ccache make configuration" + }, + { + "name": "gpu_numerical_tests_msvc", + "inherits": ["default_msvc", "default_gpu_numerical_tests"], + "displayName": "gpu numerical tests msvc configuration" + } + ] +} diff --git a/apps/gpu/LidDrivenCavityGPU/CMakeLists.txt b/apps/gpu/LidDrivenCavityGPU/CMakeLists.txt index f4e91979beaf3a63f1691803abe0feaf09a2d6cb..4a8f54ceea63c6efee33f38814d4b8db48695a2e 100644 --- a/apps/gpu/LidDrivenCavityGPU/CMakeLists.txt 
+++ b/apps/gpu/LidDrivenCavityGPU/CMakeLists.txt @@ -1,8 +1,3 @@ - - -PROJECT(LidDrivenCavityGPU) - +project(LidDrivenCavityGPU LANGUAGES CUDA CXX) vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator VirtualFluids_GPU GksMeshAdapter GksGpu FILES LidDrivenCavity.cpp) - -linkCUDA() diff --git a/gpu.cmake b/gpu.cmake index 8ccd015747d94b4724317da10f0dce6eba00750e..ff11b0a60ef949089130f8a8b2885d267ce91050 100644 --- a/gpu.cmake +++ b/gpu.cmake @@ -1,31 +1,7 @@ -############################################################# -### CUDAPATH ### -############################################################# - -# if CMake cannot find CUDA by itself, set the correct paths manually: -#SET(CUDA_CUT_INCLUDE_DIR "/cluster/cuda/9.0/include;/cluster/cuda/9.0/samples/common/inc" CACHE PATH "CUDA_CUT_INCLUDE_DIR") -#SET(CUDA_SAMPLE_INCLUDE_DIR "/cluster/cuda/9.0/samples/common/inc" CACHE PATH "CUDA_CUT_INCLUDE_DIR") - -############################################################# -### OPTIONS ### -############################################################# - -option(VF_DOUBLE_ACCURACY "Use double accuracy" ON ) - - -############################################################# - -enable_language(CUDA) - -############################################################# - - # only use this with device of CC larger than 6.0 IF(VF_DOUBLE_ACCURACY) - set(CMAKE_CUDA_FLAGS " -arch=sm_60" CACHE STRING "" FORCE) -ELSE(VF_DOUBLE_ACCURACY) - set(CMAKE_CUDA_FLAGS "" CACHE STRING "" FORCE) + set(CMAKE_CUDA_ARCHITECTURES 60) ENDIF(VF_DOUBLE_ACCURACY) set(CMAKE_CUDA_FLAGS_DEBUG " -G" CACHE STRING "" FORCE) diff --git a/sonar-project.properties b/sonar-project.properties index 51f5908ff5e93027d8d0a7a02224edb7a08f7142..79510bce96750061e92d6d0bf720eb1c6427790a 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -1,12 +1,9 @@ # must be unique in a given SonarQube instance sonar.projectKey=vf:project:open_source -# --- optional properties --- - -# defaults to project key 
sonar.projectName=VirtualFluids -# defaults to 'not provided' -#sonar.projectVersion=1.0 + +sonar.projectVersion=1.0 sonar.language=cxx @@ -36,4 +33,4 @@ sonar.cxx.gcc.reportPath=build/gcc_warnings.txt sonar.cxx.funccomplexity.threshold=10 -sonar.cxx.funcsize.threshold=20 \ No newline at end of file +sonar.cxx.funcsize.threshold=20 diff --git a/src/basics/CMakeLists.txt b/src/basics/CMakeLists.txt index 0b643212ad9c5da74e711d72da085c1fe670704e..1703d6269c1cebe36005226373da85b14c5515f6 100644 --- a/src/basics/CMakeLists.txt +++ b/src/basics/CMakeLists.txt @@ -1,7 +1,7 @@ include(Core/buildInfo.cmake) -vf_add_library(BUILDTYPE static EXCLUDE buildInfo.in.cpp) +vf_add_library(PUBLIC_LINK MPI::MPI_CXX EXCLUDE buildInfo.in.cpp) vf_get_library_name (library_name) target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/Core) @@ -21,7 +21,4 @@ IF(MSVC) target_compile_definitions(${library_name} PUBLIC NOMINMAX) # Disable Min/Max-Macros ENDIF(MSVC) - -linkMPI() - -vf_add_tests() \ No newline at end of file +vf_add_tests() diff --git a/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.cpp b/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.cpp index dce44b5551d2aedcf0c0d70a21dbcd9e8b071c8d..8fdb9fe1bc7f5dfe10dbfb522a87a4a3187dd60b 100644 --- a/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.cpp +++ b/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.cpp @@ -6,20 +6,17 @@ #include <fstream> #include <iostream> -BASICS_EXPORT std::shared_ptr<ConfigFileReader> ConfigFileReader::getNewInstance() +std::shared_ptr<ConfigFileReader> ConfigFileReader::getNewInstance() { return std::shared_ptr<ConfigFileReader>(new ConfigFileReader()); } -ConfigFileReader::ConfigFileReader() = default; - -BASICS_EXPORT ConfigFileReader::~ConfigFileReader() = default; - -BASICS_EXPORT std::shared_ptr<ConfigData> ConfigFileReader::readConfigFile(const std::string &filePath) const +std::shared_ptr<ConfigData> ConfigFileReader::readConfigFile(const char* 
filePath) const { + std::cout << filePath << std::endl; std::shared_ptr<ConfigDataImp> data = ConfigDataImp::getNewInstance(); std::ifstream stream; - stream.open(filePath.c_str(), std::ios::in); + stream.open(filePath, std::ios::in); if (stream.fail()) throw std::runtime_error("can not open config file!"); std::unique_ptr<input::Input> input = input::Input::makeInput(stream, "config"); diff --git a/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.h b/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.h index 9d88dfce6a7337d8ffaa7c4ffe43a4fd63949914..77c93ebfa4ba8564188d8e4a5442963382cf91e3 100644 --- a/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.h +++ b/src/basics/Core/Input/ConfigFileReader/ConfigFileReader.h @@ -1,9 +1,11 @@ #ifndef CONFIGFILEREADER_H #define CONFIGFILEREADER_H -#include "../Input.h" #include <memory> +#include <string> + +#include "basics_export.h" class ConfigData; @@ -11,11 +13,10 @@ class ConfigFileReader { public: BASICS_EXPORT static std::shared_ptr<ConfigFileReader> getNewInstance(); - BASICS_EXPORT virtual ~ConfigFileReader(); - BASICS_EXPORT std::shared_ptr<ConfigData> readConfigFile(const std::string &filePath) const; + BASICS_EXPORT std::shared_ptr<ConfigData> readConfigFile(const char* filePath) const; private: - ConfigFileReader(); + ConfigFileReader() = default; }; #endif diff --git a/src/basics/Core/StringUtilities/StringUtil.cpp b/src/basics/Core/StringUtilities/StringUtil.cpp index 31496beae5953159c8d9a08a25fb5e808d9efea3..327a62346a91073834c8b710e90968a524ee2d28 100644 --- a/src/basics/Core/StringUtilities/StringUtil.cpp +++ b/src/basics/Core/StringUtilities/StringUtil.cpp @@ -1,5 +1,6 @@ #include "StringUtil.h" +#include <string.h> #include <regex> #include <sstream> diff --git a/src/basics/Core/StringUtilities/StringUtil.h b/src/basics/Core/StringUtilities/StringUtil.h index cdf8dce290110848d5e6c50eb2fac35822dddf76..1927a69bc60bf2467fb09893463b3c9363191890 100644 --- 
a/src/basics/Core/StringUtilities/StringUtil.h +++ b/src/basics/Core/StringUtilities/StringUtil.h @@ -36,13 +36,23 @@ public: static BASICS_EXPORT bool endsWith(const std::string &input, const std::string &end); + + template<class T> + static T fromString(const std::string& s) + { + std::istringstream stream (s); + T t; + stream >> t; + return t; + } + private: StringUtil() = default; - ; + StringUtil(const StringUtil &) = default; - ; + virtual ~StringUtil() = default; - ; + static bool toBool(bool &t, const std::string &input, std::ios_base &(*f)(std::ios_base &)); }; diff --git a/src/basics/Core/buildInfo.in.cpp b/src/basics/Core/buildInfo.in.cpp index 56f302208256ba86d7f814bfa6711f704aea3479..482f4592a41bfed0615858869ec4bb297764b0b1 100644 --- a/src/basics/Core/buildInfo.in.cpp +++ b/src/basics/Core/buildInfo.in.cpp @@ -1,5 +1,3 @@ -#include "buildInfo.h" - #include "basics_export.h" namespace buildInfo diff --git a/src/basics/basics/utilities/UbComparators.h b/src/basics/basics/utilities/UbComparators.h index bc507e456b9603d85d42f8597fe4748a135e51d2..d461e27cb8d5f6026f13b2aea363d563ebed5c5c 100644 --- a/src/basics/basics/utilities/UbComparators.h +++ b/src/basics/basics/utilities/UbComparators.h @@ -170,7 +170,7 @@ struct compareMember { // l.sort( compareConstMethods<Klasse, double, &Klasse::getVal1 >() ); //} -}; // namespace UbComparators +} // namespace UbComparators #endif // UBCOMPARATOR_H diff --git a/src/basics/basics/utilities/UbEqual.h b/src/basics/basics/utilities/UbEqual.h index b3ca9102d585faeac0b2a4e413434d4d0d759282..87955b181ea3efaad4b7f3d2ebc746271ec95bb7 100644 --- a/src/basics/basics/utilities/UbEqual.h +++ b/src/basics/basics/utilities/UbEqual.h @@ -259,7 +259,7 @@ inline bool isUbEqual(const T1 &a, const T2 &b) { using Low = typename UbEqualTrait<T1, T2>::Low; return specific_equal<Low, Low>(static_cast<Low>(a), static_cast<Low>(b)); -}; +} ////////////////////////////////////////////////////////////////////////// // UbEqual-Functor diff 
--git a/src/basics/basics/utilities/UbLogger.h b/src/basics/basics/utilities/UbLogger.h index d350a763721b4b5fc1c2736fa56e2fff66a3eee0..fc2b118715a0f4afc0251b8ed8e2373a7d488153 100644 --- a/src/basics/basics/utilities/UbLogger.h +++ b/src/basics/basics/utilities/UbLogger.h @@ -228,7 +228,7 @@ inline std::string UbLogger<OutputPolicy>::logTimeString() char buffer[11]; time_t t; time(&t); - tm r = { 0 }; + tm r; // = { 0 }; strftime(buffer, sizeof(buffer), "%X", localtime_r(&t, &r)); struct timeval tv; gettimeofday(&tv, 0); diff --git a/src/basics/basics/utilities/UbSystem.h b/src/basics/basics/utilities/UbSystem.h index 8e676811a152b176efd617a944f4e22ea538721e..0436a360c2b595115824e1b7906214621bd76314 100644 --- a/src/basics/basics/utilities/UbSystem.h +++ b/src/basics/basics/utilities/UbSystem.h @@ -530,7 +530,7 @@ struct select2nd { const result_type &operator()(const argument_type &p) const { return p.second; } }; -}; // namespace UbSystem +} // namespace UbSystem #define UB_STATIC_ASSERT(expr) static_cast<void>(sizeof(UbSystem::ub_static_assert<expr>)); // zum ueberpruefen von STATISCHEN ausdruecken waehrend der compile-zeit diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.h b/src/basics/basics/writer/WbWriterVtkXmlBinary.h index 393c6bb13b268805a3fc2ca0850dd75d3fdd5616..421148d90497e3628ed274439c0b2fd7636b7fd2 100644 --- a/src/basics/basics/writer/WbWriterVtkXmlBinary.h +++ b/src/basics/basics/writer/WbWriterVtkXmlBinary.h @@ -37,7 +37,9 @@ #include <basics/writer/WbWriter.h> -class WbWriterVtkXmlBinary : public WbWriter +#include "basics_export.h" + +class BASICS_EXPORT WbWriterVtkXmlBinary : public WbWriter { public: static WbWriterVtkXmlBinary *getInstance() diff --git a/src/basics/geometry3d/GbCuboid3D.h b/src/basics/geometry3d/GbCuboid3D.h index f0a0b0f9884999050495156f3c547b26d6398e61..762a08c2696dbf58ea4b726528c89571fc21ab62 100644 --- a/src/basics/geometry3d/GbCuboid3D.h +++ b/src/basics/geometry3d/GbCuboid3D.h @@ -80,7 +80,7 @@ public: void 
setCenterCoordinates(const double &x1, const double &x2, const double &x3) override; void translate(const double &x1, const double &x2, const double &x3) override; - void rotate(const double &rx1, const double &rx2, const double &rx3) override {} + void rotate(const double &rx1, const double &rx2, const double &rx3) override { (void)rx1; (void)rx2; (void)rx3; } void scale(const double &sx1, const double &sx2, const double &sx3) override; double getLengthX1(); diff --git a/src/basics/geometry3d/GbObject3D.h b/src/basics/geometry3d/GbObject3D.h index f60e64c8f95dfde84a389c422bbc6b9377e68187..c562f42412c8ab75b0ebcd303a5785ab4939cdfb 100644 --- a/src/basics/geometry3d/GbObject3D.h +++ b/src/basics/geometry3d/GbObject3D.h @@ -49,6 +49,8 @@ class GbObject3DCreator; #include <PointerDefinitions.h> +#include "basics_export.h" + ////////////////////////////////////////////////////////////////////////// //! //! \class GbObject3D @@ -57,7 +59,7 @@ class GbObject3DCreator; //! ////////////////////////////////////////////////////////////////////////// -class GbObject3D : public ObObject +class BASICS_EXPORT GbObject3D : public ObObject { public: // abstract Methods diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h index 7cef8205ce13d89802c9bbd7eebb5d6eb3759b3f..734aa9342df011846ce345ba7c37a181383e8828 100644 --- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h +++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h @@ -47,18 +47,30 @@ class BoundaryConditions; class BCAlgorithm { public: - static const char VelocityBCAlgorithm = 0; - static const char EqDensityBCAlgorithm = 1; - static const char NonEqDensityBCAlgorithm = 2; - static const char NoSlipBCAlgorithm = 3; - static const char SlipBCAlgorithm = 4; - static const char HighViscosityNoSlipBCAlgorithm = 5; - static const char ThinWallNoSlipBCAlgorithm = 6; - static const char VelocityWithDensityBCAlgorithm = 7; + static 
const char VelocityBCAlgorithm = 0; + static const char EqDensityBCAlgorithm = 1; + static const char NonEqDensityBCAlgorithm = 2; + static const char NoSlipBCAlgorithm = 3; + static const char SlipBCAlgorithm = 4; + static const char HighViscosityNoSlipBCAlgorithm = 5; + static const char ThinWallNoSlipBCAlgorithm = 6; + static const char VelocityWithDensityBCAlgorithm = 7; static const char NonReflectingOutflowBCAlgorithm = 8; + static const char VelocityAndThixotropyBCAlgorithm = 9; + static const char DensityAndThixotropyBCAlgorithm = 10; + static const char NoSlipAndThixotropyBCAlgorithm = 11; + static const char NonReflectingOutflowAndThixotropyBCAlgorithm = 12; + static const char VelocityWithDensityAndThixotropyBCAlgorithm = 13; + static const char BinghamModelNoSlipBCAlgorithm = 14; + static const char HerschelBulkleyModelNoSlipBCAlgorithm = 15; + static const char SimpleVelocityBCAlgorithm = 16; + static const char SimpleSlipBCAlgorithm = 17; + static const char PowellEyringModelNoSlipBCAlgorithm = 18; + static const char BinghamModelVelocityBCAlgorithm = 19; + public: - BCAlgorithm() = default; + BCAlgorithm() = default; virtual ~BCAlgorithm() = default; virtual void addDistributions(SPtr<DistributionArray3D> distributions) = 0; @@ -72,11 +84,13 @@ public: SPtr<BCArray3D> getBcArray(); void setBcArray(SPtr<BCArray3D> bcarray); virtual void applyBC() = 0; + bool getThixotropy(){ return thixotropy; }; protected: - bool compressible{ false }; + bool compressible { false }; char type; bool preCollision; + bool thixotropy { false }; SPtr<BoundaryConditions> bcPtr; SPtr<DistributionArray3D> distributions; diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.cpp index 87606eecf03943259dfec89a805336d2a3190bfa..88f4a52b2ff0445838af5aade25d9c78ce6809a8 100644 --- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.cpp +++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.cpp @@ 
-63,11 +63,11 @@ void BCArray3D::resize(std::size_t nx1, std::size_t nx2, std::size_t nx3, int va ////////////////////////////////////////////////////////////////////////// bool BCArray3D::validIndices(std::size_t x1, std::size_t x2, std::size_t x3) const { - if (x1 < 0 || x1 >= this->bcindexmatrix.getNX1()) + if (x1 >= this->bcindexmatrix.getNX1()) return false; - if (x2 < 0 || x2 >= this->bcindexmatrix.getNX2()) + if (x2 >= this->bcindexmatrix.getNX2()) return false; - if (x3 < 0 || x3 >= this->bcindexmatrix.getNX3()) + if (x3 >= this->bcindexmatrix.getNX3()) return false; return true; } diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.h index 835e5b1c95454a9fbe8186d6942c3936a5e0e2cc..b9d08f7117d9dc41c008c9d92a5780aceedad21c 100644 --- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.h +++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCArray3D.h @@ -35,7 +35,7 @@ #define BCArray_H #include "BoundaryConditions.h" -#include "basics/container/CbArray3D.h" +#include "CbArray3D.h" #include <typeinfo> diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h index 1ab3b4e284de49fd1b80bad13d65fee98d221e57..84ba7a6041d38546da03323f35a23f7084df9809 100644 --- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h +++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h @@ -48,7 +48,6 @@ class BoundaryConditions { public: BoundaryConditions() - { UB_STATIC_ASSERT(sizeof(long long) >= 8); UB_STATIC_ASSERT((sizeof(long long) * 8) >= (D3Q27System::FENDDIR + 1) * BoundaryConditions::optionDigits); @@ -150,13 +149,13 @@ public: { return (short)(((slipBoundaryFlags >> (optionDigits * direction)) & maxOptionVal) - 1); } - void setNormalVector(const LBMReal &nx1, const LBMReal &nx2, const LBMReal &nx3) + void setNormalVector(const float &nx1, const float &nx2, const float &nx3) { 
this->nx1 = nx1; this->nx2 = nx2; this->nx3 = nx3; } - UbTupleDouble3 getNormalVector() { return makeUbTuple(nx1, nx2, nx3); } + UbTupleFloat3 getNormalVector() { return makeUbTuple(nx1, nx2, nx3); } /*============== Velocity Boundary ========================*/ void setVelocityBoundaryFlag(const int &direction, const short &secOpt = 0) @@ -181,72 +180,73 @@ public: void setBoundaryVelocity(const Vector3D &vx) { - setBoundaryVelocityX1((LBMReal)vx[0]); - setBoundaryVelocityX2((LBMReal)vx[1]); - setBoundaryVelocityX3((LBMReal)vx[2]); + setBoundaryVelocityX1((float)vx[0]); + setBoundaryVelocityX2((float)vx[1]); + setBoundaryVelocityX3((float)vx[2]); } - void setBoundaryVelocityX1(const LBMReal &vx1) { this->bcVelocityX1 = vx1; } - void setBoundaryVelocityX2(const LBMReal &vx2) { this->bcVelocityX2 = vx2; } - void setBoundaryVelocityX3(const LBMReal &vx3) { this->bcVelocityX3 = vx3; } - LBMReal getBoundaryVelocityX1() { return this->bcVelocityX1; } - LBMReal getBoundaryVelocityX2() { return this->bcVelocityX2; } - LBMReal getBoundaryVelocityX3() { return this->bcVelocityX3; } - LBMReal getBoundaryVelocity(const int &direction) + void setBoundaryVelocityX1(const float &vx1) { this->bcVelocityX1 = vx1; } + void setBoundaryVelocityX2(const float &vx2) { this->bcVelocityX2 = vx2; } + void setBoundaryVelocityX3(const float &vx3) { this->bcVelocityX3 = vx3; } + float getBoundaryVelocityX1() { return this->bcVelocityX1; } + float getBoundaryVelocityX2() { return this->bcVelocityX2; } + float getBoundaryVelocityX3() { return this->bcVelocityX3; } + float getBoundaryVelocity(const int &direction) { switch (direction) { case D3Q27System::E: - return (LBMReal)(UbMath::c4o9 * - (+bcVelocityX1)); //(2/cs^2)(=6)*rho_0(=1 for incompressible)*wi*u*ei with cs=1/sqrt(3) + return (float)(UbMath::c4o9 * + (+bcVelocityX1)); //(2/cs^2)(=6)*rho_0(=1 bei inkompr)*wi*u*ei mit cs=1/sqrt(3) case D3Q27System::W: - return (LBMReal)(UbMath::c4o9 * (-bcVelocityX1)); + return (float)(UbMath::c4o9 * + 
(-bcVelocityX1)); // z.B. aus paper manfred MRT LB models in three dimensions (2002) case D3Q27System::N: - return (LBMReal)(UbMath::c4o9 * (+bcVelocityX2)); + return (float)(UbMath::c4o9 * (+bcVelocityX2)); case D3Q27System::S: - return (LBMReal)(UbMath::c4o9 * (-bcVelocityX2)); + return (float)(UbMath::c4o9 * (-bcVelocityX2)); case D3Q27System::T: - return (LBMReal)(UbMath::c4o9 * (+bcVelocityX3)); + return (float)(UbMath::c4o9 * (+bcVelocityX3)); case D3Q27System::B: - return (LBMReal)(UbMath::c4o9 * (-bcVelocityX3)); + return (float)(UbMath::c4o9 * (-bcVelocityX3)); case D3Q27System::NE: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2)); + return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2)); case D3Q27System::SW: - return (LBMReal)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2)); + return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2)); case D3Q27System::SE: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2)); + return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2)); case D3Q27System::NW: - return (LBMReal)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2)); + return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2)); case D3Q27System::TE: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3)); + return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3)); case D3Q27System::BW: - return (LBMReal)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3)); + return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3)); case D3Q27System::BE: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3)); + return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3)); case D3Q27System::TW: - return (LBMReal)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3)); + return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3)); case D3Q27System::TN: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3)); case D3Q27System::BS: - return 
(LBMReal)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3)); case D3Q27System::BN: - return (LBMReal)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3)); case D3Q27System::TS: - return (LBMReal)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3)); case D3Q27System::TNE: - return (LBMReal)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3)); case D3Q27System::BSW: - return (LBMReal)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3)); case D3Q27System::BNE: - return (LBMReal)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3)); case D3Q27System::TSW: - return (LBMReal)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3)); case D3Q27System::TSE: - return (LBMReal)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3)); case D3Q27System::BNW: - return (LBMReal)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3)); case D3Q27System::BSE: - return (LBMReal)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3)); + return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3)); case D3Q27System::TNW: - return (LBMReal)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3)); + return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3)); default: throw UbException(UB_EXARGS, "unknown error"); } @@ -273,44 
+273,44 @@ public: return (short)(((densityBoundaryFlags >> (optionDigits * direction)) & maxOptionVal) - 1); } - void setBoundaryDensity(LBMReal density) { this->bcDensity = density; } - LBMReal getBoundaryDensity() { return this->bcDensity; } + void setBoundaryDensity(float density) { this->bcDensity = density; } + float getBoundaryDensity() { return this->bcDensity; } - // Lodi extension - void setDensityLodiDensity(const LBMReal &bcLodiDensity) { this->bcLodiDensity = bcLodiDensity; } - void setDensityLodiVelocityX1(const LBMReal &bcLodiVelocityX1) { this->bcLodiVelocityX1 = bcLodiVelocityX1; } - void setDensityLodiVelocityX2(const LBMReal &bcLodiVelocityX2) { this->bcLodiVelocityX2 = bcLodiVelocityX2; } - void setDensityLodiVelocityX3(const LBMReal &bcLodiVelocityX3) { this->bcLodiVelocityX3 = bcLodiVelocityX3; } - void setDensityLodiLength(const LBMReal &bcLodiLentgh) { this->bcLodiLentgh = bcLodiLentgh; } - LBMReal getDensityLodiDensity() const { return this->bcLodiDensity; } - LBMReal getDensityLodiVelocityX1() const { return this->bcLodiVelocityX1; } - LBMReal getDensityLodiVelocityX2() const { return this->bcLodiVelocityX2; } - LBMReal getDensityLodiVelocityX3() const { return this->bcLodiVelocityX3; } - LBMReal getDensityLodiLength() const { return this->bcLodiLentgh; } + ////Lodi extension + void setDensityLodiDensity(const float &bcLodiDensity) { this->bcLodiDensity = bcLodiDensity; } + void setDensityLodiVelocityX1(const float &bcLodiVelocityX1) { this->bcLodiVelocityX1 = bcLodiVelocityX1; } + void setDensityLodiVelocityX2(const float &bcLodiVelocityX2) { this->bcLodiVelocityX2 = bcLodiVelocityX2; } + void setDensityLodiVelocityX3(const float &bcLodiVelocityX3) { this->bcLodiVelocityX3 = bcLodiVelocityX3; } + void setDensityLodiLength(const float &bcLodiLentgh) { this->bcLodiLentgh = bcLodiLentgh; } + float getDensityLodiDensity() const { return this->bcLodiDensity; } + float getDensityLodiVelocityX1() const { return this->bcLodiVelocityX1; } + float 
getDensityLodiVelocityX2() const { return this->bcLodiVelocityX2; } + float getDensityLodiVelocityX3() const { return this->bcLodiVelocityX3; } + float getDensityLodiLength() const { return this->bcLodiLentgh; } - LBMReal &densityLodiDensity() { return this->bcLodiDensity; } - LBMReal &densityLodiVelocityX1() { return this->bcLodiVelocityX1; } - LBMReal &densityLodiVelocityX2() { return this->bcLodiVelocityX2; } - LBMReal &densityLodiVelocityX3() { return this->bcLodiVelocityX3; } - LBMReal &densityLodiLentgh() { return this->bcLodiLentgh; } + float &densityLodiDensity() { return this->bcLodiDensity; } + float &densityLodiVelocityX1() { return this->bcLodiVelocityX1; } + float &densityLodiVelocityX2() { return this->bcLodiVelocityX2; } + float &densityLodiVelocityX3() { return this->bcLodiVelocityX3; } + float &densityLodiLentgh() { return this->bcLodiLentgh; } - const LBMReal &densityLodiDensity() const { return this->bcLodiDensity; } - const LBMReal &densityLodiVelocityX1() const { return this->bcLodiVelocityX1; } - const LBMReal &densityLodiVelocityX2() const { return this->bcLodiVelocityX2; } - const LBMReal &densityLodiVelocityX3() const { return this->bcLodiVelocityX3; } - const LBMReal &densityLodiLentgh() const { return this->bcLodiLentgh; } + const float &densityLodiDensity() const { return this->bcLodiDensity; } + const float &densityLodiVelocityX1() const { return this->bcLodiVelocityX1; } + const float &densityLodiVelocityX2() const { return this->bcLodiVelocityX2; } + const float &densityLodiVelocityX3() const { return this->bcLodiVelocityX3; } + const float &densityLodiLentgh() const { return this->bcLodiLentgh; } /*======================= Qs =============================*/ - void setQ(const LBMReal &val, const int &direction) { q[direction] = val; } - LBMReal getQ(const int &direction) { return q[direction]; } + void setQ(const float &val, const int &direction) { q[direction] = val; } + float getQ(const int &direction) { return q[direction]; } 
virtual std::vector<std::string> getBCNames() { std::vector<std::string> tmp; - tmp.emplace_back("NoSlipBC"); - tmp.emplace_back("SlipBC"); - tmp.emplace_back("VelocityBC"); - tmp.emplace_back("DensityBC"); + tmp.push_back("NoSlipBC"); + tmp.push_back("SlipBC"); + tmp.push_back("VelocityBC"); + tmp.push_back("DensityBC"); return tmp; } virtual std::vector<long long> getBCFlags() @@ -336,7 +336,7 @@ public: static const long long maxOptionVal; // = ( 1<<optionDigits ) - 1; //2^3-1 -> 7 protected: - LBMReal q[D3Q27System::FENDDIR + 1]; + float q[D3Q27System::FENDDIR + 1]; long long noslipBoundaryFlags{ 0 }; long long slipBoundaryFlags{ 0 }; @@ -344,20 +344,26 @@ protected: long long densityBoundaryFlags{ 0 }; long long wallModelBoundaryFlags{ 0 }; - LBMReal bcVelocityX1{ 0.0f }; - LBMReal bcVelocityX2{ 0.0f }; - LBMReal bcVelocityX3{ 0.0f }; - LBMReal bcDensity{ 0.0f }; + float bcVelocityX1{ 0.0f }; + float bcVelocityX2{ 0.0f }; + float bcVelocityX3{ 0.0f }; + float bcDensity{ 0.0f }; + // float bcThixotropy{ 0.0f }; + + float bcLodiDensity{ 0.0f }; + float bcLodiVelocityX1{ 0.0f }; + float bcLodiVelocityX2{ 0.0f }; + float bcLodiVelocityX3{ 0.0f }; + float bcLodiLentgh{ 0.0f }; - LBMReal bcLodiDensity{ 0.0f }; - LBMReal bcLodiVelocityX1{ 0.0f }; - LBMReal bcLodiVelocityX2{ 0.0f }; - LBMReal bcLodiVelocityX3{ 0.0f }; - LBMReal bcLodiLentgh{ 0.0f }; + float nx1{ 0.0f }, nx2{ 0.0f }, nx3{ 0.0f }; - LBMReal nx1{ 0.0f }, nx2{ 0.0f }, nx3{ 0.0f }; + char algorithmType { -1 }; - char algorithmType{ -1 }; +private: + friend class MPIIORestartCoProcessor; + friend class MPIIOMigrationCoProcessor; + friend class MPIIOMigrationBECoProcessor; }; #endif diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt index 871aa123b3922d109d521eedcf83adfc87493e16..ac9da38b2c28d52a9be8d8489abf42247f65506e 100644 --- a/src/cpu/VirtualFluidsCore/CMakeLists.txt +++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt @@ -1,6 +1,11 @@ -vf_add_library(BUILDTYPE 
static PUBLIC_LINK basics muparser) +set(CAB_ADDITIONAL_LINK_LIBRARIES "") +if(BUILD_USE_OPENMP) +list(APPEND CAB_ADDITIONAL_LINK_LIBRARIES OpenMP::OpenMP_CXX) +endif() + +vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser MPI::MPI_CXX ${CAB_ADDITIONAL_LINK_LIBRARIES}) vf_get_library_name(library_name) diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp index a7ba3a84505f1695a0c59175d2d2d8adf3d4dbdb..e85f2806df40e11c7f30cca1c86bcb5dc639ee73 100644 --- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp +++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp @@ -151,8 +151,6 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block) SPtr<ILBMKernel> kernel = block->getKernel(); SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray(); - // knotennummerierung faengt immer bei 0 an! - unsigned int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT; int minX1 = 0; int minX2 = 0; @@ -171,9 +169,9 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block) maxX2 -= 1; maxX3 -= 1; - for (size_t ix3 = minX3; ix3 <= maxX3; ix3++) { - for (size_t ix2 = minX2; ix2 <= maxX2; ix2++) { - for (size_t ix1 = minX1; ix1 <= maxX1; ix1++) { + for (int ix3 = minX3; ix3 <= maxX3; ix3++) { + for (int ix2 = minX2; ix2 <= maxX2; ix2++) { + for (int ix1 = minX1; ix1 <= maxX1; ix1++) { if (!bcArray->isUndefined(ix1, ix2, ix3)) { int index = 0; nodeNumbers(ix1, ix2, ix3) = nr++; @@ -239,6 +237,9 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block) maxX2 -= 1; maxX3 -= 1; + // knotennummerierung faengt immer bei 0 an! 
+ int SWB = 0, SEB = 0, NEB = 0, NWB = 0, SWT = 0, SET = 0, NET = 0, NWT = 0; + // cell vector erstellen for (int ix3 = minX3; ix3 <= maxX3; ix3++) { for (int ix2 = minX2; ix2 <= maxX2; ix2++) { @@ -248,7 +249,9 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block) (SWT = nodeNumbers(ix1, ix2, ix3 + 1)) >= 0 && (SET = nodeNumbers(ix1 + 1, ix2, ix3 + 1)) >= 0 && (NET = nodeNumbers(ix1 + 1, ix2 + 1, ix3 + 1)) >= 0 && (NWT = nodeNumbers(ix1, ix2 + 1, ix3 + 1)) >= 0) { - cells.push_back(makeUbTuple(SWB, SEB, NEB, NWB, SWT, SET, NET, NWT)); + cells.push_back(makeUbTuple((unsigned int)SWB, (unsigned int)SEB, (unsigned int)NEB, + (unsigned int)NWB, (unsigned int)SWT, (unsigned int)SET, + (unsigned int)NET, (unsigned int)NWT)); } } } diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp index 27cf92056c88f1481f256ced4080003ed511f241..eef7d3bf6a0d0ad80aa1bdd7d83dfb469b044584 100644 --- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp +++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp @@ -53,20 +53,24 @@ WriteMacroscopicQuantitiesCoProcessor::WriteMacroscopicQuantitiesCoProcessor(SPt WbWriter *const writer, SPtr<LBMUnitConverter> conv, SPtr<Communicator> comm) - : CoProcessor(grid, s), path(path), writer(writer), conv(conv), comm(comm) + : CoProcessor(grid, s), path(path), writer(writer), conv(conv), comm(comm) { - gridRank = comm->getProcessID(); + gridRank = comm->getProcessID(); minInitLevel = this->grid->getCoarsestInitializedLevel(); maxInitLevel = this->grid->getFinestInitializedLevel(); blockVector.resize(maxInitLevel + 1); - for (int level = minInitLevel; level <= maxInitLevel; level++) { + for (int level = minInitLevel; level <= maxInitLevel; level++) + { grid->getBlocks(level, gridRank, true, blockVector[level]); } } + 
////////////////////////////////////////////////////////////////////////// -void WriteMacroscopicQuantitiesCoProcessor::init() {} +void WriteMacroscopicQuantitiesCoProcessor::init() +{} + ////////////////////////////////////////////////////////////////////////// void WriteMacroscopicQuantitiesCoProcessor::process(double step) { @@ -75,14 +79,18 @@ void WriteMacroscopicQuantitiesCoProcessor::process(double step) UBLOG(logDEBUG3, "WriteMacroscopicQuantitiesCoProcessor::update:" << step); } + ////////////////////////////////////////////////////////////////////////// void WriteMacroscopicQuantitiesCoProcessor::collectData(double step) { int istep = static_cast<int>(step); - for (int level = minInitLevel; level <= maxInitLevel; level++) { - for (SPtr<Block3D> block : blockVector[level]) { - if (block) { + for (int level = minInitLevel; level <= maxInitLevel; level++) + { + for (SPtr<Block3D> block : blockVector[level]) + { + if (block) + { addDataMQ(block); } } @@ -93,27 +101,29 @@ void WriteMacroscopicQuantitiesCoProcessor::collectData(double step) subfolder = "mq" + UbSystem::toString(istep); pfilePath = path + "/mq/" + subfolder; cfilePath = path + "/mq/mq_collection"; - partPath = pfilePath + "/mq" + UbSystem::toString(gridRank) + "_" + UbSystem::toString(istep); + partPath = pfilePath + "/mq" + UbSystem::toString(gridRank) + "_" + UbSystem::toString(istep); std::string partName = writer->writeOctsWithNodeData(partPath, nodes, cells, datanames, data); - size_t found = partName.find_last_of("/"); - std::string piece = partName.substr(found + 1); - piece = subfolder + "/" + piece; + size_t found = partName.find_last_of("/"); + std::string piece = partName.substr(found + 1); + piece = subfolder + "/" + piece; std::vector<std::string> cellDataNames; std::vector<std::string> pieces; pieces.push_back(piece); if (comm->getProcessID() == comm->getRoot()) { std::string pname = - WbWriterVtkXmlASCII::getInstance()->writeParallelFile(pfilePath, pieces, datanames, 
cellDataNames); + WbWriterVtkXmlASCII::getInstance()->writeParallelFile(pfilePath, pieces, datanames, cellDataNames); found = pname.find_last_of("/"); piece = pname.substr(found + 1); std::vector<std::string> filenames; filenames.push_back(piece); - if (step == CoProcessor::scheduler->getMinBegin()) { + if (step == CoProcessor::scheduler->getMinBegin()) + { WbWriterVtkXmlASCII::getInstance()->writeCollection(cfilePath, filenames, istep, false); - } else { + } else + { WbWriterVtkXmlASCII::getInstance()->addFilesToCollection(cfilePath, filenames, istep, false); } UBLOG(logINFO, "WriteMacroscopicQuantitiesCoProcessor step: " << istep); @@ -121,6 +131,7 @@ void WriteMacroscopicQuantitiesCoProcessor::collectData(double step) clearData(); } + ////////////////////////////////////////////////////////////////////////// void WriteMacroscopicQuantitiesCoProcessor::clearData() { @@ -129,16 +140,23 @@ void WriteMacroscopicQuantitiesCoProcessor::clearData() datanames.clear(); data.clear(); } + ////////////////////////////////////////////////////////////////////////// void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block) { - // This data is written: + double level = (double)block->getLevel(); + + // Diese Daten werden geschrieben: datanames.resize(0); - datanames.emplace_back("DRho"); - datanames.emplace_back("Press"); - datanames.emplace_back("Vx"); - datanames.emplace_back("Vy"); - datanames.emplace_back("Vz"); + datanames.push_back("Rho"); + datanames.push_back("Vx"); + datanames.push_back("Vy"); + datanames.push_back("Vz"); + // datanames.push_back("Press"); + datanames.push_back("Level"); + // datanames.push_back("BlockID"); + // datanames.push_back("gamma"); + // datanames.push_back("collFactor"); data.resize(datanames.size()); @@ -146,10 +164,10 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block) SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray(); SPtr<DistributionArray3D> distributions = 
kernel->getDataSet()->getFdistributions(); LBMReal f[D3Q27System::ENDF + 1]; - LBMReal vx1, vx2, vx3, drho, press; + LBMReal vx1, vx2, vx3, rho; - // node numbering always starts at 0! - unsigned int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT; + // knotennummerierung faengt immer bei 0 an! + int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT; if (block->getKernel()->getCompressible()) { calcMacros = &D3Q27System::calcCompMacroscopicValues; @@ -165,12 +183,21 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block) int maxX2 = (int)(distributions->getNX2()); int maxX3 = (int)(distributions->getNX3()); - // assign numbers and create node vector + collect data + // int minX1 = 1; + // int minX2 = 1; + // int minX3 = 1; + + // int maxX1 = (int)(distributions->getNX1()); + // int maxX2 = (int)(distributions->getNX2()); + // int maxX3 = (int)(distributions->getNX3()); + + // nummern vergeben und node vector erstellen + daten sammeln CbArray3D<int> nodeNumbers((int)maxX1, (int)maxX2, (int)maxX3, -1); maxX1 -= 2; maxX2 -= 2; maxX3 -= 2; + // D3Q27BoundaryConditionPtr bcPtr; int nr = (int)nodes.size(); for (int ix3 = minX3; ix3 <= maxX3; ix3++) { @@ -180,44 +207,60 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block) int index = 0; nodeNumbers(ix1, ix2, ix3) = nr++; Vector3D worldCoordinates = grid->getNodeCoordinates(block, ix1, ix2, ix3); - nodes.emplace_back(float(worldCoordinates[0]), float(worldCoordinates[1]), - float(worldCoordinates[2])); + nodes.push_back(UbTupleFloat3(float(worldCoordinates[0]), float(worldCoordinates[1]), + float(worldCoordinates[2]))); distributions->getDistribution(f, ix1, ix2, ix3); - calcMacros(f, drho, vx1, vx2, vx3); - press = D3Q27System::calcPress(f, drho, vx1, vx2, vx3); - - if (UbMath::isNaN(drho) || UbMath::isInfinity(drho)) - UB_THROW(UbException( - UB_EXARGS, "drho is not a number (nan or -1.#IND) or infinity number -1.#INF in block=" + - block->toString() + ", node=" + UbSystem::toString(ix1) + "," 
+ - UbSystem::toString(ix2) + "," + UbSystem::toString(ix3))); + calcMacros(f, rho, vx1, vx2, vx3); + double press = D3Q27System::getPressure(f); // D3Q27System::calcPress(f,rho,vx1,vx2,vx3); + + if (UbMath::isNaN(rho) || UbMath::isInfinity(rho)) + // UB_THROW( UbException(UB_EXARGS,"rho is not a number (nan or -1.#IND) or infinity number + // -1.#INF in block="+block->toString()+", + // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3))); + rho = 999.0; if (UbMath::isNaN(press) || UbMath::isInfinity(press)) - UB_THROW(UbException( - UB_EXARGS, "press is not a number (nan or -1.#IND) or infinity number -1.#INF in block=" + - block->toString() + ", node=" + UbSystem::toString(ix1) + "," + - UbSystem::toString(ix2) + "," + UbSystem::toString(ix3))); + // UB_THROW( UbException(UB_EXARGS,"press is not a number (nan or -1.#IND) or infinity number + // -1.#INF in block="+block->toString()+", + // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3))); + press = 999.0; if (UbMath::isNaN(vx1) || UbMath::isInfinity(vx1)) - UB_THROW(UbException( - UB_EXARGS, "vx1 is not a number (nan or -1.#IND) or infinity number -1.#INF in block=" + - block->toString() + ", node=" + UbSystem::toString(ix1) + "," + - UbSystem::toString(ix2) + "," + UbSystem::toString(ix3))); + // UB_THROW( UbException(UB_EXARGS,"vx1 is not a number (nan or -1.#IND) or infinity number + // -1.#INF in block="+block->toString()+", + // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3))); + vx1 = 999.0; if (UbMath::isNaN(vx2) || UbMath::isInfinity(vx2)) - UB_THROW(UbException( - UB_EXARGS, "vx2 is not a number (nan or -1.#IND) or infinity number -1.#INF in block=" + - block->toString() + ", node=" + UbSystem::toString(ix1) + "," + - UbSystem::toString(ix2) + "," + UbSystem::toString(ix3))); + // UB_THROW( UbException(UB_EXARGS,"vx2 is not a number (nan or -1.#IND) or infinity number + // -1.#INF in 
block="+block->toString()+", + // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3))); + vx2 = 999.0; if (UbMath::isNaN(vx3) || UbMath::isInfinity(vx3)) - UB_THROW(UbException( - UB_EXARGS, "vx3 is not a number (nan or -1.#IND) or infinity number -1.#INF in block=" + - block->toString() + ", node=" + UbSystem::toString(ix1) + "," + - UbSystem::toString(ix2) + "," + UbSystem::toString(ix3))); - - data[index++].push_back(drho * conv->getFactorDensityLbToW()); - data[index++].push_back(press * conv->getFactorPressureLbToW()); - data[index++].push_back(vx1 * conv->getFactorVelocityLbToW()); - data[index++].push_back(vx2 * conv->getFactorVelocityLbToW()); - data[index++].push_back(vx3 * conv->getFactorVelocityLbToW()); + // UB_THROW( UbException(UB_EXARGS,"vx3 is not a number (nan or -1.#IND) or infinity number + // -1.#INF in block="+block->toString()+", + // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3))); + vx3 = 999.0; + + data[index++].push_back(rho); + data[index++].push_back(vx1); + data[index++].push_back(vx2); + data[index++].push_back(vx3); + + // shearRate = D3Q27System::getShearRate(f, collFactor); + + // LBMReal collFactorF = BinghamModelLBMKernel::getBinghamCollFactor(collFactor, yieldStress, + // shearRate, rho); + + // data[index++].push_back(shearRate); + // data[index++].push_back(collFactorF); + + // data[index++].push_back((rho+1.0) * conv->getFactorDensityLbToW() ); + // data[index++].push_back(vx1 * conv->getFactorVelocityLbToW()); + // data[index++].push_back(vx2 * conv->getFactorVelocityLbToW()); + // data[index++].push_back(vx3 * conv->getFactorVelocityLbToW()); + // data[index++].push_back((press * conv->getFactorPressureLbToW()) / ((rho+1.0) * + // conv->getFactorDensityLbToW())); + data[index++].push_back(level); + // data[index++].push_back(blockID); } } } @@ -234,7 +277,9 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block) (SWT = 
nodeNumbers(ix1, ix2, ix3 + 1)) >= 0 && (SET = nodeNumbers(ix1 + 1, ix2, ix3 + 1)) >= 0 && (NET = nodeNumbers(ix1 + 1, ix2 + 1, ix3 + 1)) >= 0 && (NWT = nodeNumbers(ix1, ix2 + 1, ix3 + 1)) >= 0) { - cells.push_back(makeUbTuple(SWB, SEB, NEB, NWB, SWT, SET, NET, NWT)); + cells.push_back(makeUbTuple((unsigned int)SWB, (unsigned int)SEB, (unsigned int)NEB, + (unsigned int)NWB, (unsigned int)SWT, (unsigned int)SET, + (unsigned int)NET, (unsigned int)NWT)); } } } diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp index d7e2286d4599aabf4e7b6e4c9d6a824b38e6d873..6f8a6e74664cf82a550b9000071d4f6beb9ebac2 100644 --- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp +++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp @@ -47,7 +47,7 @@ D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t this->nonLocalDistributions = std::make_shared<CbArray4D<LBMReal, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value); - this->restDistributions = std::make_shared<CbArray3D<LBMReal, IndexerX3X2X1>>(nx1, nx2, nx3, value); + this->zeroDistributions = std::make_shared<CbArray3D<LBMReal, IndexerX3X2X1>>(nx1, nx2, nx3, value); } ////////////////////////////////////////////////////////////////////////// D3Q27EsoTwist3DSplittedVector::~D3Q27EsoTwist3DSplittedVector() = default; @@ -84,7 +84,7 @@ void D3Q27EsoTwist3DSplittedVector::getDistribution(LBMReal *const f, size_t x1, f[D3Q27System::BNW] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1); f[D3Q27System::BNE] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1); - f[D3Q27System::REST] = (*this->restDistributions)(x1, x2, x3); + f[D3Q27System::ZERO] = (*this->zeroDistributions)(x1, x2, x3); } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setDistribution(const LBMReal *const f, size_t 
x1, size_t x2, size_t x3) @@ -117,7 +117,7 @@ void D3Q27EsoTwist3DSplittedVector::setDistribution(const LBMReal *const f, size (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::INV_BNW]; (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::INV_BNE]; - (*this->restDistributions)(x1, x2, x3) = f[D3Q27System::REST]; + (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::ZERO]; } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) @@ -150,7 +150,7 @@ void D3Q27EsoTwist3DSplittedVector::getDistributionInv(LBMReal *const f, size_t f[D3Q27System::INV_BNW] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1); f[D3Q27System::INV_BNE] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1); - f[D3Q27System::REST] = (*this->restDistributions)(x1, x2, x3); + f[D3Q27System::ZERO] = (*this->zeroDistributions)(x1, x2, x3); } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) @@ -183,7 +183,7 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const LBMReal *const f, s (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::BNW]; (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::BNE]; - (*this->restDistributions)(x1, x2, x3) = f[D3Q27System::REST]; + (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::ZERO]; } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3, @@ -241,8 +241,8 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const LBMReal *c 
(*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::BNE]; if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW) (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::TSW]; - if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST) - (*this->restDistributions)(x1, x2, x3) = f[D3Q27System::REST]; + if ((direction & EsoTwistD3Q27System::ZERO) == EsoTwistD3Q27System::ZERO) + (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::ZERO]; } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, @@ -327,8 +327,8 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_ case D3Q27System::TSW: (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f; break; - case D3Q27System::REST: - (*this->restDistributions)(x1, x2, x3) = f; + case D3Q27System::ZERO: + (*this->zeroDistributions)(x1, x2, x3) = f; break; default: UB_THROW(UbException(UB_EXARGS, "Direction didn't find")); @@ -390,8 +390,8 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(const LBMReal (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::BNE]; if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW) (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::TSW]; - if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST) - (*this->restDistributions)(x1, x2, x3) = f[D3Q27System::REST]; + if ((direction & EsoTwistD3Q27System::ZERO) == EsoTwistD3Q27System::ZERO) + (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::ZERO]; } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, @@ -476,8 +476,8 @@ 
void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, si case D3Q27System::TSW: (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f; break; - case D3Q27System::REST: - (*this->restDistributions)(x1, x2, x3) = f; + case D3Q27System::ZERO: + (*this->zeroDistributions)(x1, x2, x3) = f; break; default: UB_THROW(UbException(UB_EXARGS, "Direction didn't find")); @@ -539,8 +539,8 @@ LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionForDirection(size_t x1, si return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3); case D3Q27System::BNE: return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1); - case D3Q27System::REST: - return (*this->restDistributions)(x1, x2, x3); + case D3Q27System::ZERO: + return (*this->zeroDistributions)(x1, x2, x3); default: UB_THROW(UbException(UB_EXARGS, "Direction didn't find")); } @@ -601,8 +601,8 @@ LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionInvForDirection(size_t x1, return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3); case D3Q27System::TSW: return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1); - case D3Q27System::REST: - return (*this->restDistributions)(x1, x2, x3); + case D3Q27System::ZERO: + return (*this->zeroDistributions)(x1, x2, x3); default: UB_THROW(UbException(UB_EXARGS, "Direction didn't find")); } @@ -626,7 +626,7 @@ CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector: ////////////////////////////////////////////////////////////////////////// CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr D3Q27EsoTwist3DSplittedVector::getZeroDistributions() { - return this->restDistributions; + return this->zeroDistributions; } ////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setNX1(size_t newNX1) { NX1 = newNX1; } @@ -647,7 +647,7 @@ void D3Q27EsoTwist3DSplittedVector::setNonLocalDistributions(CbArray4D<LBMReal, 
////////////////////////////////////////////////////////////////////////// void D3Q27EsoTwist3DSplittedVector::setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr array) { - restDistributions = array; + zeroDistributions = array; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h index df44457b04a36918643400d60ec5f514e32982ea..1c0d7d05f1392c8c116863e9e0b41000c90ed15e 100644 --- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h +++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h @@ -100,7 +100,7 @@ public: protected: CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions; CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions; - CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions; + CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions; size_t NX1, NX2, NX3; friend class MPIIORestartCoProcessor; diff --git a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h index c0171588bcd4a74680326c44db062dba63d4c41c..4930beeab491caf541c436a2e60b326b7cd54c64 100644 --- a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h +++ b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h @@ -51,6 +51,9 @@ public: SPtr<DistributionArray3D> getFdistributions() const; void setFdistributions(SPtr<DistributionArray3D> distributions); + SPtr<DistributionArray3D> getHdistributions() const; + void setHdistributions(SPtr<DistributionArray3D> distributions); + SPtr<AverageValuesArray3D> getAverageDensity() const; void setAverageDensity(SPtr<AverageValuesArray3D> values); @@ -71,10 +74,12 @@ public: SPtr<RelaxationFactorArray3D> getRelaxationFactor() const; void setRelaxationFactor(SPtr<RelaxationFactorArray3D> values); - protected: private: SPtr<DistributionArray3D> fdistributions; + + SPtr<DistributionArray3D> 
hdistributions; + SPtr<AverageValuesArray3D> averageValues; SPtr<AverageValuesArray3D> averageDensity; @@ -85,40 +90,96 @@ private: SPtr<ShearStressValuesArray3D> shearStressValues; SPtr<RelaxationFactorArray3D> relaxationFactor; + }; -inline SPtr<DistributionArray3D> DataSet3D::getFdistributions() const { return fdistributions; } +inline SPtr<DistributionArray3D> DataSet3D::getFdistributions() const +{ + return fdistributions; +} -inline void DataSet3D::setFdistributions(SPtr<DistributionArray3D> distributions) { fdistributions = distributions; } +inline void DataSet3D::setFdistributions(SPtr<DistributionArray3D> distributions) +{ + fdistributions = distributions; +} -inline SPtr<AverageValuesArray3D> DataSet3D::getAverageValues() const { return averageValues; } +inline SPtr<DistributionArray3D> DataSet3D::getHdistributions() const +{ + return hdistributions; +} -inline void DataSet3D::setAverageValues(SPtr<AverageValuesArray3D> values) { averageValues = values; } +inline void DataSet3D::setHdistributions(SPtr<DistributionArray3D> distributions) +{ + hdistributions = distributions; +} -inline SPtr<AverageValuesArray3D> DataSet3D::getAverageDensity() const { return averageDensity; } +inline SPtr<AverageValuesArray3D> DataSet3D::getAverageValues() const +{ + return averageValues; +} -inline void DataSet3D::setAverageDensity(SPtr<AverageValuesArray3D> values) { averageDensity = values; } +inline void DataSet3D::setAverageValues(SPtr<AverageValuesArray3D> values) +{ + averageValues = values; +} -inline SPtr<AverageValuesArray3D> DataSet3D::getAverageVelocity() const { return averageVelocity; } +inline SPtr<AverageValuesArray3D> DataSet3D::getAverageDensity() const +{ + return averageDensity; +} -inline void DataSet3D::setAverageVelocity(SPtr<AverageValuesArray3D> values) { averageVelocity = values; } +inline void DataSet3D::setAverageDensity(SPtr<AverageValuesArray3D> values) +{ + averageDensity = values; +} -inline SPtr<AverageValuesArray3D> 
DataSet3D::getAverageFluctuations() const { return averageFluktuations; } +inline SPtr<AverageValuesArray3D> DataSet3D::getAverageVelocity() const +{ + return averageVelocity; +} + +inline void DataSet3D::setAverageVelocity(SPtr<AverageValuesArray3D> values) +{ + averageVelocity = values; +} -inline void DataSet3D::setAverageFluctuations(SPtr<AverageValuesArray3D> values) { averageFluktuations = values; } +inline SPtr<AverageValuesArray3D> DataSet3D::getAverageFluctuations() const +{ + return averageFluktuations; +} + +inline void DataSet3D::setAverageFluctuations(SPtr<AverageValuesArray3D> values) +{ + averageFluktuations = values; +} -inline SPtr<AverageValuesArray3D> DataSet3D::getAverageTriplecorrelations() const { return averageTriplecorrelations; } +inline SPtr<AverageValuesArray3D> DataSet3D::getAverageTriplecorrelations() const +{ + return averageTriplecorrelations; +} inline void DataSet3D::setAverageTriplecorrelations(SPtr<AverageValuesArray3D> values) { averageTriplecorrelations = values; } -inline SPtr<ShearStressValuesArray3D> DataSet3D::getShearStressValues() const { return shearStressValues; } +inline SPtr<ShearStressValuesArray3D> DataSet3D::getShearStressValues() const +{ + return shearStressValues; +} -inline void DataSet3D::setShearStressValues(SPtr<ShearStressValuesArray3D> values) { shearStressValues = values; } +inline void DataSet3D::setShearStressValues(SPtr<ShearStressValuesArray3D> values) +{ + shearStressValues = values; +} -inline SPtr<RelaxationFactorArray3D> DataSet3D::getRelaxationFactor() const { return relaxationFactor; } +inline SPtr<RelaxationFactorArray3D> DataSet3D::getRelaxationFactor() const +{ + return relaxationFactor; +} -inline void DataSet3D::setRelaxationFactor(SPtr<RelaxationFactorArray3D> values) { relaxationFactor = values; } +inline void DataSet3D::setRelaxationFactor(SPtr<RelaxationFactorArray3D> values) +{ + relaxationFactor = values; +} #endif diff --git a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h 
b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h index 242c6b7a1c156216a88d4c06c4945040383e39af..8fe4dccea1b53da0513a093e8a741cd0071caf48 100644 --- a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h +++ b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h @@ -41,9 +41,9 @@ class DistributionArray3D { public: DistributionArray3D() = default; - ; + virtual ~DistributionArray3D() = default; - ; + //! get number of nodes for x1 direction virtual size_t getNX1() const = 0; //! get number of nodes for x2 direction diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h index 689589222634edfc52dcfbf358ffc8d32dd1186b..319a9200cc204b0f9b869b2e52353e717a89d783 100644 --- a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h +++ b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h @@ -43,13 +43,23 @@ // Geier, M., & Schönherr, M. (2017). Esoteric twist: an efficient in-place streaming algorithmus for the lattice // Boltzmann method on massively parallel hardware. Computation, 5(2), 19. 
+class EsoTwistD3Q27UnrollArray +{ +}; +class EsoTwistPlusD3Q27UnrollArray +{ +}; +class EsoTwistPlusD3Q19UnrollArray +{ +}; + class EsoTwist3D : public DistributionArray3D { public: EsoTwist3D() = default; - ; + ~EsoTwist3D() override = default; - ; + ////////////////////////////////////////////////////////////////////////// void swap() override = 0; ////////////////////////////////////////////////////////////////////////// diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp index c456be678449744475a0ac6932850dceb0ee6f1c..1a13aa008ab49a48f1d16c7a2a71ea39dfb191ab 100644 --- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp +++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp @@ -35,7 +35,7 @@ // index 0 1 2 3 4 5 6 7 8 9 10 11 12 // 13 14 15 16 17 18 19 20 21 22 23 24 25 26 f: E, W, N, S, T, B, NE, SW, SE, NW, TE, BW, BE, TW, TN, -// BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE BSW REST +// BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE BSW ZERO const int EsoTwistD3Q27System::ETX1[EsoTwistD3Q27System::ENDF + 1] = { 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0 }; const int EsoTwistD3Q27System::ETX2[EsoTwistD3Q27System::ENDF + 1] = { 0, 0, 0, 1, 0, 0, 0, 1, 0, -1, 0, 0, 0, 0, @@ -49,7 +49,7 @@ const int EsoTwistD3Q27System::etINVDIR[EsoTwistD3Q27System::ENDF + 1] = { D3Q27System::INV_TE, D3Q27System::INV_BW, D3Q27System::INV_BE, D3Q27System::INV_TW, D3Q27System::INV_TN, D3Q27System::INV_BS, D3Q27System::INV_BN, D3Q27System::INV_TS, D3Q27System::INV_TNE, D3Q27System::INV_TNW, D3Q27System::INV_TSE, D3Q27System::INV_TSW, D3Q27System::INV_BNE, D3Q27System::INV_BNW, D3Q27System::INV_BSE, - D3Q27System::INV_BSW, D3Q27System::REST + D3Q27System::INV_BSW, D3Q27System::ZERO }; const unsigned long int EsoTwistD3Q27System::etDIR[EsoTwistD3Q27System::ENDF + 1] = { diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h 
b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h index 21752cc48a84b02bc24cb7efe9e3c5912f476dfd..a9214673ec4b4a66a52fa53f9b625ead0180768b 100644 --- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h +++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h @@ -47,7 +47,7 @@ struct EsoTwistD3Q27System { const static int STARTDIR = D3Q27System::STARTDIR; const static int ENDDIR = D3Q27System::ENDDIR; - static const int REST = D3Q27System::REST; /*f0 */ + static const int ZERO = D3Q27System::ZERO; /*f0 */ static const int E = D3Q27System::E; /*f1 */ static const int W = D3Q27System::W; /*f2 */ static const int N = D3Q27System::N; /*f3 */ diff --git a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp index 724d855e3dbfecbe3388ad4071a2a1b1666c1010..f1b2e5ad8c62babaea92cea50a646434f9757cd9 100644 --- a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp +++ b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp @@ -127,7 +127,7 @@ void BasicCalculator::calculate() if (refinement) { if (straightStartLevel < maxInitLevel) exchangeBlockData(straightStartLevel, maxInitLevel); - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// #ifdef TIMING time[4] = timer.stop(); UBLOG(logINFO, "refinement exchangeBlockData time = " << time[4]); @@ -155,14 +155,16 @@ void BasicCalculator::calculate() } catch (std::exception &e) { UBLOG(logERROR, e.what()); UBLOG(logERROR, " step = " << calcStep); - // throw; - exit(EXIT_FAILURE); + // throw e; + // exit(EXIT_FAILURE); } catch (std::string &s) { UBLOG(logERROR, s); - exit(EXIT_FAILURE); + // exit(EXIT_FAILURE); + // throw s; } catch (...) 
{ UBLOG(logERROR, "unknown exception"); - exit(EXIT_FAILURE); + // exit(EXIT_FAILURE); + // throw; } } ////////////////////////////////////////////////////////////////////////// @@ -173,28 +175,27 @@ void BasicCalculator::calculateBlocks(int startLevel, int maxInitLevel, int calc #endif { SPtr<Block3D> blockTemp; - try { - // startLevel bis maxInitLevel - for (int level = startLevel; level <= maxInitLevel; level++) { - // timer.resetAndStart(); - // call LBM kernel - int size = (int)blocks[level].size(); + // startLevel bis maxInitLevel + for (int level = startLevel; level <= maxInitLevel; level++) { + // timer.resetAndStart(); + // call LBM kernel + int size = (int)blocks[level].size(); #ifdef _OPENMP #pragma omp for schedule(OMP_SCHEDULE) #endif - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { + try { blockTemp = blocks[level][i]; blockTemp->getKernel()->calculate(calcStep); + } catch (std::exception &e) { + UBLOG(logERROR, e.what()); + UBLOG(logERROR, blockTemp->toString() << " step = " << calcStep); + std::exit(EXIT_FAILURE); } - // timer.stop(); - // UBLOG(logINFO, "level = " << level << " blocks = " << blocks[level].size() << " collision time = " << - // timer.getTotalTime()); } - } catch (std::exception &e) { - UBLOG(logERROR, e.what()); - // UBLOG(logERROR, blockTemp->toString()<<" step = "<<calcStep); - // throw; - exit(EXIT_FAILURE); + // timer.stop(); + // UBLOG(logINFO, "level = " << level << " blocks = " << blocks[level].size() << " collision time = " << + // timer.getTotalTime()); } } } @@ -239,8 +240,13 @@ void BasicCalculator::connectorsPrepareLocal(std::vector<SPtr<Block3DConnector>> #pragma omp parallel for schedule(OMP_SCHEDULE) #endif for (int i = 0; i < size; i++) { - connectors[i]->prepareForReceive(); - connectors[i]->prepareForSend(); + try { + connectors[i]->prepareForReceive(); + connectors[i]->prepareForSend(); + } catch (std::exception &e) { + UBLOG(logERROR, e.what()); + std::exit(EXIT_FAILURE); + } } } 
////////////////////////////////////////////////////////////////////////// @@ -251,8 +257,13 @@ void BasicCalculator::connectorsSendLocal(std::vector<SPtr<Block3DConnector>> &c #pragma omp parallel for schedule(OMP_SCHEDULE) #endif for (int i = 0; i < size; i++) { - connectors[i]->fillSendVectors(); - connectors[i]->sendVectors(); + try { + connectors[i]->fillSendVectors(); + connectors[i]->sendVectors(); + } catch (std::exception &e) { + UBLOG(logERROR, e.what()); + std::exit(EXIT_FAILURE); + } } } ////////////////////////////////////////////////////////////////////////// @@ -321,36 +332,43 @@ void BasicCalculator::applyPreCollisionBC(int startLevel, int maxInitLevel) #pragma omp parallel for schedule(OMP_SCHEDULE) #endif for (int i = 0; i < size; i++) { - blocks[level][i]->getKernel()->getBCProcessor()->applyPreCollisionBC(); + try { + blocks[level][i]->getKernel()->getBCProcessor()->applyPreCollisionBC(); + } catch (std::exception &e) { + UBLOG(logERROR, e.what()); + exit(EXIT_FAILURE); + } catch (std::string &s) { + UBLOG(logERROR, s); + exit(EXIT_FAILURE); + } catch (...) 
{ + UBLOG(logERROR, "unknown exception"); + exit(EXIT_FAILURE); + } } } } ////////////////////////////////////////////////////////////////////////// void BasicCalculator::applyPostCollisionBC(int startLevel, int maxInitLevel) { - try { - // from startLevel to maxInitLevel - for (int level = startLevel; level <= maxInitLevel; level++) { - int size = (int)blocks[level].size(); + // from startLevel to maxInitLevel + for (int level = startLevel; level <= maxInitLevel; level++) { + int size = (int)blocks[level].size(); #ifdef _OPENMP #pragma omp parallel for schedule(OMP_SCHEDULE) #endif - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { + try { blocks[level][i]->getKernel()->getBCProcessor()->applyPostCollisionBC(); + } catch (std::exception &e) { + UBLOG(logERROR, e.what()); + exit(EXIT_FAILURE); + } catch (std::string &s) { + UBLOG(logERROR, s); + exit(EXIT_FAILURE); + } catch (...) { + UBLOG(logERROR, "unknown exception"); + exit(EXIT_FAILURE); } } - } catch (std::exception &e) { - UBLOG(logERROR, e.what()); - // UBLOG(logERROR, " step = "<<calcStep); - // throw; - exit(EXIT_FAILURE); - } catch (std::string &s) { - UBLOG(logERROR, s); - // throw; - exit(EXIT_FAILURE); - } catch (...) { - UBLOG(logERROR, "unknown exception"); - // throw; - exit(EXIT_FAILURE); } } diff --git a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.h b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.h index fb6e68ce0a56ed7e407ab23605a1facd83eace52..3ef1f4c712e552ea5d5b5e82306e2bd94d74d7ab 100644 --- a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.h +++ b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.h @@ -39,7 +39,8 @@ class Block3DConnector; //! \class BasicCalculator -//! \brief Class implements basic functionality with OpenMP parallelization for main calculation LBM loop +//! \brief Class implements basic functionality with MPI + OpenMP parallelization for main calculation loop +//! 
\author Konstantin Kutscher class BasicCalculator : public Calculator { diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp index 5e921c4f20390da51724f8e53caaab23e0270549..79753c144f5cfff831f1d0415e9434c50b11bcea 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp +++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp @@ -318,6 +318,13 @@ void Block3D::deleteInterpolationFlag() interpolationFlagCF = 0; } ////////////////////////////////////////////////////////////////////////// +double Block3D::getWorkLoad() +{ + double l = kernel->getCalculationTime(); + l *= static_cast<double>(1 << level); + return l; +} +////////////////////////////////////////////////////////////////////////// std::string Block3D::toString() { std::stringstream ss; diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.h b/src/cpu/VirtualFluidsCore/Grid/Block3D.h index 7a4a2aad75825d6e2aabd252f9c480ca3a96c5a5..b2279b069e6ee322023d30419f8eed5c587f95e8 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Block3D.h +++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.h @@ -134,6 +134,8 @@ public: bool hasInterpolationFlagFC(int dir); bool hasInterpolationFlagFC(); + double getWorkLoad(); + std::string toString(); static int getMaxGlobalID() { return counter; } diff --git a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp index 083b30b9a4080183b2543dfe14f33d3551a093ec..fbeb2de979bb31dfb87441b5cfcfdf3393f0043c 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp +++ b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp @@ -124,11 +124,11 @@ void Calculator::initRemoteConnectors() // grid->getBlocks(level, gridRank, true, blockVector); grid->getBlocks(l, blockVector); for (SPtr<Block3D> block : blockVector) { - int l = block->getLevel(); - block->pushBackRemoteSameLevelConnectors(remoteConns[l]); + int block_level = block->getLevel(); + block->pushBackRemoteSameLevelConnectors(remoteConns[block_level]); - 
block->pushBackRemoteInterpolationConnectorsCF(remoteInterConnsCF[l]); - block->pushBackRemoteInterpolationConnectorsFC(remoteInterConnsFC[l]); + block->pushBackRemoteInterpolationConnectorsCF(remoteInterConnsCF[block_level]); + block->pushBackRemoteInterpolationConnectorsFC(remoteInterConnsFC[block_level]); } } diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp index c78fda22d5f1b2fe95fcac5a94435f72cea1c6a5..92be5ed5a06e1909a34144cdd0d1b31000309281 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp +++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp @@ -153,6 +153,17 @@ bool Grid3D::deleteBlock(int ix1, int ix2, int ix3, int level) return false; } } +void Grid3D::deleteBlocks() +{ + std::vector<std::vector<SPtr<Block3D>>> blocksVector(25); + int minInitLevel = Grid3DSystem::MINLEVEL; + int maxInitLevel = Grid3DSystem::MAXLEVEL; + for (int level = minInitLevel; level < maxInitLevel; level++) { + getBlocks(level, blocksVector[level]); + for (SPtr<Block3D> block : blocksVector[level]) // blocks of the current level + deleteBlock(block); + } +} ////////////////////////////////////////////////////////////////////////// void Grid3D::replaceBlock(SPtr<Block3D> block) { @@ -1328,7 +1339,7 @@ void Grid3D::getNeighborBlocksForDirectionWithDirZero(int dir, int ix1, int ix2, case Grid3DSystem::BSW: this->getNeighborsBottomSouthWest(ix1, ix2, ix3, level, levelDepth, blocks); break; - case Grid3DSystem::REST: + case Grid3DSystem::ZERO: this->getNeighborsZero(ix1, ix2, ix3, level, levelDepth, blocks); break; default: diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h index 69c5106847bba69b651ee2c9ed84c0616798b1c3..84c821e84b8c98f17e39814a211de3262e75f804 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h +++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h @@ -74,6 +74,7 @@ public: void addBlock(SPtr<Block3D> block); bool deleteBlock(SPtr<Block3D> block); bool deleteBlock(int ix1, int ix2, int 
ix3, int level); + void deleteBlocks(); void deleteBlocks(const std::vector<int> &ids); void replaceBlock(SPtr<Block3D> block); SPtr<Block3D> getBlock(int ix1, int ix2, int ix3, int level) const; diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3DSystem.h b/src/cpu/VirtualFluidsCore/Grid/Grid3DSystem.h index 008e38b88aec6be8695411d51c263084816fd2e1..ee61b8f7327e76a9393d4d3caa13c3a796470c08 100644 --- a/src/cpu/VirtualFluidsCore/Grid/Grid3DSystem.h +++ b/src/cpu/VirtualFluidsCore/Grid/Grid3DSystem.h @@ -71,7 +71,7 @@ static const int BNE = 22; static const int BNW = 23; static const int BSE = 24; static const int BSW = 25; -static const int REST /*f0 */ = 26; +static const int ZERO /*f0 */ = 26; static const int ENDDIR = 25; @@ -104,6 +104,7 @@ static const int INV_BSW = TNE; extern const int INVDIR[ENDDIR + 1]; +static const int MINLEVEL = 0; static const int MAXLEVEL = 25; extern const int EX1[ENDDIR + 1]; diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp index 55ce8d1dd332c51d1acd8e1285ea0e81ec9ab00f..bf1895b930f1c61d36d537319b53fe4b0abcd960 100644 --- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp +++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp @@ -219,7 +219,7 @@ void D3Q27Interactor::initInteractor(const double &timeStep) else this->unsetTimeDependent(); - Interactor3D::initInteractor(timeStep); + updateBlocks(); } ////////////////////////////////////////////////////////////////////////// void D3Q27Interactor::updateInteractor(const double ×tep) diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp index 0127c9c880f03d574f657ebc43e53ccaa4b67c7e..84526c62598b1d718b1f179228ae2a3f51839856 100644 --- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp +++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp @@ -290,11 +290,10 @@ void Interactor3D::setInactive() { active = false; 
} ////////////////////////////////////////////////////////////////////////// bool Interactor3D::isActive() { return active; } ////////////////////////////////////////////////////////////////////////// -void Interactor3D::initInteractor(const double & /*timeStep*/) +void Interactor3D::updateBlocks() { - // UBLOG(logINFO, "transBlocks.size = "<<transBlocks.size()); - - for (SPtr<Block3D> block : bcBlocks) { + for (SPtr<Block3D> block : bcBlocks) + { this->setDifferencesToGbObject3D(block); } } diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h index 9bf3a03ba179a1da7fa932a209d78e39b6622bcf..74627b76addaf6badaea678d1c4a20b274234b3a 100644 --- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h +++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h @@ -57,7 +57,7 @@ public: Interactor3D(SPtr<GbObject3D> geoObject3D, SPtr<Grid3D> grid, int type, Interactor3D::Accuracy a); virtual ~Interactor3D(); - virtual void initInteractor(const double ×tep = 0); + virtual void initInteractor(const double ×tep = 0) = 0; virtual void updateInteractor(const double ×tep = 0) = 0; void setSolidBlock(SPtr<Block3D> block); @@ -76,7 +76,7 @@ public: SPtr<Grid3D> getGrid3D() const { return grid.lock(); } void setGrid3D(SPtr<Grid3D> grid) { this->grid = grid; } virtual SPtr<GbObject3D> getGbObject3D() const { return geoObject3D; } - virtual bool setDifferencesToGbObject3D(const SPtr<Block3D> /*block*//*, const double& x1, const double& x2, const double& x3, const double& blockLengthX1, const double& blockLengthX2, const double& blockLengthX3, const double& timestep=0*/) + virtual bool setDifferencesToGbObject3D(const SPtr<Block3D>) { // UBLOG(logINFO, "Interactor3D::setDifferencesToGbObject3D()"); return false; @@ -123,6 +123,8 @@ protected: bool isBlockCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3, double delta); + void updateBlocks(); + SPtr<GbObject3D> 
geoObject3D; WPtr<Grid3D> grid; int type; diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp index 9cecabe4653b78df759db47fe3f48a43a090dad5..daed493b9cc1afddbd92acabcd551da0f463ea26 100644 --- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp +++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp @@ -30,24 +30,24 @@ //! \ingroup LBM //! \author Konstantin Kutscher, Martin Geier //======================================================================================= - #include "CumulantK17LBMKernel.h" -#include "BCArray3D.h" -#include "Block3D.h" -#include "D3Q27EsoTwist3DSplittedVector.h" #include "D3Q27System.h" +#include "D3Q27EsoTwist3DSplittedVector.h" +#include <cmath> #include "DataSet3D.h" #include "LBMKernel.h" -#include <cmath> +#include "Block3D.h" +#include "BCArray3D.h" #define PROOF_CORRECTNESS using namespace UbMath; ////////////////////////////////////////////////////////////////////////// -CumulantK17LBMKernel::CumulantK17LBMKernel() { this->compressible = true; } -////////////////////////////////////////////////////////////////////////// -CumulantK17LBMKernel::~CumulantK17LBMKernel(void) = default; +CumulantK17LBMKernel::CumulantK17LBMKernel() +{ + this->compressible = true; +} ////////////////////////////////////////////////////////////////////////// void CumulantK17LBMKernel::initDataSet() { @@ -75,561 +75,561 @@ SPtr<LBMKernel> CumulantK17LBMKernel::clone() ////////////////////////////////////////////////////////////////////////// void CumulantK17LBMKernel::calculate(int step) { - ////////////////////////////////////////////////////////////////////////// - //! Cumulant K17 Kernel is based on - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! and - //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> - //! 
- //! The cumulant kernel is executed in the following steps - //! - //////////////////////////////////////////////////////////////////////////////// - //! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim. - //! - using namespace D3Q27System; - using namespace std; - - //initializing of forcing stuff - if (withForcing) - { - muForcingX1.DefineVar("x1", &muX1); muForcingX1.DefineVar("x2", &muX2); muForcingX1.DefineVar("x3", &muX3); - muForcingX2.DefineVar("x1", &muX1); muForcingX2.DefineVar("x2", &muX2); muForcingX2.DefineVar("x3", &muX3); - muForcingX3.DefineVar("x1", &muX1); muForcingX3.DefineVar("x2", &muX2); muForcingX3.DefineVar("x3", &muX3); - - muDeltaT = deltaT; - - muForcingX1.DefineVar("dt", &muDeltaT); - muForcingX2.DefineVar("dt", &muDeltaT); - muForcingX3.DefineVar("dt", &muDeltaT); - - muNu = (1.0 / 3.0) * (1.0 / collFactor - 1.0 / 2.0); - - muForcingX1.DefineVar("nu", &muNu); - muForcingX2.DefineVar("nu", &muNu); - muForcingX3.DefineVar("nu", &muNu); - } - ///////////////////////////////////// - - localDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions(); - nonLocalDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions(); - restDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions(); - - SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray(); - - const int bcArrayMaxX1 = (int)bcArray->getNX1(); - const int bcArrayMaxX2 = (int)bcArray->getNX2(); - const int bcArrayMaxX3 = (int)bcArray->getNX3(); - - int minX1 = ghostLayerWidth; - int minX2 = ghostLayerWidth; - int minX3 = ghostLayerWidth; - int maxX1 = bcArrayMaxX1 - ghostLayerWidth; - int maxX2 = bcArrayMaxX2 - ghostLayerWidth; - int maxX3 = bcArrayMaxX3 - ghostLayerWidth; - - LBMReal omega = collFactor; - - for (int x3 = minX3; x3 < maxX3; x3++) - { - for (int x2 = 
minX2; x2 < maxX2; x2++) - { - for (int x1 = minX1; x1 < maxX1; x1++) - { - if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) + ////////////////////////////////////////////////////////////////////////// + //! Cumulant K17 Kernel is based on + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! and + //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a> + //! + //! The cumulant kernel is executed in the following steps + //! + //////////////////////////////////////////////////////////////////////////////// + //! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim. + //! + + using namespace std; + + //initializing of forcing stuff + if (withForcing) + { + muForcingX1.DefineVar("x1", &muX1); muForcingX1.DefineVar("x2", &muX2); muForcingX1.DefineVar("x3", &muX3); + muForcingX2.DefineVar("x1", &muX1); muForcingX2.DefineVar("x2", &muX2); muForcingX2.DefineVar("x3", &muX3); + muForcingX3.DefineVar("x1", &muX1); muForcingX3.DefineVar("x2", &muX2); muForcingX3.DefineVar("x3", &muX3); + + muDeltaT = deltaT; + + muForcingX1.DefineVar("dt", &muDeltaT); + muForcingX2.DefineVar("dt", &muDeltaT); + muForcingX3.DefineVar("dt", &muDeltaT); + + muNu = (1.0 / 3.0) * (1.0 / collFactor - 1.0 / 2.0); + + muForcingX1.DefineVar("nu", &muNu); + muForcingX2.DefineVar("nu", &muNu); + muForcingX3.DefineVar("nu", &muNu); + } + ///////////////////////////////////// + + localDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions(); + nonLocalDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions(); + restDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions(); + + SPtr<BCArray3D> bcArray = 
this->getBCProcessor()->getBCArray(); + + const int bcArrayMaxX1 = (int)bcArray->getNX1(); + const int bcArrayMaxX2 = (int)bcArray->getNX2(); + const int bcArrayMaxX3 = (int)bcArray->getNX3(); + + int minX1 = ghostLayerWidth; + int minX2 = ghostLayerWidth; + int minX3 = ghostLayerWidth; + int maxX1 = bcArrayMaxX1 - ghostLayerWidth; + int maxX2 = bcArrayMaxX2 - ghostLayerWidth; + int maxX3 = bcArrayMaxX3 - ghostLayerWidth; + + LBMReal omega = collFactor; + + for (int x3 = minX3; x3 < maxX3; x3++) + { + for (int x2 = minX2; x2 < maxX2; x2++) + { + for (int x1 = minX1; x1 < maxX1; x1++) { - int x1p = x1 + 1; - int x2p = x2 + 1; - int x3p = x3 + 1; - ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! 
- //////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - - //E N T - //c c c - ////////// - //W S B - //a a a - - //Rest is b - - //mfxyz - //a - negative - //b - null - //c - positive - - // a b c - //-1 0 1 - - LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3); - LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3); - LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3); - LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3); - LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3); - LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3); - LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3); - LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3); - LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3); - LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3); - LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3); - LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3); - LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3); - - LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3); - LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3); - LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p); - LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3); - LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3); - LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p); - LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p); - LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, 
x1, x2p, x3p); - LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p); - LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p); - LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p); - LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p); - LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p); - - LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb; - - LBMReal rho = c1 + drho; - LBMReal OOrho = c1 / rho; - //////////////////////////////////////////////////////////////////////////////////// - LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) / rho; - LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) / rho; - LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) / rho; - //////////////////////////////////////////////////////////////////////////////////// - //forcing - 
/////////////////////////////////////////////////////////////////////////////////////////// - if (withForcing) - { - muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1); - muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2); - muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3); - - forcingX1 = muForcingX1.Eval(); - forcingX2 = muForcingX2.Eval(); - forcingX3 = muForcingX3.Eval(); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - vvx += forcingX1 * deltaT * c1o2; // X - vvy += forcingX2 * deltaT * c1o2; // Y - vvz += forcingX3 * deltaT * c1o2; // Z - } - //////////////////////////////////////////////////////////////////////////////////// - // calculate the square of velocities for this lattice node - LBMReal vx2 = vvx * vvx; - LBMReal vy2 = vvy * vvy; - LBMReal vz2 = vvz * vvz; - //////////////////////////////////////////////////////////////////////////////////// - //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - LBMReal wadjust; - LBMReal qudricLimitP = c1o100; - LBMReal qudricLimitM = c1o100; - LBMReal qudricLimitD = c1o100; - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! see also Eq. (6)-(14) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36); - forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9); - forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36); - forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9); - forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9); - forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36); - forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9); - forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6); - forwardChimera(mfaab, mfabb, mfacb, vvy, vy2); - forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18); - forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); - forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6); - forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); - forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1); - forwardChimera(mfaba, mfbba, mfcba, vvx, vx2); - forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3); - forwardChimera(mfaab, mfbab, mfcab, vvx, vx2); - forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); - forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); - forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3); - forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); - 
forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. - //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. - //! - Third order cumulants \f$ C_{120}+C_{102} \f$, \f$ C_{210}+C_{012} \f$, \f$ C_{201}+C_{021} \f$: \f$\omega_3=OxyyPxzz\f$ set according to Eq. (111) with simplifications assuming \f$\omega_2=1.0\f$. - //! - Third order cumulants \f$ C_{120}-C_{102} \f$, \f$ C_{210}-C_{012} \f$, \f$ C_{201}-C_{021} \f$: \f$\omega_4 = OxyyMxzz\f$ set according to Eq. (112) with simplifications assuming \f$\omega_2 = 1.0\f$. - //! - Third order cumulants \f$ C_{111} \f$: \f$\omega_5 = Oxyz\f$ set according to Eq. (113) with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! - Fourth order cumulants \f$ C_{220} \f$, \f$ C_{202} \f$, \f$ C_{022} \f$, \f$ C_{211} \f$, \f$ C_{121} \f$, \f$ C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. - //! - Fifth order cumulants \f$ C_{221}\f$, \f$C_{212}\f$, \f$C_{122}\f$: \f$\omega_9=O5=1.0\f$. - //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. - //! - //////////////////////////////////////////////////////////// - //2. - LBMReal OxxPyyPzz = c1; - //////////////////////////////////////////////////////////// - //3. 
- LBMReal OxyyPxzz = c8 * (-c2 + omega) * ( c1 + c2*omega) / (-c8 - c14*omega + c7*omega*omega); - LBMReal OxyyMxzz = c8 * (-c2 + omega) * (-c7 + c4*omega) / (c56 - c50*omega + c9*omega*omega); - LBMReal Oxyz = c24 * (-c2 + omega) * (-c2 - c7*omega + c3*omega*omega) / (c48 + c152*omega - c130*omega*omega + c29*omega*omega*omega); - //////////////////////////////////////////////////////////// - //4. - LBMReal O4 = c1; - //////////////////////////////////////////////////////////// - //5. - LBMReal O5 = c1; - //////////////////////////////////////////////////////////// - //6. - LBMReal O6 = c1; - - //////////////////////////////////////////////////////////////////////////////////// - //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity). - //! - LBMReal A = (c4 + c2*omega - c3*omega*omega) / (c2 - c7*omega + c5*omega*omega); - LBMReal B = (c4 + c28*omega - c14*omega*omega) / (c6 - c21*omega + c15*omega*omega); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute cumulants from central moments according to Eq. (20)-(23) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - //////////////////////////////////////////////////////////// - //4. 
- LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho; - LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho; - LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho; - - LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho)); - LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho)); - LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho)); - //////////////////////////////////////////////////////////// - //5. - LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; - LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; - LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; - //////////////////////////////////////////////////////////// - //6. 
- LBMReal CUMccc = mfccc + ((-c4 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2 * (mfcaa * mfaca * mfaac) - + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27 * ((drho * drho - drho) * OOrho * OOrho)); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute linear combinations of second and third order cumulants - //! - //////////////////////////////////////////////////////////// - //2. - LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac; - LBMReal mxxMyy = mfcaa - mfaca; - LBMReal mxxMzz = mfcaa - mfaac; - //////////////////////////////////////////////////////////// - //3. - LBMReal mxxyPyzz = mfcba + mfabc; - LBMReal mxxyMyzz = mfcba - mfabc; - - LBMReal mxxzPyyz = mfcab + mfacb; - LBMReal mxxzMyyz = mfcab - mfacb; - - LBMReal mxyyPxzz = mfbca + mfbac; - LBMReal mxyyMxzz = mfbca - mfbac; - - //////////////////////////////////////////////////////////////////////////////////// - //incl. correction - //////////////////////////////////////////////////////////// - //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! Further explanations of the correction in viscosity in Appendix H of - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! 
Note that the division by rho is omitted here as we need rho times the gradients later. - //! - LBMReal Dxy = -c3 * omega * mfbba; - LBMReal Dxz = -c3 * omega * mfbab; - LBMReal Dyz = -c3 * omega * mfabb; - LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - LBMReal dyuy = dxux + omega * c3o2 * mxxMyy; - LBMReal dzuz = dxux + omega * c3o2 * mxxMzz; - //////////////////////////////////////////////////////////// - //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); - mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); - mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - ////no correction - //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); - //mxxMyy += -(-omega) * (-mxxMyy); - //mxxMzz += -(-omega) * (-mxxMzz); - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - mfabb += omega * (-mfabb); - mfbab += omega * (-mfbab); - mfbba += omega * (-mfbba); - - //////////////////////////////////////////////////////////////////////////////////// - //relax - ////////////////////////////////////////////////////////////////////////// - // incl. limiter - //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD); - mfbbb += wadjust * (-mfbbb); - wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); - mxxyPyzz += wadjust * (-mxxyPyzz); - wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); - mxxyMyzz += wadjust * (-mxxyMyzz); - wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); - mxxzPyyz += wadjust * (-mxxzPyyz); - wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); - mxxzMyyz += wadjust * (-mxxzMyyz); - wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); - mxyyPxzz += wadjust * (-mxyyPxzz); - wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); - mxyyMxzz += wadjust * (-mxyyMxzz); - ////////////////////////////////////////////////////////////////////////// - // no limiter - //mfbbb += OxyyMxzz * (-mfbbb); - //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); - //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); - //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); - //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); - //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); - //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute inverse linear combinations of second and third order cumulants - //! - mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); - mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz); - mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz); - - mfcba = (mxxyMyzz + mxxyPyzz) * c1o2; - mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; - mfcab = (mxxzMyyz + mxxzPyyz) * c1o2; - mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; - mfbca = (mxyyMxzz + mxyyPxzz) * c1o2; - mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; - ////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - //4. - // no limiter - //! 
- Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc); - CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac); - CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca); - CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc); - CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb); - CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb); - - ////////////////////////////////////////////////////////////////////////// - //5. - CUMbcc += O5 * (-CUMbcc); - CUMcbc += O5 * (-CUMcbc); - CUMccb += O5 * (-CUMccb); - - ////////////////////////////////////////////////////////////////////////// - //6. - CUMccc += O6 * (-CUMccc); - - //////////////////////////////////////////////////////////////////////////////////// - //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! - - ////////////////////////////////////////////////////////////////////////// - //4. 
- mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho; - mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho; - mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho; - - mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9; - mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9; - mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9; - - ////////////////////////////////////////////////////////////////////////// - //5. - mfbcc = CUMbcc + c1o3 * (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; - mfcbc = CUMcbc + c1o3 * (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; - mfccb = CUMccb + c1o3 * (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; - - ////////////////////////////////////////////////////////////////////////// - //6. 
- mfccc = CUMccc - ((-c4 * mfbbb * mfbbb - - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) - - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) - - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho - + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) - + c2 * (mfcaa * mfaca * mfaac) - + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho - - c1o3 * (mfacc + mfcac + mfcca) * OOrho - - c1o9 * (mfcaa + mfaca + mfaac) * OOrho - + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) - + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 - + c1o27 * ((drho * drho - drho) * OOrho * OOrho)); - - - //////////////////////////////////////////////////////////////////////////////////// - //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! - mfbaa = -mfbaa; - mfaba = -mfaba; - mfaab = -mfaab; - //////////////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////////////// - //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> - //! see also Eq. (88)-(96) in - //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> - //! 
- //////////////////////////////////////////////////////////////////////////////////// - // X - Dir - backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1); - backwardChimera(mfaba, mfbba, mfcba, vvx, vx2); - backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3); - backwardChimera(mfaab, mfbab, mfcab, vvx, vx2); - backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); - backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); - backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3); - backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); - backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9); - - //////////////////////////////////////////////////////////////////////////////////// - // Y - Dir - backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6); - backwardChimera(mfaab, mfabb, mfacb, vvy, vy2); - backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18); - backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); - backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); - backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); - backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6); - backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); - backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18); - - //////////////////////////////////////////////////////////////////////////////////// - // Z - Dir - backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36); - backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9); - backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36); - backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9); - backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); - backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9); - backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36); - backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, 
vz2, c9, c1o9); - backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36); - //////////////////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// - //proof correctness - ////////////////////////////////////////////////////////////////////////// + if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) + { + int x1p = x1 + 1; + int x2p = x2 + 1; + int x3p = x3 + 1; + ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + //////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + + //E N T + //c c c + ////////// + //W S B + //a a a + + //Rest is b + + //mfxyz + //a - negative + //b - null + //c - positive + + // a b c + //-1 0 1 + + LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3); + LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3); + LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3); + LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3); + LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3); + LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3); + LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3); + LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3); + LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3); + LBMReal mfccc = 
(*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3); + LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3); + LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3); + LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3); + + LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3); + LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3); + LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p); + LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3); + LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3); + LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p); + LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p); + LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p); + LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p); + LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p); + LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p); + LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p); + LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p); + + LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! 
+ LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + + (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + + ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb; + + LBMReal rho = c1 + drho; + LBMReal OOrho = c1 / rho; + //////////////////////////////////////////////////////////////////////////////////// + LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + + (mfcbb - mfabb)) / rho; + LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + + (mfbcb - mfbab)) / rho; + LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + + (mfbbc - mfbba)) / rho; + //////////////////////////////////////////////////////////////////////////////////// + //forcing + /////////////////////////////////////////////////////////////////////////////////////////// + if (withForcing) + { + muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1); + muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2); + muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3); + + forcingX1 = muForcingX1.Eval(); + forcingX2 = muForcingX2.Eval(); + forcingX3 = muForcingX3.Eval(); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! 
+ vvx += forcingX1 * deltaT * c1o2; // X + vvy += forcingX2 * deltaT * c1o2; // Y + vvz += forcingX3 * deltaT * c1o2; // Z + } + //////////////////////////////////////////////////////////////////////////////////// + // calculate the square of velocities for this lattice node + LBMReal vx2 = vvx * vvx; + LBMReal vy2 = vvy * vvy; + LBMReal vz2 = vvz * vvz; + //////////////////////////////////////////////////////////////////////////////////// + //! - Set relaxation limiters for third order cumulants to c1o100 (presumably \f$ \lambda=0.01 \f$, not the 0.001 this comment used to state; TODO confirm the intended value), limiter described in section 6 in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! + LBMReal wadjust; + LBMReal qudricLimitP = c1o100; + LBMReal qudricLimitM = c1o100; + LBMReal qudricLimitD = c1o100; + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! see also Eq. (6)-(14) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! 
+ //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36); + forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9); + forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36); + forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9); + forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9); + forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36); + forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9); + forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36); + + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6); + forwardChimera(mfaab, mfabb, mfacb, vvy, vy2); + forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18); + forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); + forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6); + forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); + forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18); + + //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1); + forwardChimera(mfaba, mfbba, mfcba, vvx, vx2); + forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3); + forwardChimera(mfaab, mfbab, mfcab, vvx, vx2); + forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); + forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); + forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3); + forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); + 
forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE]. + //! - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$. + //! - Third order cumulants \f$ C_{120}+C_{102} \f$, \f$ C_{210}+C_{012} \f$, \f$ C_{201}+C_{021} \f$: \f$\omega_3=OxyyPxzz\f$ set according to Eq. (111) with simplifications assuming \f$\omega_2=1.0\f$. + //! - Third order cumulants \f$ C_{120}-C_{102} \f$, \f$ C_{210}-C_{012} \f$, \f$ C_{201}-C_{021} \f$: \f$\omega_4 = OxyyMxzz\f$ set according to Eq. (112) with simplifications assuming \f$\omega_2 = 1.0\f$. + //! - Third order cumulants \f$ C_{111} \f$: \f$\omega_5 = Oxyz\f$ set according to Eq. (113) with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity). + //! - Fourth order cumulants \f$ C_{220} \f$, \f$ C_{202} \f$, \f$ C_{022} \f$, \f$ C_{211} \f$, \f$ C_{121} \f$, \f$ C_{112} \f$: for simplification all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$. + //! - Fifth order cumulants \f$ C_{221}\f$, \f$C_{212}\f$, \f$C_{122}\f$: \f$\omega_9=O5=1.0\f$. + //! - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$. + //! + //////////////////////////////////////////////////////////// + //2. + LBMReal OxxPyyPzz = c1; + //////////////////////////////////////////////////////////// + //3. 
+ LBMReal OxyyPxzz = c8 * (-c2 + omega) * ( c1 + c2*omega) / (-c8 - c14*omega + c7*omega*omega); + LBMReal OxyyMxzz = c8 * (-c2 + omega) * (-c7 + c4*omega) / (c56 - c50*omega + c9*omega*omega); + LBMReal Oxyz = c24 * (-c2 + omega) * (-c2 - c7*omega + c3*omega*omega) / (c48 + c152*omega - c130*omega*omega + c29*omega*omega*omega); + //////////////////////////////////////////////////////////// + //4. + LBMReal O4 = c1; + //////////////////////////////////////////////////////////// + //5. + LBMReal O5 = c1; + //////////////////////////////////////////////////////////// + //6. + LBMReal O6 = c1; + + //////////////////////////////////////////////////////////////////////////////////// + //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity). + //! + LBMReal A = (c4 + c2*omega - c3*omega*omega) / (c2 - c7*omega + c5*omega*omega); + LBMReal B = (c4 + c28*omega - c14*omega*omega) / (c6 - c21*omega + c15*omega*omega); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute cumulants from central moments according to Eq. (20)-(23) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! + //////////////////////////////////////////////////////////// + //4. 
+ LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho; + LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho; + LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho; + + LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho)); + LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho)); + LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho)); + //////////////////////////////////////////////////////////// + //5. + LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho; + LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho; + LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho; + //////////////////////////////////////////////////////////// + //6. 
+ LBMReal CUMccc = mfccc + ((-c4 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2 * (mfcaa * mfaca * mfaac) + + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27 * ((drho * drho - drho) * OOrho * OOrho)); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute linear combinations of second and third order cumulants + //! + //////////////////////////////////////////////////////////// + //2. + LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac; + LBMReal mxxMyy = mfcaa - mfaca; + LBMReal mxxMzz = mfcaa - mfaac; + //////////////////////////////////////////////////////////// + //3. + LBMReal mxxyPyzz = mfcba + mfabc; + LBMReal mxxyMyzz = mfcba - mfabc; + + LBMReal mxxzPyyz = mfcab + mfacb; + LBMReal mxxzMyyz = mfcab - mfacb; + + LBMReal mxyyPxzz = mfbca + mfbac; + LBMReal mxyyMxzz = mfbca - mfbac; + + //////////////////////////////////////////////////////////////////////////////////// + //incl. correction + //////////////////////////////////////////////////////////// + //! - Compute velocity gradients from second order cumulants according to Eq. (27)-(32) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! Further explanations of the correction in viscosity in Appendix H of + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! 
Note that the division by rho is omitted here as we need rho times the gradients later. + //! + LBMReal Dxy = -c3 * omega * mfbba; + LBMReal Dxz = -c3 * omega * mfbab; + LBMReal Dyz = -c3 * omega * mfabb; + LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); + LBMReal dyuy = dxux + omega * c3o2 * mxxMyy; + LBMReal dzuz = dxux + omega * c3o2 * mxxMzz; + //////////////////////////////////////////////////////////// + //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! + mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz); + mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy); + mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////no correction + //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz); + //mxxMyy += -(-omega) * (-mxxMyy); + //mxxMzz += -(-omega) * (-mxxMzz); + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + mfabb += omega * (-mfabb); + mfbab += omega * (-mfbab); + mfbba += omega * (-mfbba); + + //////////////////////////////////////////////////////////////////////////////////// + //relax + ////////////////////////////////////////////////////////////////////////// + // incl. limiter + //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! 
+ wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD); + mfbbb += wadjust * (-mfbbb); + wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP); + mxxyPyzz += wadjust * (-mxxyPyzz); + wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM); + mxxyMyzz += wadjust * (-mxxyMyzz); + wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP); + mxxzPyyz += wadjust * (-mxxzPyyz); + wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM); + mxxzMyyz += wadjust * (-mxxzMyyz); + wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP); + mxyyPxzz += wadjust * (-mxyyPxzz); + wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM); + mxyyMxzz += wadjust * (-mxyyMxzz); + ////////////////////////////////////////////////////////////////////////// + // no limiter + //mfbbb += OxyyMxzz * (-mfbbb); + //mxxyPyzz += OxyyPxzz * (-mxxyPyzz); + //mxxyMyzz += OxyyMxzz * (-mxxyMyzz); + //mxxzPyyz += OxyyPxzz * (-mxxzPyyz); + //mxxzMyyz += OxyyMxzz * (-mxxzMyyz); + //mxyyPxzz += OxyyPxzz * (-mxyyPxzz); + //mxyyMxzz += OxyyMxzz * (-mxyyMxzz); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute inverse linear combinations of second and third order cumulants + //! + mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz); + mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz); + mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz); + + mfcba = (mxxyMyzz + mxxyPyzz) * c1o2; + mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2; + mfcab = (mxxzMyyz + mxxzPyyz) * c1o2; + mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2; + mfbca = (mxyyMxzz + mxyyPxzz) * c1o2; + mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2; + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + //4. + // no limiter + //! 
- Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48) + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! + CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc); + CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac); + CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca); + CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc); + CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb); + CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb); + + ////////////////////////////////////////////////////////////////////////// + //5. + CUMbcc += O5 * (-CUMbcc); + CUMcbc += O5 * (-CUMcbc); + CUMccb += O5 * (-CUMccb); + + ////////////////////////////////////////////////////////////////////////// + //6. + CUMccc += O6 * (-CUMccc); + + //////////////////////////////////////////////////////////////////////////////////// + //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! + + ////////////////////////////////////////////////////////////////////////// + //4. 
+ mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho; + mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho; + mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho; + + mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9; + mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9; + mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9; + + ////////////////////////////////////////////////////////////////////////// + //5. + mfbcc = CUMbcc + c1o3 * (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho; + mfcbc = CUMcbc + c1o3 * (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho; + mfccb = CUMccb + c1o3 * (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; + + ////////////////////////////////////////////////////////////////////////// + //6. 
+ mfccc = CUMccc - ((-c4 * mfbbb * mfbbb + - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) + - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) + - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho + + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) + + c2 * (mfcaa * mfaca * mfaac) + + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho + - c1o3 * (mfacc + mfcac + mfcca) * OOrho + - c1o9 * (mfcaa + mfaca + mfaac) * OOrho + + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) + + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3 + + c1o27 * ((drho * drho - drho) * OOrho * OOrho)); + + + //////////////////////////////////////////////////////////////////////////////////// + //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! + mfbaa = -mfbaa; + mfaba = -mfaba; + mfaab = -mfaab; + //////////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////////// + //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in + //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a> + //! see also Eq. (88)-(96) in + //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> + //! 
+ //////////////////////////////////////////////////////////////////////////////////// + // X - Dir + backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1); + backwardChimera(mfaba, mfbba, mfcba, vvx, vx2); + backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3); + backwardChimera(mfaab, mfbab, mfcab, vvx, vx2); + backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2); + backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2); + backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3); + backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2); + backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9); + + //////////////////////////////////////////////////////////////////////////////////// + // Y - Dir + backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6); + backwardChimera(mfaab, mfabb, mfacb, vvy, vy2); + backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18); + backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3); + backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2); + backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9); + backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6); + backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2); + backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18); + + //////////////////////////////////////////////////////////////////////////////////// + // Z - Dir + backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36); + backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9); + backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36); + backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9); + backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9); + backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9); + backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36); + backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, 
vz2, c9, c1o9); + backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36); + //////////////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + //proof correctness + ////////////////////////////////////////////////////////////////////////// #ifdef PROOF_CORRECTNESS - LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) - + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) - + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb; - LBMReal dif = drho - drho_post; + LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) + + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb; + LBMReal dif = drho - drho_post; #ifdef SINGLEPRECISION - if (dif > 10.0E-7 || dif < -10.0E-7) + if (dif > 10.0E-7 || dif < -10.0E-7) #else - if (dif > 10.0E-15 || dif < -10.0E-15) + if (dif > 10.0E-15 || dif < -10.0E-15) #endif - { - UB_THROW(UbException(UB_EXARGS, "rho=" + UbSystem::toString(drho) + ", rho_post=" + UbSystem::toString(drho_post) - + " dif=" + UbSystem::toString(dif) - + " rho is not correct for node " + UbSystem::toString(x1) + "," + UbSystem::toString(x2) + "," + UbSystem::toString(x3) - + " in " + block.lock()->toString() + " step = " + UbSystem::toString(step))); - } + { + UB_THROW(UbException(UB_EXARGS, "rho=" + UbSystem::toString(drho) + ", rho_post=" + UbSystem::toString(drho_post) + + " dif=" + UbSystem::toString(dif) + + " rho is not correct for node " + UbSystem::toString(x1) + "," + UbSystem::toString(x2) + "," + UbSystem::toString(x3) + + " in " + block.lock()->toString() + " step = " + UbSystem::toString(step))); + } #endif - //////////////////////////////////////////////////////////////////////////////////// - //! 
- Write distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm - //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> - //! - (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = mfabb; - (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = mfbab; - (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = mfbba; - (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = mfaab; - (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = mfcab; - (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = mfaba; - (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = mfcba; - (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = mfbaa; - (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = mfbca; - (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = mfaaa; - (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = mfcaa; - (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = mfaca; - (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = mfcca; - - (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = mfcbb; - (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = mfbcb; - (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = mfbbc; - (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = mfccb; - (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = mfacb; - (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = mfcbc; - (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = mfabc; - (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = mfbcc; - (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = mfbac; - (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = mfccc; - 
(*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = mfacc; - (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = mfcac; - (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = mfaac; - - (*this->restDistributions)(x1, x2, x3) = mfbbb; - ////////////////////////////////////////////////////////////////////////// - + //////////////////////////////////////////////////////////////////////////////////// + //! - Write distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm + //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> + //! + (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = mfabb; + (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = mfbab; + (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = mfbba; + (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = mfaab; + (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = mfcab; + (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = mfaba; + (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = mfcba; + (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = mfbaa; + (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = mfbca; + (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = mfaaa; + (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = mfcaa; + (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = mfaca; + (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = mfcca; + + (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = mfcbb; + (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = mfbcb; + (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = mfbbc; + (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = mfccb; + 
(*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = mfacb; + (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = mfcbc; + (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = mfabc; + (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = mfbcc; + (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = mfbac; + (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = mfccc; + (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = mfacc; + (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = mfcac; + (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = mfaac; + + (*this->restDistributions)(x1, x2, x3) = mfbbb; + ////////////////////////////////////////////////////////////////////////// + + } } - } - } - } + } + } } ////////////////////////////////////////////////////////////////////////// diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h index ca652cb37953f530d8a49a6917026652e4382a87..10cfd49264bb829eac1fc6b9bedeee3b6eace265 100644 --- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h +++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h @@ -34,12 +34,12 @@ #ifndef CumulantK17LBMKernel_h__ #define CumulantK17LBMKernel_h__ +#include "LBMKernel.h" #include "BCProcessor.h" #include "D3Q27System.h" -#include "LBMKernel.h" -#include "basics/container/CbArray3D.h" -#include "basics/container/CbArray4D.h" #include "basics/utilities/UbTiming.h" +#include "basics/container/CbArray4D.h" +#include "basics/container/CbArray3D.h" //! \brief Compressible cumulant LBM kernel. //! 
\details LBM implementation that use Cascaded Cumulant Lattice Boltzmann method for D3Q27 model @@ -52,17 +52,16 @@ class CumulantK17LBMKernel : public LBMKernel { public: CumulantK17LBMKernel(); - ~CumulantK17LBMKernel() override; + ~CumulantK17LBMKernel() = default; void calculate(int step) override; SPtr<LBMKernel> clone() override; + double getCalculationTime() override { return .0; } protected: - inline void forwardInverseChimeraWithK(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2, - LBMReal Kinverse, LBMReal K); - inline void backwardInverseChimeraWithK(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2, - LBMReal Kinverse, LBMReal K); - inline void forwardChimera(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2); - inline void backwardChimera(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2); + inline void forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K); + inline void backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K); + inline void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2); + inline void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2); virtual void initDataSet(); LBMReal f[D3Q27System::ENDF + 1]; @@ -80,19 +79,18 @@ protected: }; //////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardInverseChimeraWithK +//! \brief forward chimera transformation \ref forwardInverseChimeraWithK //! Transformation from distributions to central moments according to Eq. (6)-(14) in -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. 
Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! Modified for lower round-off errors. //////////////////////////////////////////////////////////////////////////////// -inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, - LBMReal v2, LBMReal Kinverse, LBMReal K) +inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K) { using namespace UbMath; LBMReal m2 = mfa + mfc; LBMReal m1 = mfc - mfa; LBMReal m0 = m2 + mfb; - mfa = m0; + mfa = m0; m0 *= Kinverse; m0 += c1; mfb = (m1 * Kinverse - m0 * vv) * K; @@ -101,50 +99,49 @@ inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal &mfa, LBMRe //////////////////////////////////////////////////////////////////////////////// //! \brief backward chimera transformation \ref backwardInverseChimeraWithK //! Transformation from central moments to distributions according to Eq. (57)-(65) in -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> Modified for lower round-off errors. +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! Modified for lower round-off errors. 
//////////////////////////////////////////////////////////////////////////////// -inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, - LBMReal v2, LBMReal Kinverse, LBMReal K) +inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K) { using namespace UbMath; LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 - vv) * c1o2) * K; LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (-v2)) * K; - mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K; - mfa = m0; - mfb = m1; + mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K; + mfa = m0; + mfb = m1; } //////////////////////////////////////////////////////////////////////////////// -//! \brief forward chimera transformation \ref forwardChimera +//! \brief forward chimera transformation \ref forwardChimera //! Transformation from distributions to central moments according to Eq. (6)-(14) in -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off -//! errors. +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. +//! Modified for lower round-off errors. 
//////////////////////////////////////////////////////////////////////////////// -inline void CumulantK17LBMKernel::forwardChimera(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2) +inline void CumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) { using namespace UbMath; LBMReal m1 = (mfa + mfc) + mfb; LBMReal m2 = mfc - mfa; - mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2); - mfb = m2 - vv * m1; - mfa = m1; + mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2); + mfb = m2 - vv * m1; + mfa = m1; } //////////////////////////////////////////////////////////////////////////////// -//! \brief backward chimera transformation \ref backwardChimera +//! \brief backward chimera transformation \ref backwardChimera //! Transformation from central moments to distributions according to Eq. (57)-(65) in -//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 -//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off -//! errors. +//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a> +//! for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. +//! Modified for lower round-off errors. 
//////////////////////////////////////////////////////////////////////////////// -inline void CumulantK17LBMKernel::backwardChimera(LBMReal &mfa, LBMReal &mfb, LBMReal &mfc, LBMReal vv, LBMReal v2) +inline void CumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) { using namespace UbMath; LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2); LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv; - mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); - mfb = mb; - mfa = ma; + mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2); + mfb = mb; + mfa = ma; } #endif // CumulantK17LBMKernel_h__ \ No newline at end of file diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp index e6362563e31ad994ccc7b42c26c5e81a56a6e101..e4bea8735887c3ee9237e4fc368554e4e0002b55 100644 --- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp +++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp @@ -1,9 +1,11 @@ #include "D3Q27System.h" + namespace D3Q27System { using namespace UbMath; + // index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18//falsch -// f: REST, E, W, N, S, T, B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW +// f: ZERO, E, W, N, S, T, B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW // BSE BSW const int EX1[] = { 0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, // 1, -1, 1, -1 }; const int EX2[] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 1, 1, // -1, -1, 1, 1, -1, -1 }; const int EX3[] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, @@ -152,7 +154,7 @@ const double cNorm[3][ENDDIR] = { { double(DX1[0]), // const int BNW = 23; // const int BSE = 24; // const int BSW = 25; -// const int REST /*f0 */ = 26; +// const int ZERO /*f0 */ = 26; // const int INV_E = W; // const int INV_W = E; diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h 
b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h index b3da79948c60e986b1c96f6f4266012dd7bffd23..b5d88d6c3791d716cd0dca567d7aaa803e863536 100644 --- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h +++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h @@ -35,8 +35,8 @@ #define D3Q27SYSTEM_H #include <cmath> -#include <iostream> #include <string> +#include <iostream> #include "LBMSystem.h" #include "UbException.h" @@ -89,7 +89,7 @@ static const int BNE = 22; static const int BNW = 23; static const int BSE = 24; static const int BSW = 25; -static const int REST = 26; +static const int ZERO = 26; static const int INV_E = W; static const int INV_W = E; @@ -155,7 +155,7 @@ static LBMReal getDensity(const LBMReal *const &f /*[27]*/) return ((f[TNE] + f[BSW]) + (f[TSE] + f[BNW])) + ((f[BSE] + f[TNW]) + (f[TSW] + f[BNE])) + (((f[NE] + f[SW]) + (f[SE] + f[NW])) + ((f[TE] + f[BW]) + (f[BE] + f[TW])) + ((f[BN] + f[TS]) + (f[TN] + f[BS]))) + - ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[REST]; + ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[ZERO]; } /*=====================================================================*/ // ATTENTION: does not apply to all models -> use certificate instead of static! 
to do @@ -184,7 +184,7 @@ static void calcDensity(const LBMReal *const &f /*[27]*/, LBMReal &rho) rho = ((f[TNE] + f[BSW]) + (f[TSE] + f[BNW])) + ((f[BSE] + f[TNW]) + (f[TSW] + f[BNE])) + (((f[NE] + f[SW]) + (f[SE] + f[NW])) + ((f[TE] + f[BW]) + (f[BE] + f[TW])) + ((f[BN] + f[TS]) + (f[TN] + f[BS]))) + - ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[REST]; + ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[ZERO]; } /*=====================================================================*/ static void calcIncompVelocityX1(const LBMReal *const &f /*[27]*/, LBMReal &vx1) @@ -279,7 +279,7 @@ static LBMReal getCompFeqForDirection(const int &direction, const LBMReal &drho, ////----- LBMReal rho = drho + c1; switch (direction) { - case REST: + case ZERO: return REAL_CAST(c8o27 * (drho + rho * (-cu_sq))); case E: return REAL_CAST(c2o27 * (drho + rho * (3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq))); @@ -354,7 +354,7 @@ static void calcCompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, const LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); LBMReal rho = drho + c1; - feq[REST] = c8o27 * (drho + rho * (-cu_sq)); + feq[ZERO] = c8o27 * (drho + rho * (-cu_sq)); feq[E] = c2o27 * (drho + rho * (3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq)); feq[W] = c2o27 * (drho + rho * (3.0 * (-vx1) + c9o2 * (-vx1) * (-vx1) - cu_sq)); feq[N] = c2o27 * (drho + rho * (3.0 * (vx2) + c9o2 * (vx2) * (vx2)-cu_sq)); @@ -395,7 +395,7 @@ static LBMReal getIncompFeqForDirection(const int &direction, const LBMReal &drh LBMReal cu_sq = 1.5f * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); switch (direction) { - case REST: + case ZERO: return REAL_CAST(c8o27 * (drho - cu_sq)); case E: return REAL_CAST(c2o27 * (drho + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq)); @@ -435,25 +435,25 @@ static LBMReal getIncompFeqForDirection(const int &direction, const LBMReal &drh return REAL_CAST(c1o54 * (drho + 3.0 * (-vx2 + vx3) + c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq)); case TNE: return REAL_CAST(c1o216 * - 
(drho + 3.0 * (vx1 + vx2 + vx3) + c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)); + (drho + 3.0 * (vx1 + vx2 + vx3) + c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)); case BSW: return REAL_CAST( c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq)); case BNE: return REAL_CAST(c1o216 * - (drho + 3.0 * (vx1 + vx2 - vx3) + c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)); + (drho + 3.0 * (vx1 + vx2 - vx3) + c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)); case TSW: return REAL_CAST( c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq)); case TSE: return REAL_CAST(c1o216 * - (drho + 3.0 * (vx1 - vx2 + vx3) + c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)); + (drho + 3.0 * (vx1 - vx2 + vx3) + c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)); case BNW: return REAL_CAST( c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq)); case BSE: return REAL_CAST(c1o216 * - (drho + 3.0 * (vx1 - vx2 - vx3) + c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)); + (drho + 3.0 * (vx1 - vx2 - vx3) + c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)); case TNW: return REAL_CAST( c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq)); @@ -469,7 +469,7 @@ static void calcIncompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, con LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); - feq[REST] = c8o27 * (drho - cu_sq); + feq[ZERO] = c8o27 * (drho - cu_sq); feq[E] = c2o27 * (drho + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq); feq[W] = c2o27 * (drho + 3.0 * (-vx1) + c9o2 * (-vx1) * (-vx1) - cu_sq); feq[N] = c2o27 * (drho + 3.0 * (vx2) + c9o2 * (vx2) * (vx2)-cu_sq); @@ -758,7 +758,7 @@ static inline LBMReal calcPress(const LBMReal *const f, LBMReal rho, LBMReal vx1 c2 * (f[NE] + f[SW] + f[SE] + f[NW] + f[TE] + f[BW] + f[BE] + f[TW] + f[TN] + f[BS] + f[BN] 
+ f[TS]) + c3 * (f[TNE] + f[TSW] + f[TSE] + f[TNW] + f[BNE] + f[BSW] + f[BSE] + f[BNW]) - (vx1 * vx1 + vx2 * vx2 + vx3 * vx3)) * - (c1 - c1o2 * OxxPyyPzz) + + (c1 - c1o2 * OxxPyyPzz) + OxxPyyPzz * c1o2 * (rho)) * c1o3; } diff --git a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h index 44d8d3273d6bfb555acb2c994b97dfbeb676f1c3..4dbe8eee09a37c0c220f47619b72bade2e6ec527 100644 --- a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h +++ b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h @@ -45,8 +45,9 @@ class ILBMKernel public: virtual ~ILBMKernel() = default; - virtual void calculate(int step) = 0; - virtual void swapDistributions() = 0; + virtual void calculate(int step) = 0; + virtual double getCalculationTime() = 0; + virtual void swapDistributions() = 0; virtual bool getCompressible() const = 0; virtual SPtr<BCProcessor> getBCProcessor() const = 0; diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h index bfaf9d31275d12ac2d4795c46787af864329980d..be29589b9b7ab239bece700126ae906795c83977 100644 --- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h +++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h @@ -56,6 +56,9 @@ public: virtual SPtr<LBMKernel> clone() = 0; + void calculate(int step) override = 0; + double getCalculationTime() override = 0; + void setBCProcessor(SPtr<BCProcessor> bcp) override; SPtr<BCProcessor> getBCProcessor() const override; diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h index 376330314497224f2ac92e61de7ae4bf81589c93..40570cc3847f71a1942791afa7e95145daafb53b 100644 --- a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h +++ b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h @@ -97,6 +97,14 @@ public: this->init(refLengthWorld, csWorld, rhoWorld, csWorld, refLengthLb, rhoLb, csLb); } + LBMUnitConverter(int /*dummy*/, double uReal, double uLB, double nuReal, double nuLB) + { + factorVelocityLbToW = uReal / uLB; + 
factorViscosityLbToW = nuReal / nuLB; + factorDensityLbToW = factorViscosityLbToW * factorVelocityLbToW * factorVelocityLbToW; + factorPressureLbToW = factorDensityLbToW; + } + virtual ~LBMUnitConverter() = default; double getRefRhoLb() { return refRhoLb; } @@ -132,6 +140,10 @@ public: double getFactorAccWToLb() { return 1.0 / this->getFactorAccLbToW(); } double getFactorTimeLbToW(double deltaX) const { return factorTimeWithoutDx * deltaX; } + ////////////////////////////////////////////////////////////////////////// + double getFactorVelocityLbToW2() { return factorVelocityLbToW; } + double getFactorDensityLbToW2() { return factorDensityLbToW; } + double getFactorPressureLbToW2() { return factorPressureLbToW; } /*==========================================================*/ friend inline std::ostream &operator<<(std::ostream &os, LBMUnitConverter c) @@ -199,7 +211,12 @@ protected: double factorTimeLbToW{ 1.0 }; double factorMassLbToW{ 1.0 }; double refRhoLb{ 1.0 }; - double factorTimeWithoutDx; + double factorTimeWithoutDx{ 0.0 }; + + double factorVelocityLbToW{ 1.0 }; + double factorViscosityLbToW{ 1.0 }; + double factorDensityLbToW{ 1.0 }; + double factorPressureLbToW{ 1.0 }; }; #endif // LBMUNITCONVERTER_H diff --git a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h index b476fd7750fb5ef950d13b6157349607b6d4d7c9..670a597cb84bd4e98450dad2743a8100f04497ea 100644 --- a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h +++ b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h @@ -36,8 +36,8 @@ #if defined(_WIN32) || defined(_WIN64) #define MEMORYUTIL_WINDOWS -#include "psapi.h" #include "windows.h" +#include "psapi.h" #pragma comment(lib, "psapi.lib") #elif defined __APPLE__ #define MEMORYUTIL_APPLE diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp index 
8cbf1801f94111f26f5874753271e24873420a1e..c424a6376a62159da2e4b9f73ccf01858fbde521 100644 --- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp +++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp @@ -294,7 +294,7 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> f[BNW] = f_TSE + feq[BNW]; f[BSE] = f_TNW + feq[BSE]; f[BSW] = f_TNE + feq[BSW]; - f[REST] = f_ZERO + feq[REST]; + f[ZERO] = f_ZERO + feq[ZERO]; // calcFeqsFct(f,rho,vx1,vx2,vx3); // distributions->setDistribution(f, ix1, ix2, ix3); diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.cpp index 9814ab036621d94833ad9b4baff61866b22eac4c..c0efdcc6135b1e06f766621dd4528f96fa32247d 100644 --- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.cpp +++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.cpp @@ -37,13 +37,11 @@ #include "Grid3D.h" #include "Grid3DSystem.h" -SetConnectorsBlockVisitor::SetConnectorsBlockVisitor(SPtr<Communicator> comm, bool fullConnector, int dirs, LBMReal nu) - : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), comm(comm), fullConnector(fullConnector), dirs(dirs), nu(nu) +SetConnectorsBlockVisitor::SetConnectorsBlockVisitor(SPtr<Communicator> comm, bool fullConnector, int dirs, LBMReal nue) + : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), comm(comm), fullConnector(fullConnector), dirs(dirs), nue(nue) { } ////////////////////////////////////////////////////////////////////////// -SetConnectorsBlockVisitor::~SetConnectorsBlockVisitor(void) = default; -////////////////////////////////////////////////////////////////////////// void SetConnectorsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block) { if (!block) diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h index 
7d209e0524a13ab89a48dfc2179203eb17b95162..f6eb15206371af2ff6106a5c82c6c71eba26fb34 100644 --- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h +++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h @@ -39,6 +39,7 @@ #include "Block3DVisitor.h" #include "D3Q27System.h" + class Grid3D; class Block3D; class Communicator; @@ -48,8 +49,8 @@ class InterpolationProcessor; class SetConnectorsBlockVisitor : public Block3DVisitor { public: - SetConnectorsBlockVisitor(SPtr<Communicator> comm, bool fullConnector, int dirs, LBMReal nu); - ~SetConnectorsBlockVisitor() override; + SetConnectorsBlockVisitor(SPtr<Communicator> comm, bool fullConnector, int dirs, LBMReal nue); + ~SetConnectorsBlockVisitor() = default; void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override; ////////////////////////////////////////////////////////////////////////// protected: @@ -58,7 +59,7 @@ protected: bool fullConnector; int dirs; int gridRank; - LBMReal nu; + LBMReal nue; SPtr<InterpolationProcessor> iProcessor; }; diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp index bc4b3d25701789cfb62b107880680a3287486f8b..7dde0c34edf5c16dff22361a6bbc36394a9783ed 100644 --- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp +++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp @@ -1,3 +1,4 @@ +#include "MemoryUtil.h" //======================================================================================= // ____ ____ __ ______ __________ __ __ __ __ // \ \ | | | | | _ \ |___ ___| | | | | / \ | | @@ -43,13 +44,24 @@ #include <utility> ////////////////////////////////////////////////////////////////////////// -SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, SetKernelBlockVisitor::Action action) +SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, + SetKernelBlockVisitor::Action action) : 
Block3DVisitor(0, Grid3DSystem::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true) { } + +SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int &numberOfProcesses, + SetKernelBlockVisitor::Action action) + : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true), + numberOfProcesses(numberOfProcesses) +{ +} + ////////////////////////////////////////////////////////////////////////// void SetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block) { + throwExceptionIfNotEnoughMemory(grid); + if (kernel && (block->getRank() == grid->getRank())) { LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel()); kernel->setCollisionFactor(collFactor); @@ -98,3 +110,27 @@ void SetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block) } void SetKernelBlockVisitor::setNoDataSetFlag(bool flag) { dataSetFlag = flag; } + +void SetKernelBlockVisitor::throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &grid) +{ + auto availableMemory = Utilities::getTotalPhysMem(); + auto requiredMemory = getRequiredPhysicalMemory(grid); + if (requiredMemory > availableMemory) + throw UbException(UB_EXARGS, "SetKernelBlockVisitor: Not enough memory!!!"); +} + +double SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const +{ + unsigned long long numberOfNodesPerBlockWithGhostLayer; + auto numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks(); + auto blockNx = grid->getBlockNX(); + int ghostLayer = 3; + + numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (val<1>(blockNx) + ghostLayer) * + (val<2>(blockNx) + ghostLayer) * (val<3>(blockNx) + ghostLayer); + + auto needMemAll = + double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4)); + + return needMemAll / double(numberOfProcesses); +} diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h 
b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h index c2863555c0490b24e23ae2d99933e62ce94ac467..7ce7c852e2a815bdf0a37f3ef2960e1b5b76e4b4 100644 --- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h +++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h @@ -51,6 +51,10 @@ public: SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel); + + SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int &numberOfProcesses, + SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel); + ~SetKernelBlockVisitor() override = default; void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override; @@ -62,6 +66,12 @@ private: LBMReal nue; Action action; bool dataSetFlag; + + int numberOfProcesses{ 1 }; + + double getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const; + + void throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &grid); }; #endif diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt index 88df905d3eb847c1ed5e9b5bc5a1ec68c549e313..da404e0209ed2c9f36ae323d2e6bd234fb6dfb96 100644 --- a/src/gpu/GksGpu/CMakeLists.txt +++ b/src/gpu/GksGpu/CMakeLists.txt @@ -1,8 +1,3 @@ +project(GksGpu LANGUAGES CUDA CXX) -vf_add_library(BUILDTYPE shared PRIVATE_LINK basics GksMeshAdapter) - -linkCUDA() -linkMPI() - -vf_get_library_name(library_name) -linkOpenMP(${library_name}) +vf_add_library(PRIVATE_LINK basics GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX) diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt index 4a16d6fbc0b99d1ec400944178844123eb50ffc0..cb00b3c016786c41ef5640eb362322bb0a3768f8 100644 --- a/src/gpu/GksMeshAdapter/CMakeLists.txt +++ b/src/gpu/GksMeshAdapter/CMakeLists.txt @@ -1,5 +1,3 @@ +project(GksMeshAdapter LANGUAGES CUDA CXX) - -vf_add_library(BUILDTYPE shared PRIVATE_LINK basics GridGenerator) - -linkCUDA() +vf_add_library(PRIVATE_LINK basics GridGenerator) diff --git 
a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt index aaabd6d5de07940194e8428f0d3c9a18a741ae33..1ce294bf420f657f35397c427929f9f310d04556 100644 --- a/src/gpu/GridGenerator/CMakeLists.txt +++ b/src/gpu/GridGenerator/CMakeLists.txt @@ -1,9 +1,13 @@ +project(GridGenerator LANGUAGES CUDA CXX) -vf_add_library(BUILDTYPE shared PRIVATE_LINK basics) -vf_get_library_name(library_name) +vf_add_library(PRIVATE_LINK basics OpenMP::OpenMP_CXX) -linkCUDA() -linkOpenMP(${library_name}) +vf_get_library_name(library_name) +set_target_properties(${library_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -set_target_properties(${library_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) \ No newline at end of file +# according to linker error when building static libraries. +# https://stackoverflow.com/questions/50033435/cmake-cuda-separate-compilation-static-lib-link-error-on-windows-but-not-on-ubun +if (NOT BUILD_SHARED_LIBRARY) + set_target_properties(${library_name} PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) +endif() \ No newline at end of file diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp index 750550d7e79b5aba978d8052b83668958cebb837..4081aeffd9165e838959c78e39c9b51d6082c4a7 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.cpp @@ -37,7 +37,7 @@ #include "grid/BoundaryConditions/Side.h" #include "grid/Grid.h" -bool BoundaryCondition::isSide( SideType side ) const +bool gg::BoundaryCondition::isSide( SideType side ) const { return this->side->whoAmI() == side; } diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h index e4e67af092841b426e0d7774048af78abedc79ac..b082e6a7402a606b72d08bc28e9b612fa6661974 100644 --- 
a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h @@ -44,25 +44,30 @@ class Grid; class Side; enum class SideType; +namespace gg +{ + class BoundaryCondition { public: std::vector<uint> indices; SPtr<Side> side; - std::vector<std::vector<real> > qs; + std::vector<std::vector<real>> qs; std::vector<uint> patches; virtual char getType() const = 0; - bool isSide( SideType side ) const; + bool isSide(SideType side) const; - real getQ( uint index, uint dir ){ return this->qs[index][dir]; } + real getQ(uint index, uint dir) { return this->qs[index][dir]; } }; +} + ////////////////////////////////////////////////////////////////////////// -class PressureBoundaryCondition : public BoundaryCondition +class PressureBoundaryCondition : public gg::BoundaryCondition { public: static SPtr<PressureBoundaryCondition> make(real rho) @@ -91,7 +96,7 @@ public: ////////////////////////////////////////////////////////////////////////// -class VelocityBoundaryCondition : public BoundaryCondition +class VelocityBoundaryCondition : public gg ::BoundaryCondition { public: static SPtr<VelocityBoundaryCondition> make(real vx, real vy, real vz) diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp index 6937ac88c35e75e95c9e6a3d2973d170857dc028..99097a393735a31abad0dd717a2dcfc2b1d35326 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp @@ -38,6 +38,8 @@ #include "utilities/math/Math.h" +using namespace gg; + void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter) { diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h index 
7208ece4835e7476cdb17d236bf9bfabbba0493a..d8dc9a0e4ac8c2825d49dd19148d53538d027a8e 100644 --- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h +++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h @@ -46,7 +46,11 @@ #define NEGATIVE_DIR -1 class Grid; + +namespace gg +{ class BoundaryCondition; +} class Side; @@ -60,7 +64,7 @@ enum class SideType class Side { public: - virtual void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) = 0; + virtual void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) = 0; virtual int getCoordinate() const = 0; virtual int getDirection() const = 0; @@ -68,12 +72,12 @@ public: virtual SideType whoAmI() const = 0; protected: - static void addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant, + static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant, real startInner, real endInner, real startOuter, real endOuter); - static void setPressureNeighborIndices(SPtr<BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); + static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index); - static void setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uint index); + static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index); private: static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2); @@ -82,7 +86,7 @@ private: class MX : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -103,7 +107,7 @@ public: class PX : public Side { 
public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -125,7 +129,7 @@ public: class MY : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -146,7 +150,7 @@ public: class PY : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -168,7 +172,7 @@ public: class MZ : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { @@ -189,7 +193,7 @@ public: class PZ : public Side { public: - void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition) override; + void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override; int getCoordinate() const override { diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 84b6c8062d5d4cbe77a8ac13919ba9b7ca4b976e..f398e89a0936e0e03261c7b75f22e7311d96e161 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -60,7 +60,10 @@ class Grid; enum class SideType; +namespace gg 
+{ class BoundaryCondition; +} class GeometryBoundaryCondition; class GridBuilder @@ -93,7 +96,7 @@ public: virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const = 0; virtual void getVelocityQs(real* qs[27], int level) const = 0; - virtual SPtr<BoundaryCondition> getBoundaryCondition( SideType side, uint level ) const = 0; + virtual SPtr<gg::BoundaryCondition> getBoundaryCondition( SideType side, uint level ) const = 0; }; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp index 09debba7d011ec20c389ae9657b8a46185863b58..7cf735197b9fd69c7f85351e659fb31c4818efd6 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp @@ -227,7 +227,7 @@ void LevelGridBuilder::getVelocityQs(real* qs[27], int level) const } } -GRIDGENERATOR_EXPORT SPtr<BoundaryCondition> LevelGridBuilder::getBoundaryCondition(SideType side, uint level) const +GRIDGENERATOR_EXPORT SPtr<gg::BoundaryCondition> LevelGridBuilder::getBoundaryCondition(SideType side, uint level) const { for( auto bc : this->boundaryConditions[level]->velocityBoundaryConditions ) if( bc->isSide(side) ) diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index dcf491070de02ed4d700e778d0d883ce2b9b959e..100dedede48d962174b852b88ede465777bc25c2 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -87,7 +87,7 @@ public: GRIDGENERATOR_EXPORT virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const; GRIDGENERATOR_EXPORT virtual void getVelocityQs(real* qs[27], int level) const; - GRIDGENERATOR_EXPORT SPtr<BoundaryCondition> getBoundaryCondition( SideType side, uint level ) const override; + GRIDGENERATOR_EXPORT 
SPtr<gg::BoundaryCondition> getBoundaryCondition( SideType side, uint level ) const override; protected: diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index 477a8cc73b17873dfbfc0e11173db0ff5ad5593d..95405b25864f506e5580fa6711a4389d39d8d7a7 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -1,21 +1,14 @@ +project(VirtualFluids_GPU LANGUAGES CUDA CXX) +set(additional_libraries "") +if(MSVC) + set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix +endif() -IF(MSVC) - set(libsToLink ws2_32 GridGenerator basics) # ws_32 throws an error on Phoenix -ELSE(MSVC) - set(libsToLink GridGenerator basics) -ENDIF(MSVC) +vf_add_library(PRIVATE_LINK ${additional_libraries} GridGenerator basics MPI::MPI_CXX) - -vf_add_library(BUILDTYPE shared PRIVATE_LINK ${libsToLink}) - -linkCUDA() +linkBoost(COMPONENTS "serialization") #SET(TPN_WIN32 "/EHsc") #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake #https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax - -IF(MSVC) - vf_get_library_name(library_name) - set_target_properties(${library_name} PROPERTIES LINK_FLAGS "/ignore:4251") -ENDIF(MSVC)