diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1b8ce0027cc34042ed2507b883898832f4eaa9b8..64c0cfd85cae1b82b32b47de7ac95fbc42567578 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,8 @@ "ms-vscode.cpptools", "ms-vscode.cpptools-extension-pack", "xaver.clang-format", - "notskm.clang-tidy" + "notskm.clang-tidy", + "streetsidesoftware.code-spell-checker" ], "runArgs": ["--gpus","all"], "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.2", diff --git a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h index cabbc21c92113b490d31b6e6ae9ad834b41fd44b..f1610b7e274e1c12c34772ef8a8d74da49ee9a81 100644 --- a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h +++ b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h @@ -6,7 +6,7 @@ #include <memory> #include <ostream> -#include "global.h" +#include "gpu/GridGenerator/global.h" class VertexMemento; diff --git a/src/gpu/GridGenerator/grid/Cell.h b/src/gpu/GridGenerator/grid/Cell.h index 845e02eaa66a5b2327b5a2ba63b1227962ab8f61..0d6c3e13d391a451201131eb27216102b3545077 100644 --- a/src/gpu/GridGenerator/grid/Cell.h +++ b/src/gpu/GridGenerator/grid/Cell.h @@ -1,9 +1,9 @@ #ifndef CELL_H #define CELL_H -#include "global.h" +#include "gpu/GridGenerator/global.h" -#include "utilities/cuda/cudaDefines.h" +#include "gpu/GridGenerator/utilities/cuda/cudaDefines.h" struct Point { diff --git a/src/gpu/GridGenerator/grid/Field.h b/src/gpu/GridGenerator/grid/Field.h index 9e7513108fa039cc6b14ba519fce6acf667ed2f6..d2ad5ca782ff68f1983108609c7bb23e729985b0 100644 --- a/src/gpu/GridGenerator/grid/Field.h +++ b/src/gpu/GridGenerator/grid/Field.h @@ -1,7 +1,7 @@ #ifndef FIELD_H #define FIELD_H -#include "global.h" +#include "gpu/GridGenerator/global.h" struct Vertex; class GridStrategy; diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h index 
21cfea189e58067fabc753d27fb13803abdcb9aa..36d607bc06c759b3dc7a7d27dfacbe383283a24e 100644 --- a/src/gpu/GridGenerator/grid/Grid.h +++ b/src/gpu/GridGenerator/grid/Grid.h @@ -3,11 +3,11 @@ #include "Core/LbmOrGks.h" -#include "global.h" +#include "gpu/GridGenerator/global.h" -#include "geometries/Vertex/Vertex.h" +#include "gpu/GridGenerator/geometries/Vertex/Vertex.h" -#include "grid/Cell.h" +#include "gpu/GridGenerator/grid/Cell.h" class TriangularMesh; struct Vertex; diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h index 9ff99fad4e2577536f50ce112acdc0e2e6bf40d5..3d516c6710d44d5c8da45c0f71e36ea21e6e9655 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h @@ -5,7 +5,7 @@ #include <string> #include <memory> -#include "global.h" +#include "gpu/GridGenerator/global.h" #define GEOMQS 6 #define INLETQS 0 diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h index f212f2c02ee21ab8d1f944863b6e1b59f0615f47..f90bbe231a64a2d1c51536fd96fa69792c1348da 100644 --- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h +++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h @@ -6,11 +6,11 @@ #include <memory> #include <array> -#include "global.h" +#include "gpu/GridGenerator/global.h" -#include "grid/GridBuilder/GridBuilder.h" -#include "grid/Grid.h" -#include "grid/GridInterface.h" +#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h" +#include "gpu/GridGenerator/grid/Grid.h" +#include "gpu/GridGenerator/grid/GridInterface.h" struct Vertex; class Grid; diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h index 09f278a4d41bba346fd9ca3b38d028ef5d9fca4d..08c4f795b320cdd425e7d2c10c4d3173ecb6e83c 100644 --- a/src/gpu/GridGenerator/grid/GridImp.h +++ b/src/gpu/GridGenerator/grid/GridImp.h @@ -5,12 +5,12 @@ 
#include "Core/LbmOrGks.h" -#include "global.h" +#include "gpu/GridGenerator/global.h" -#include "grid/distributions/Distribution.h" -#include "grid/Grid.h" -#include "grid/Cell.h" -#include "grid/Field.h" +#include "gpu/GridGenerator/grid/distributions/Distribution.h" +#include "gpu/GridGenerator/grid/Grid.h" +#include "gpu/GridGenerator/grid/Cell.h" +#include "gpu/GridGenerator/grid/Field.h" class TriangularMesh; struct Vertex; @@ -39,7 +39,7 @@ extern CONSTANT int DIRECTIONS[DIR_END_MAX][DIMENSION]; class GRIDGENERATOR_EXPORT GridImp : public enableSharedFromThis<GridImp>, public Grid { -private: +protected: CUDA_HOST GridImp(); CUDA_HOST GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, SPtr<GridStrategy> gridStrategy, Distribution d, uint level); diff --git a/src/gpu/GridGenerator/grid/GridInterface.h b/src/gpu/GridGenerator/grid/GridInterface.h index d0f04ea3451b3044c349aa0e27d2f7c6e567128c..2044fb494924046ac255ad1a1e7cc74496adae6e 100644 --- a/src/gpu/GridGenerator/grid/GridInterface.h +++ b/src/gpu/GridGenerator/grid/GridInterface.h @@ -1,7 +1,7 @@ #ifndef GRID_INTERFACE_H #define GRID_INTERFACE_H -#include "global.h" +#include "gpu/GridGenerator/global.h" class GridImp; diff --git a/src/gpu/GridGenerator/grid/distributions/Distribution.h b/src/gpu/GridGenerator/grid/distributions/Distribution.h index b05b5db3652ee952ff083db560ed8316688819c9..04b7093aadd16cc755358d123a415fd5bb032703 100644 --- a/src/gpu/GridGenerator/grid/distributions/Distribution.h +++ b/src/gpu/GridGenerator/grid/distributions/Distribution.h @@ -4,7 +4,7 @@ #include <vector> #include <string> -#include "global.h" +#include "gpu/GridGenerator/global.h" #define DIR_END_MAX 27 diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h index eb10c9c107bb8e777e6f9a5d7bb4a57d021266fe..116c84b12365e8932dadd82180711bcaa9706928 100644 --- 
a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h +++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h @@ -10,7 +10,7 @@ #include "Core/NonCreatable.h" -#include "global.h" +#include "gpu/GridGenerator/global.h" class UnstructuredGridBuilder; class GridBuilder; diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index f64b0e676f3f76aac601a372c34b5ad2559df6fe..14bf4f6641f9b874788d3674b11fa6ca135f7564 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -20,6 +20,7 @@ vf_add_tests() if(BUILD_VF_UNIT_TESTS) set_target_properties(VirtualFluids_GPUTests PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_source_files_properties(Kernel/Utilities/DistributionHelperTests.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA) target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/") endif() diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h index d12a25733539c319cdfd6ead1d6aa169fe6ae52d..7ffe404cf50f2fdddecf7594da75d05595db19d4 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h @@ -9,7 +9,7 @@ #include "PointerDefinitions.h" #include "VirtualFluids_GPU_export.h" -#include <GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h> +#include <gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h> class Parameter; class GridBuilder; diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp 
b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp index 5edae460afafff5cb68c9d98fc13fcbe48067fce..fe1ae5ba66c23e7fd2311f085327eb3b0a57a717 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp @@ -339,7 +339,7 @@ void GridGenerator::initalValuesDomainDecompostion() builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborX[j].index, direction, level); if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC) - indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, j, direction); + indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, j, direction); //////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyProcessNeighborXIndex(level, j); //////////////////////////////////////////////////////////////////////////////////////// @@ -408,7 +408,7 @@ void GridGenerator::initalValuesDomainDecompostion() builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborY[j].index, direction, level); if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC) - indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseY(level, j, direction); + indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseY(level, j, direction); //////////////////////////////////////////////////////////////////////////////////////// cudaMemoryManager->cudaCopyProcessNeighborYIndex(level, j); //////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp index 
70a5505d95ad16e9df3a80545a23b90d83f87d3b..1bdf32f281dc4f2d22cf6bf3b3ff43ca62cd592c 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp @@ -1,221 +1,274 @@ #include "IndexRearrangementForStreams.h" +#include "Communication/Communicator.h" #include "Parameter/Parameter.h" -#include <GridGenerator/grid/GridBuilder/GridBuilder.h> #include <GridGenerator/grid/Grid.h> -#include "Communication/Communicator.h" +#include <GridGenerator/grid/GridBuilder/GridBuilder.h> -#include <iostream> #include <algorithm> +#include <iostream> IndexRearrangementForStreams::IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder) : para(para), builder(builder) -{ } +{ +} -void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j, +void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, + int indexOfProcessNeighbor, int direction) { // init send indices for communication after coarse to fine std::cout << "communication: reorder send indices X "; - para->initNumberOfProcessNeighborsAfterFtoCX(level); + para->initProcessNeighborsAfterFtoCX(level); std::vector<uint> sendIndicesForCommAfterFtoCPositions; - reorderSendIndicesForCommAfterFtoCX(direction, level, j, sendIndicesForCommAfterFtoCPositions); - para->setSendProcessNeighborsAfterFtoCX(para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes, - level, j); + reorderSendIndicesForCommAfterFtoCX(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions); + para->setSendProcessNeighborsAfterFtoCX( + para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); - // send sendIndicesForCommAfterFtoCPositions to receiving process and 
receive recvIndicesForCommAfterFtoCPositions from sending process + // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions + // from sending process std::cout << "mpi send and receive "; std::vector<uint> recvIndicesForCommAfterFtoCPositions; recvIndicesForCommAfterFtoCPositions.resize( - (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes * + (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes * 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way auto comm = vf::gpu::Communicator::getInstanz(); comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->recvProcessNeighborX[j].rankNeighbor, + para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor, sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->sendProcessNeighborX[j].rankNeighbor); - + para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].rankNeighbor); + // resize receiving vector to correct size auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end()); - recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way + recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), + recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way // init receive indices for communication after coarse to fine std::cout << "reorder receive indices "; - reorderRecvIndicesForCommAfterFtoCX(direction, level, j, recvIndicesForCommAfterFtoCPositions); - para->setRecvProcessNeighborsAfterFtoCX(para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].numberOfNodes, - level, j); - copyProcessNeighborToCommAfterFtoCX(level, j); + 
reorderRecvIndicesForCommAfterFtoCX(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions); + para->setRecvProcessNeighborsAfterFtoCX( + para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); + copyProcessNeighborToCommAfterFtoCX(level, indexOfProcessNeighbor); std::cout << "done." << std::endl; } -void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction) +void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, + int indexOfProcessNeighbor, + int direction) { // init send indices for communication after coarse to fine std::cout << "communication: reorder send indices Y "; - para->initNumberOfProcessNeighborsAfterFtoCY(level); + para->initProcessNeighborsAfterFtoCY(level); std::vector<uint> sendIndicesForCommAfterFtoCPositions; - reorderSendIndicesForCommAfterFtoCY(direction, level, j, sendIndicesForCommAfterFtoCPositions); - para->setSendProcessNeighborsAfterFtoCY(para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes, - level, j); + reorderSendIndicesForCommAfterFtoCY(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions); + para->setSendProcessNeighborsAfterFtoCY( + para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); - // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions from sending process + // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions + // from sending process std::cout << "mpi send and receive "; - std::vector<uint> recvIndicesForCommAfterFtoCPositions; - recvIndicesForCommAfterFtoCPositions.resize((size_t) para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes * - 2); // give vector an arbitraty 
size (larger than needed) // TODO: Find a better way + std::vector<uint> recvIndicesForCommAfterFtoCPositions; + recvIndicesForCommAfterFtoCPositions.resize( + (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes * + 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way auto comm = vf::gpu::Communicator::getInstanz(); comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->recvProcessNeighborY[j].rankNeighbor, + para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor, sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->sendProcessNeighborY[j].rankNeighbor); - + para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].rankNeighbor); + // resize receiving vector to correct size auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end()); - recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way + recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), + recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way // init receive indices for communication after coarse to fine std::cout << "reorder receive indices "; - reorderRecvIndicesForCommAfterFtoCY(direction, level, j, recvIndicesForCommAfterFtoCPositions); - para->setRecvProcessNeighborsAfterFtoCY(para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].numberOfNodes, - level, j); + reorderRecvIndicesForCommAfterFtoCY(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions); + para->setRecvProcessNeighborsAfterFtoCY( + para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); - copyProcessNeighborToCommAfterFtoCY(level, j); 
+ copyProcessNeighborToCommAfterFtoCY(level, indexOfProcessNeighbor); std::cout << "done." << std::endl; } -void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction) +void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, + int indexOfProcessNeighbor, + int direction) { // init send indices for communication after coarse to fine std::cout << "communication: reorder send indices Z "; - para->initNumberOfProcessNeighborsAfterFtoCZ(level); + para->initProcessNeighborsAfterFtoCZ(level); std::vector<uint> sendIndicesForCommAfterFtoCPositions; - reorderSendIndicesForCommAfterFtoCZ(direction, level, j, sendIndicesForCommAfterFtoCPositions); - para->setSendProcessNeighborsAfterFtoCZ(para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes, - level, j); + reorderSendIndicesForCommAfterFtoCZ(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions); + para->setSendProcessNeighborsAfterFtoCZ( + para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); - // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions from sending process + // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions + // from sending process std::cout << "mpi send and receive "; - std::vector<uint> recvIndicesForCommAfterFtoCPositions; - recvIndicesForCommAfterFtoCPositions.resize((size_t) para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes * - 2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way + std::vector<uint> recvIndicesForCommAfterFtoCPositions; + recvIndicesForCommAfterFtoCPositions.resize( + (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes * + 2); // give vector an 
arbitraty size (larger than needed) // TODO: Find a better way auto comm = vf::gpu::Communicator::getInstanz(); comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->recvProcessNeighborZ[j].rankNeighbor, + para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor, sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(), - para->getParH(level)->sendProcessNeighborZ[j].rankNeighbor); - + para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor); + // resize receiving vector to correct size auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end()); - recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way + recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), + recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way // init receive indices for communication after coarse to fine std::cout << "reorder receive indices "; - reorderRecvIndicesForCommAfterFtoCZ(direction, level, j, recvIndicesForCommAfterFtoCPositions); - para->setRecvProcessNeighborsAfterFtoCZ(para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].numberOfNodes, - level, j); + reorderRecvIndicesForCommAfterFtoCZ(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions); + para->setRecvProcessNeighborsAfterFtoCZ( + para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes, level, + indexOfProcessNeighbor); - copyProcessNeighborToCommAfterFtoCZ(level, j); + copyProcessNeighborToCommAfterFtoCZ(level, indexOfProcessNeighbor); std::cout << "done." 
<< std::endl; } -void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCX(const uint &level, int j) +void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor) { // init f[0]* - para->getParD(level)->sendProcessNeighborsAfterFtoCX[j].f[0] = para->getParD(level)->sendProcessNeighborX[j].f[0]; - para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].f[0] = para->getParH(level)->sendProcessNeighborX[j].f[0]; - para->getParD(level)->recvProcessNeighborsAfterFtoCX[j].f[0] = para->getParD(level)->recvProcessNeighborX[j].f[0]; - para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].f[0] = para->getParH(level)->recvProcessNeighborX[j].f[0]; + para->getParD(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] = + para->getParD(level)->sendProcessNeighborX[indexOfProcessNeighbor].f[0]; + para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] = + para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].f[0]; + para->getParD(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] = + para->getParD(level)->recvProcessNeighborX[indexOfProcessNeighbor].f[0]; + para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] = + para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].f[0]; // init index* - para->getParD(level)->sendProcessNeighborsAfterFtoCX[j].index = para->getParD(level)->sendProcessNeighborX[j].index; - para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].index = para->getParH(level)->sendProcessNeighborX[j].index; - para->getParD(level)->recvProcessNeighborsAfterFtoCX[j].index = para->getParD(level)->recvProcessNeighborX[j].index; - para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].index = para->getParH(level)->recvProcessNeighborX[j].index; + para->getParD(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index = + 
para->getParD(level)->sendProcessNeighborX[indexOfProcessNeighbor].index; + para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index = + para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].index; + para->getParD(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index = + para->getParD(level)->recvProcessNeighborX[indexOfProcessNeighbor].index; + para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index = + para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].index; // rank neighbor - para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].rankNeighbor = para->getParH(level)->sendProcessNeighborX[j].rankNeighbor; - para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].rankNeighbor = para->getParH(level)->recvProcessNeighborX[j].rankNeighbor; + para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].rankNeighbor; + para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor; } -void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCY(const uint &level, int j) +void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor) { // init f[0]* - para->getParD(level)->sendProcessNeighborsAfterFtoCY[j].f[0] = para->getParD(level)->sendProcessNeighborY[j].f[0]; - para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].f[0] = para->getParH(level)->sendProcessNeighborY[j].f[0]; - para->getParD(level)->recvProcessNeighborsAfterFtoCY[j].f[0] = para->getParD(level)->recvProcessNeighborY[j].f[0]; - para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].f[0] = para->getParH(level)->recvProcessNeighborY[j].f[0]; + para->getParD(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] = + 
para->getParD(level)->sendProcessNeighborY[indexOfProcessNeighbor].f[0]; + para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] = + para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].f[0]; + para->getParD(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] = + para->getParD(level)->recvProcessNeighborY[indexOfProcessNeighbor].f[0]; + para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] = + para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].f[0]; // init index* - para->getParD(level)->sendProcessNeighborsAfterFtoCY[j].index = para->getParD(level)->sendProcessNeighborY[j].index; - para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].index = para->getParH(level)->sendProcessNeighborY[j].index; - para->getParD(level)->recvProcessNeighborsAfterFtoCY[j].index = para->getParD(level)->recvProcessNeighborY[j].index; - para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].index = para->getParH(level)->recvProcessNeighborY[j].index; + para->getParD(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index = + para->getParD(level)->sendProcessNeighborY[indexOfProcessNeighbor].index; + para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index = + para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].index; + para->getParD(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index = + para->getParD(level)->recvProcessNeighborY[indexOfProcessNeighbor].index; + para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index = + para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].index; // rank neighbor - para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].rankNeighbor = para->getParH(level)->sendProcessNeighborY[j].rankNeighbor; - para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].rankNeighbor = para->getParH(level)->recvProcessNeighborY[j].rankNeighbor; + 
para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].rankNeighbor; + para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor; } -void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCZ(const uint &level, int j) +void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCZ(const uint &level, int indexOfProcessNeighbor) { // init f[0]* - para->getParD(level)->sendProcessNeighborsAfterFtoCZ[j].f[0] = para->getParD(level)->sendProcessNeighborZ[j].f[0]; - para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].f[0] = para->getParH(level)->sendProcessNeighborZ[j].f[0]; - para->getParD(level)->recvProcessNeighborsAfterFtoCZ[j].f[0] = para->getParD(level)->recvProcessNeighborZ[j].f[0]; - para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].f[0] = para->getParH(level)->recvProcessNeighborZ[j].f[0]; + para->getParD(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] = + para->getParD(level)->sendProcessNeighborZ[indexOfProcessNeighbor].f[0]; + para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] = + para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].f[0]; + para->getParD(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] = + para->getParD(level)->recvProcessNeighborZ[indexOfProcessNeighbor].f[0]; + para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] = + para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].f[0]; // init index* - para->getParD(level)->sendProcessNeighborsAfterFtoCZ[j].index = para->getParD(level)->sendProcessNeighborZ[j].index; - para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].index = para->getParH(level)->sendProcessNeighborZ[j].index; - 
para->getParD(level)->recvProcessNeighborsAfterFtoCZ[j].index = para->getParD(level)->recvProcessNeighborZ[j].index; - para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].index = para->getParH(level)->recvProcessNeighborZ[j].index; + para->getParD(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index = + para->getParD(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index; + para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index = + para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index; + para->getParD(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index = + para->getParD(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index; + para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index = + para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index; // rank neighbor - para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].rankNeighbor = para->getParH(level)->sendProcessNeighborZ[j].rankNeighbor; - para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].rankNeighbor = para->getParH(level)->recvProcessNeighborZ[j].rankNeighbor; + para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor; + para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].rankNeighbor = + para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor; } -void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCX(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCX( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *sendIndices = para->getParH(level)->sendProcessNeighborX[j].index; - int &numberOfSendNeighborsAfterFtoC = 
para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes; - reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j, + int *sendIndices = para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].index; + int &numberOfSendNodesAfterFtoC = + para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes; + reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCY(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCY( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *sendIndices = para->getParH(level)->sendProcessNeighborY[j].index; - int &numberOfSendNeighborsAfterFtoC = para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes; - reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j, + int *sendIndices = para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].index; + int &numberOfSendNodesAfterFtoC = + para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes; + reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCZ( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *sendIndices = para->getParH(level)->sendProcessNeighborZ[j].index; - int 
&numberOfSendNeighborsAfterFtoC = para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes; - reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j, + int *sendIndices = para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index; + int &numberOfSendNodesAfterFtoC = + para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes; + reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNeighborsAfterFtoC, - int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC( + int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction, int level, + std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { *logging::out << logging::Logger::INFO_INTERMEDIATE << "reorder send indices for communication after fine to coarse: level: " << level << " direction: " << direction; if (para->getParH(level)->intCF.kCF == 0 || para->getParH(level)->intFC.kFC == 0) *logging::out << logging::Logger::LOGGER_ERROR - << "reorderSendIndicesForCommAfterFtoC(): iCellFCC needs to be inititalized before calling " + << "reorderSendIndicesForCommAfterFtoC(): para->getParH(level)->intCF needs to be inititalized before calling " "this function " << "\n"; @@ -224,10 +277,10 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendI std::vector<int> sendIndicesOther; uint numberOfSendIndices = builder->getNumberOfSendIndices(direction, level); - //iCellFCC + // iCellFCC for (uint posInSendIndices = 0; posInSendIndices < numberOfSendIndices; posInSendIndices++) { sparseIndexSend = sendIndices[posInSendIndices]; - if (isSparseIndexInICellFCC(para->getParH(level)->intFC.kFC, sparseIndexSend, 
level)){ + if (isSparseIndexInICellFCC(para->getParH(level)->intFC.kFC, sparseIndexSend, level)) { addUniqueIndexToCommunicationVectors(sendIndicesAfterFtoC, sparseIndexSend, sendIndicesForCommAfterFtoCPositions, posInSendIndices); } @@ -240,25 +293,25 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendI findIfSparseIndexIsInSendIndicesAndAddToCommVectors(sparseIndex, sendIndices, numberOfSendIndices, sendIndicesAfterFtoC, sendIndicesForCommAfterFtoCPositions); - numberOfSendNeighborsAfterFtoC = (int)sendIndicesAfterFtoC.size(); + numberOfSendNodesAfterFtoC = (int)sendIndicesAfterFtoC.size(); findIndicesNotInCommAfterFtoC(numberOfSendIndices, sendIndices, sendIndicesAfterFtoC, sendIndicesOther); // copy new vectors back to sendIndices array - for (int i = 0; i < numberOfSendNeighborsAfterFtoC; i++) + for (int i = 0; i < numberOfSendNodesAfterFtoC; i++) sendIndices[i] = sendIndicesAfterFtoC[i]; for (uint i = 0; i < (uint)sendIndicesOther.size(); i++) - sendIndices[i + numberOfSendNeighborsAfterFtoC] = sendIndicesOther[i]; + sendIndices[i + numberOfSendNodesAfterFtoC] = sendIndicesOther[i]; *logging::out << logging::Logger::INFO_INTERMEDIATE << "... 
Process " << " " << vf::gpu::Communicator::getInstanz()->getPID() - << " numberOfSendNeighborsAfterFtoC: " << numberOfSendNeighborsAfterFtoC << "\n "; + << " numberOfSendNodesAfterFtoC: " << numberOfSendNodesAfterFtoC << "\n "; - if (numberOfSendNeighborsAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) { + if (numberOfSendNodesAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) { *logging::out << logging::Logger::LOGGER_ERROR << "reorderSendIndicesForCommAfterFtoC(): incorrect number of nodes" << "\n"; - std::cout << "numberOfSendNeighborsAfterFtoC = " << numberOfSendNeighborsAfterFtoC + std::cout << "numberOfSendNodesAfterFtoC = " << numberOfSendNodesAfterFtoC << ", sendOrIndicesOther.size() = " << sendIndicesOther.size() << ", numberOfSendOrRecvIndices = " << numberOfSendIndices << std::endl; } @@ -292,7 +345,7 @@ void IndexRearrangementForStreams::aggregateNodesInICellCFC(int level, std::vect nodesCFC.push_back(neighborY[neighborX[sparseIndex]]); nodesCFC.push_back(neighborZ[neighborX[sparseIndex]]); nodesCFC.push_back(neighborZ[neighborY[sparseIndex]]); - nodesCFC.push_back(neighborZ[neighborY[neighborX[sparseIndex]]]); + nodesCFC.push_back(neighborZ[neighborY[neighborX[sparseIndex]]]); } // remove duplicate nodes @@ -305,8 +358,9 @@ void IndexRearrangementForStreams::addUniqueIndexToCommunicationVectors( std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend, std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions, uint &posInSendIndices) const { - // add index to corresponding vectors but omit indices which are already in sendIndicesAfterFtoC - if (std::find(sendIndicesAfterFtoC.begin(), sendIndicesAfterFtoC.end(), sparseIndexSend) == sendIndicesAfterFtoC.end()) { + // add index to corresponding vectors, but omit indices which are already in sendIndicesAfterFtoC + if (std::find(sendIndicesAfterFtoC.begin(), sendIndicesAfterFtoC.end(), sparseIndexSend) == + sendIndicesAfterFtoC.end()) { 
sendIndicesAfterFtoC.push_back(sparseIndexSend); sendIndicesForCommAfterFtoCPositions.push_back(posInSendIndices); } @@ -341,37 +395,37 @@ void IndexRearrangementForStreams::findIndicesNotInCommAfterFtoC(const uint &num } } -void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCX( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *recvIndices = para->getParH(level)->recvProcessNeighborX[j].index; - int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].numberOfNodes; - reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j, + int *recvIndices = para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].index; + int &numberOfRecvNodesAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes; + reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCY( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *recvIndices = para->getParH(level)->recvProcessNeighborY[j].index; - int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].numberOfNodes; - reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j, + int *recvIndices = para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].index; + int &numberOfRecvNodesAfterFtoC = 
para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes; + reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCZ( + int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { - int *recvIndices = para->getParH(level)->recvProcessNeighborZ[j].index; - int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].numberOfNodes; - reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j, + int *recvIndices = para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index; + int &numberOfRecvNodesAfterFtoC = + para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes; + reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level, sendIndicesForCommAfterFtoCPositions); } -void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(int *recvIndices, - int &numberOfRecvNeighborsAfterFtoC, int direction, int level, - int j, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) +void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC( + int *recvIndices, int &numberOfRecvNodesAfterFtoC, int direction, int level, + std::vector<uint> &sendIndicesForCommAfterFtoCPositions) { *logging::out << logging::Logger::INFO_INTERMEDIATE << "reorder receive indices for communication after fine to coarse: level: " << level @@ -391,23 +445,23 @@ void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(int *recvI findIndicesNotInCommAfterFtoC(numberOfRecvIndices, recvIndices, recvIndicesAfterFtoC, 
recvIndicesOther); - numberOfRecvNeighborsAfterFtoC = (int)recvIndicesAfterFtoC.size(); + numberOfRecvNodesAfterFtoC = (int)recvIndicesAfterFtoC.size(); // copy new vectors back to sendIndices array - for (int i = 0; i < numberOfRecvNeighborsAfterFtoC; i++) + for (int i = 0; i < numberOfRecvNodesAfterFtoC; i++) recvIndices[i] = recvIndicesAfterFtoC[i]; for (uint i = 0; i < (uint)recvIndicesOther.size(); i++) - recvIndices[i + numberOfRecvNeighborsAfterFtoC] = recvIndicesOther[i]; + recvIndices[i + numberOfRecvNodesAfterFtoC] = recvIndicesOther[i]; *logging::out << logging::Logger::INFO_INTERMEDIATE << "... Process " << " " << vf::gpu::Communicator::getInstanz()->getPID() - << " numberOfRecvNeighborsAfterFtoC: " << numberOfRecvNeighborsAfterFtoC << "\n "; + << " numberOfRecvNodesAfterFtoC: " << numberOfRecvNodesAfterFtoC << "\n "; - if (numberOfRecvNeighborsAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) { + if (numberOfRecvNodesAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) { *logging::out << logging::Logger::LOGGER_ERROR << "reorderRecvIndicesForCommAfterFtoC(): incorrect number of nodes" << "\n"; - std::cout << "numberOfRecvNeighborsAfterFtoC = " << numberOfRecvNeighborsAfterFtoC + std::cout << "numberOfRecvNodesAfterFtoC = " << numberOfRecvNodesAfterFtoC << ", recvIndicesOther.size() = " << recvIndicesOther.size() << ", numberOfRecvIndices = " << numberOfRecvIndices << std::endl; } @@ -429,8 +483,6 @@ void IndexRearrangementForStreams::splitFineToCoarseIntoBorderAndBulk(const uint void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkFC(int level) { - // this function reorders the arrays of FCC/FCF indices and return pointers and sizes of the new subarrays - // create some local variables for better readability uint *iCellFccAll = para->getParH(level)->intFC.ICellFCC; uint *iCellFcfAll = para->getParH(level)->intFC.ICellFCF; @@ -460,6 +512,7 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkFC(int level 
para->getParH(level)->intFCBulk.ICellFCF = iCellFcfAll + para->getParH(level)->intFCBorder.kFC; // copy the created vectors to the memory addresses of the old arrays + // this is inefficient :( for (uint i = 0; i < (uint)iCellFccBorderVector.size(); i++) { iCellFccAll[i] = iCellFccBorderVector[i]; iCellFcfAll[i] = iCellFcfBorderVector[i]; @@ -477,25 +530,25 @@ void IndexRearrangementForStreams::splitCoarseToFineIntoBorderAndBulk(const uint para->getParD(level)->intCFBorder.kCF = para->getParH(level)->intCFBorder.kCF; para->getParD(level)->intCFBulk.kCF = para->getParH(level)->intCFBulk.kCF; para->getParD(level)->intCFBorder.ICellCFC = para->getParD(level)->intCF.ICellCFC; - para->getParD(level)->intCFBulk.ICellCFC = para->getParD(level)->intCFBorder.ICellCFC + para->getParD(level)->intCFBorder.kCF; + para->getParD(level)->intCFBulk.ICellCFC = + para->getParD(level)->intCFBorder.ICellCFC + para->getParD(level)->intCFBorder.kCF; para->getParD(level)->intCFBorder.ICellCFF = para->getParD(level)->intCF.ICellCFF; - para->getParD(level)->intCFBulk.ICellCFF = para->getParD(level)->intCFBorder.ICellCFF + para->getParD(level)->intCFBorder.kCF; - para->getParD(level)->offCFBulk.xOffCF = para->getParD(level)->offCF.xOffCF + para->getParD(level)->intCFBorder.kCF; - para->getParD(level)->offCFBulk.yOffCF = para->getParD(level)->offCF.yOffCF + para->getParD(level)->intCFBorder.kCF; - para->getParD(level)->offCFBulk.zOffCF = para->getParD(level)->offCF.zOffCF + para->getParD(level)->intCFBorder.kCF; + para->getParD(level)->intCFBulk.ICellCFF = + para->getParD(level)->intCFBorder.ICellCFF + para->getParD(level)->intCFBorder.kCF; + para->getParD(level)->offCFBulk.xOffCF = para->getParD(level)->offCF.xOffCF + para->getParD(level)->intCFBorder.kCF; + para->getParD(level)->offCFBulk.yOffCF = para->getParD(level)->offCF.yOffCF + para->getParD(level)->intCFBorder.kCF; + para->getParD(level)->offCFBulk.zOffCF = para->getParD(level)->offCF.zOffCF + para->getParD(level)->intCFBorder.kCF; } 
-void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level) -{ - // this function reorders the arrays of CFC/CFF indices and sets pointers and sizes of the new subarrays - +void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level) +{ // create some local variables for better readability - uint *iCellCfcAll = para->getParH(level)->intCF.ICellCFC; - uint *iCellCffAll = para->getParH(level)->intCF.ICellCFF; - uint *neighborX_SP = this->para->getParH(level)->neighborX_SP; - uint *neighborY_SP = this->para->getParH(level)->neighborY_SP; - uint *neighborZ_SP = this->para->getParH(level)->neighborZ_SP; - auto grid = this->builder->getGrid((uint)level); + uint *iCellCfcAll = para->getParH(level)->intCF.ICellCFC; + uint *iCellCffAll = para->getParH(level)->intCF.ICellCFF; + uint *neighborX_SP = this->para->getParH(level)->neighborX_SP; + uint *neighborY_SP = this->para->getParH(level)->neighborY_SP; + uint *neighborZ_SP = this->para->getParH(level)->neighborZ_SP; + auto grid = this->builder->getGrid((uint)level); std::vector<uint> iCellCfcBorderVector; std::vector<uint> iCellCfcBulkVector; @@ -520,7 +573,8 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level grid->isSparseIndexInFluidNodeIndicesBorder(neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]) || grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborX_SP[sparseIndexOfICellBSW]]) || grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborY_SP[sparseIndexOfICellBSW]]) || - grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]])) { + grid->isSparseIndexInFluidNodeIndicesBorder( + neighborZ_SP[neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]])) { iCellCfcBorderVector.push_back(iCellCfcAll[i]); iCellCffBorderVector.push_back(iCellCffAll[i]); @@ -541,13 +595,16 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level 
para->getParH(level)->intCFBorder.ICellCFF = para->getParH(level)->intCF.ICellCFF; para->getParH(level)->intCFBorder.kCF = (uint)iCellCfcBorderVector.size(); para->getParH(level)->intCFBulk.kCF = (uint)iCellCfcBulkVector.size(); - para->getParH(level)->intCFBulk.ICellCFC = para->getParH(level)->intCF.ICellCFC + para->getParH(level)->intCFBorder.kCF; - para->getParH(level)->intCFBulk.ICellCFF = para->getParH(level)->intCF.ICellCFF + para->getParH(level)->intCFBorder.kCF; - para->getParH(level)->offCFBulk.xOffCF = para->getParH(level)->offCF.xOffCF + para->getParH(level)->intCFBorder.kCF; - para->getParH(level)->offCFBulk.yOffCF = para->getParH(level)->offCF.yOffCF + para->getParH(level)->intCFBorder.kCF; - para->getParH(level)->offCFBulk.zOffCF = para->getParH(level)->offCF.zOffCF + para->getParH(level)->intCFBorder.kCF; + para->getParH(level)->intCFBulk.ICellCFC = + para->getParH(level)->intCF.ICellCFC + para->getParH(level)->intCFBorder.kCF; + para->getParH(level)->intCFBulk.ICellCFF = + para->getParH(level)->intCF.ICellCFF + para->getParH(level)->intCFBorder.kCF; + para->getParH(level)->offCFBulk.xOffCF = para->getParH(level)->offCF.xOffCF + para->getParH(level)->intCFBorder.kCF; + para->getParH(level)->offCFBulk.yOffCF = para->getParH(level)->offCF.yOffCF + para->getParH(level)->intCFBorder.kCF; + para->getParH(level)->offCFBulk.zOffCF = para->getParH(level)->offCF.zOffCF + para->getParH(level)->intCFBorder.kCF; // copy the created vectors to the memory addresses of the old arrays + // this is inefficient :( for (uint i = 0; i < (uint)iCellCfcBorderVector.size(); i++) { para->getParH(level)->intCFBorder.ICellCFC[i] = iCellCfcBorderVector[i]; para->getParH(level)->intCFBorder.ICellCFF[i] = iCellCffBorderVector[i]; @@ -556,10 +613,10 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level para->getParH(level)->offCF.zOffCF[i] = zOffCFBorderVector[i]; } for (uint i = 0; i < (uint)iCellCfcBulkVector.size(); i++) { - 
para->getParH(level)->intCFBulk.ICellCFC[i] = iCellCfcBulkVector[i]; - para->getParH(level)->intCFBulk.ICellCFF[i] = iCellCffBulkVector[i]; - para->getParH(level)->offCF.xOffCF[i + xOffCFBorderVector.size()] = xOffCFBulkVector[i]; - para->getParH(level)->offCF.yOffCF[i + yOffCFBorderVector.size()] = yOffCFBulkVector[i]; - para->getParH(level)->offCF.zOffCF[i + zOffCFBorderVector.size()] = zOffCFBulkVector[i]; + para->getParH(level)->intCFBulk.ICellCFC[i] = iCellCfcBulkVector[i]; + para->getParH(level)->intCFBulk.ICellCFF[i] = iCellCffBulkVector[i]; + para->getParH(level)->offCFBulk.xOffCF[i] = xOffCFBulkVector[i]; + para->getParH(level)->offCFBulk.yOffCF[i] = yOffCFBulkVector[i]; + para->getParH(level)->offCFBulk.zOffCF[i] = zOffCFBulkVector[i]; } } diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h index fb1e0695071fff38d52730b3c94fef0b1d67a93e..65ee08666247308c3cdf1e533106189b441a325e 100644 --- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h @@ -1,11 +1,16 @@ +//! \file IndexRearrangementForStreams.h +//! \ingroup GPU +//! \author Anna Wellmann +//! 
\ref master thesis of Anna Wellmann + #ifndef IndexRearrangementForStreams_H #define IndexRearrangementForStreams_H -#include "../GridProvider.h" +#include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h> -#include <vector> -#include <string> #include <memory> +#include <string> +#include <vector> #include "LBM/LB.h" @@ -13,69 +18,144 @@ class Parameter; class GridBuilder; namespace vf { - namespace gpu - { - class Communicator; - } +namespace gpu +{ +class Communicator; } +} // namespace vf class IndexRearrangementForStreams { private: - std::shared_ptr<GridBuilder> builder; + std::shared_ptr<GridBuilder> builder; std::shared_ptr<Parameter> para; public: + //! \brief construct IndexRearrangementForStreams object IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder); - + + ////////////////////////////////////////////////////////////////////////// // communication after coarse to fine + ////////////////////////////////////////////////////////////////////////// + + //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in x direction + //! \details Only the nodes involved in the interpolation need to be exchanged. Therefore in this method all nodes, + //! which are part of the interpolation as well as the communication, are identified. + //! + //! \ref see master thesis of Anna + //! Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten") void initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j, int direction); + //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in y direction + //! \details --> see x direction void initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction); + //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in z direction + //! 
\details --> see x direction void initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction); +public: + ////////////////////////////////////////////////////////////////////////// // split interpolation cells + ////////////////////////////////////////////////////////////////////////// + + //! \brief split the interpolation cells from coarse to fine into border and bulk + //! \details For communication hiding, the interpolation cells from the coarse to the fine grid need to be split + //! into two groups: + //! + //! - cells which are at the border between two gpus --> "border" + //! + //! - the other cells which are not directly related to the communication between the two gpus --> "bulk" + //! + //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation") void splitCoarseToFineIntoBorderAndBulk(const uint &level); - void splitFineToCoarseIntoBorderAndBulk(const uint &level); + //! \brief split the interpolation cells from fine to coarse into border and bulk + //! \details For communication hiding, the interpolation cells from the fine to the coarse grid need to be split + //! into two groups: + //! + //! - cells which are at the border between two gpus --> "border" + //! + //! - the other cells which are not directly related to the communication between the two gpus --> "bulk" + //! + //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation") + void splitFineToCoarseIntoBorderAndBulk(const uint &level); private: + ////////////////////////////////////////////////////////////////////////// // communication after coarse to fine - void copyProcessNeighborToCommAfterFtoCX(const uint &level, int j); - void copyProcessNeighborToCommAfterFtoCY(const uint &level, int j); - void copyProcessNeighborToCommAfterFtoCZ(const uint &level, int j); + ////////////////////////////////////////////////////////////////////////// + + //!
\brief inits pointers for reduced communication after interpolation fine to coarse by copying them from "normal" + //! communication + void copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor); + void copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor); + void copyProcessNeighborToCommAfterFtoCZ(const uint &level, int indexOfProcessNeighbor); - void reorderSendIndicesForCommAfterFtoCX(int direction, int level, int j, + void reorderSendIndicesForCommAfterFtoCX(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderSendIndicesForCommAfterFtoCY(int direction, int level, int j, + void reorderSendIndicesForCommAfterFtoCY(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int j, + void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNeighborsAfterFtoC, int direction, - int level, int j, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); + //! \brief the send indices are reordered for the communication after the interpolation from fine to coarse + //! \details The indices of nodes which are part of the interpolation are moved to the front of the vector with the send + //! indices. + //! \pre para->getParH(level)->intCF needs to be initialized + //! \param sendIndices is the pointer to the vector with the send indices, which will be reordered in this function + //! \param numberOfSendNodesAfterFtoC will be set in this method + //!
\param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering + void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction, + int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); + //! \brief check if a sparse index occurs in the ICellFCC bool isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndexSend, int level); + //! \brief aggregate all nodes in the coarse cells for the interpolation in coarse to fine + //! \details For the coarse cells in the interpolation from coarse to fine only one node is stored. This methods + //! looks for the other nodes of each cell and puts them into vector. Duplicate nodes are only stored once. void aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC); + //! \brief add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC void addUniqueIndexToCommunicationVectors(std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend, std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions, uint &posInSendIndices) const; - void findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices, uint numberOfSendIndices, - std::vector<int> &sendIndicesAfterFtoC, - std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const; + //! \brief find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors() + void + findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices, uint numberOfSendIndices, + std::vector<int> &sendIndicesAfterFtoC, + std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const; + //! 
\brief find all indices which are not part of the communication after the interpolation from fine to coarse void findIndicesNotInCommAfterFtoC(const uint &numberOfSendOrRecvIndices, int *sendOrReceiveIndices, std::vector<int> &sendOrReceiveIndicesAfterFtoC, std::vector<int> &sendOrIndicesOther); - void reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int j, + void reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int j, + void reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int j, + void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - void reorderRecvIndicesForCommAfterFtoC(int *recvIndices, int &numberOfRecvNeighborsAfterFtoC, int direction, - int level, int j, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); - + + //! \brief reorder the receive indices in the same way that the send indices were reordered. + //! \details When the send indices are reordered, the receive indices need to be reordered accordingly. + //! \pre sendIndicesForCommAfterFtoCPositions should not be empty + //! \param recvIndices is the pointer to the vector with the receive indices, which will be reordered in this function + //! \param numberOfRecvNodesAfterFtoC will be set in this function + //! 
\param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering and is used to reorder the receive indices in the same way + void reorderRecvIndicesForCommAfterFtoC(int *recvIndices, int &numberOfRecvNodesAfterFtoC, int direction, + int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions); + +private: + ////////////////////////////////////////////////////////////////////////// // split interpolation cells + ////////////////////////////////////////////////////////////////////////// + + //! \brief This function reorders the arrays of CFC/CFF indices and sets the pointers and sizes of the new + //! subarrays: \details The coarse cells for interpolation from coarse to fine (iCellCFC) are divided into two + //! subgroups: border and bulk. The fine cells (iCellCFF) are reordered accordingly. The offset cells (xOffCF, + //! yOffCF, zOffCF) must be reordered in the same way. void getGridInterfaceIndicesBorderBulkCF(int level); + + //! \brief This function reorders the arrays of FCC/FCF indices and returns pointers and sizes of the new subarrays: + //! \details The coarse cells for interpolation from fine to coarse (iCellFCC) are divided into two subgroups: + //! border and bulk. The fine cells (iCellFCF) are reordered accordingly.
void getGridInterfaceIndicesBorderBulkFC(int level); }; diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e414d4f3173e555b8944fa9637ebbd2023ce393c --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg @@ -0,0 +1,3 @@ +# these two parameters need to be defined in each config file +Path = /output/path +GridPath = /path/to/grid \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2982aa5c4eddbaba53473c57b87a6a1860d76f2e --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp @@ -0,0 +1,248 @@ +#include <gmock/gmock.h> + +#include <algorithm> +#include <filesystem> +#include <iostream> + +#include <Parameter/Parameter.h> +#include <basics/config/ConfigurationFile.h> + +#include <DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h> +#include <gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h> +#include <gpu/GridGenerator/grid/GridImp.h> + +template <typename T> +bool vectorsAreEqual(T *vector1, std::vector<T> vectorExpected) +{ + for (uint i = 0; i < vectorExpected.size(); i++) { + if (vector1[i] != vectorExpected[i]) + return false; + } + return true; +} + +class LevelGridBuilderDouble : public LevelGridBuilder +{ +private: + SPtr<Grid> grid; + LevelGridBuilderDouble() = default; + +public: + LevelGridBuilderDouble(SPtr<Grid> grid) : LevelGridBuilder(Device(), ""), grid(grid){}; + 
SPtr<Grid> getGrid(uint level) override { return grid; }; + std::shared_ptr<Grid> getGrid(int level, int box) override { return grid; }; +}; + +class GridImpDouble : public GridImp +{ +private: + std::vector<uint> fluidNodeIndicesBorder; + +public: + GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, + SPtr<GridStrategy> gridStrategy, Distribution d, uint level) + : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level) + { + } + + static SPtr<GridImpDouble> makeShared(Object *object, real startX, real startY, real startZ, real endX, real endY, + real endZ, real delta, SPtr<GridStrategy> gridStrategy, Distribution d, + uint level) + { + SPtr<GridImpDouble> grid( + new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level)); + return grid; + } + + void setFluidNodeIndicesBorder(std::vector<uint> fluidNodeIndicesBorder) + { + this->fluidNodeIndicesBorder = fluidNodeIndicesBorder; + } + + bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const override + { + return std::find(this->fluidNodeIndicesBorder.begin(), this->fluidNodeIndicesBorder.end(), sparseIndex) != + this->fluidNodeIndicesBorder.end(); + } +}; + +struct CFBorderBulk { + // data to work on + std::vector<uint> fluidNodeIndicesBorder = { 10, 11, 12, 13, 14, 15, 16 }; + std::vector<uint> iCellCFC = { 1, 11, 3, 13, 5, 15, 7 }; + std::vector<uint> iCellCFF = { 2, 12, 4, 14, 6, 16, 8 }; + uint sizeOfICellCf = (uint)iCellCFC.size(); + uint neighborX_SP[17] = { 0u }; + uint neighborY_SP[17] = { 0u }; + uint neighborZ_SP[17] = { 0u }; + int level = 0; + std::vector<real> offsetCFx = { 1, 11, 3, 13, 5, 15, 7 }; + std::vector<real> offsetCFy = { 101, 111, 103, 113, 105, 115, 107 }; + std::vector<real> offsetCFz = { 1001, 1011, 1003, 1013, 1005, 1015, 1007 }; + + // expected data + std::vector<uint> iCellCfcBorder_expected = { 11, 13, 15 }; + std::vector<uint> 
iCellCfcBulk_expected = { 1, 3, 5, 7 }; + std::vector<uint> iCellCffBorder_expected = { 12, 14, 16 }; + std::vector<uint> iCellCffBulk_expected = { 2, 4, 6, 8 }; + std::vector<real> offsetCFx_Border_expected = { 11, 13, 15 }; + std::vector<real> offsetCFx_Bulk_expected = { 1, 3, 5, 7 }; + std::vector<real> offsetCFy_Border_expected = { 111, 113, 115 }; + std::vector<real> offsetCFy_Bulk_expected = { 101, 103, 105, 107 }; + std::vector<real> offsetCFz_Border_expected = { 1011, 1013, 1015 }; + std::vector<real> offsetCFz_Bulk_expected = { 1001, 1003, 1005, 1007 }; +}; + +struct FCBorderBulk { + // data to work on + std::vector<uint> fluidNodeIndicesBorder = { 110, 111, 112, 113, 114, 115, 116 }; + std::vector<uint> iCellFCC = { 11, 111, 13, 113, 15, 115, 17 }; + std::vector<uint> iCellFCF = { 12, 112, 14, 114, 16, 116, 18 }; + uint sizeOfICellFC = (uint)iCellFCC.size(); + int level = 1; + + // expected data + std::vector<uint> iCellFccBorder_expected = { 111, 113, 115 }; + std::vector<uint> iCellFccBulk_expected = { 11, 13, 15, 17 }; + std::vector<uint> iCellFcfBorder_expected = { 112, 114, 116 }; + std::vector<uint> iCellFcfBulk_expected = { 12, 14, 16, 18 }; +}; + +static SPtr<Parameter> initParameterClass() +{ + std::filesystem::path filePath = __FILE__; // assuming that the config file is stored parallel to this file. 
+ filePath.replace_filename("IndexRearrangementForStreamsTest.cfg"); + vf::basics::ConfigurationFile config; + config.load(filePath.string()); + return std::make_shared<Parameter>(config, 1, 0); +} + +class IndexRearrangementForStreamsTest_IndicesCFBorderBulkTest : public testing::Test +{ +public: + CFBorderBulk cf; + SPtr<Parameter> para; + +private: + static std::unique_ptr<IndexRearrangementForStreams> createTestSubjectCFBorderBulk(CFBorderBulk &cf, + std::shared_ptr<Parameter> para) + { + SPtr<GridImpDouble> grid = + GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1); + grid->setFluidNodeIndicesBorder(cf.fluidNodeIndicesBorder); + std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid); + + para->setMaxLevel(cf.level + 1); // setMaxLevel resizes parH and parD + para->parH[cf.level] = std::make_shared<LBMSimulationParameter>(); + para->parD[cf.level] = std::make_shared<LBMSimulationParameter>(); + para->getParH(cf.level)->intCF.ICellCFC = &(cf.iCellCFC.front()); + para->getParH(cf.level)->intCF.ICellCFF = &(cf.iCellCFF.front()); + para->getParH(cf.level)->neighborX_SP = cf.neighborX_SP; + para->getParH(cf.level)->neighborY_SP = cf.neighborY_SP; + para->getParH(cf.level)->neighborZ_SP = cf.neighborZ_SP; + para->getParH(cf.level)->intCF.kCF = cf.sizeOfICellCf; + para->getParH(cf.level)->offCF.xOffCF = &(cf.offsetCFx.front()); + para->getParH(cf.level)->offCF.yOffCF = &(cf.offsetCFy.front()); + para->getParH(cf.level)->offCF.zOffCF = &(cf.offsetCFz.front()); + + return std::make_unique<IndexRearrangementForStreams>(para, builder); + }; + + void SetUp() override + { + para = initParameterClass(); + auto testSubject = createTestSubjectCFBorderBulk(cf, para); + testSubject->splitCoarseToFineIntoBorderAndBulk(cf.level); + } +}; + +class IndexRearrangementForStreamsTest_IndicesFCBorderBulkTest : public testing::Test +{ +public: + FCBorderBulk fc; + SPtr<Parameter> para; + +private: + 
static std::unique_ptr<IndexRearrangementForStreams> createTestSubjectFCBorderBulk(FCBorderBulk &fc, + std::shared_ptr<Parameter> para) + { + SPtr<GridImpDouble> grid = + GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1); + grid->setFluidNodeIndicesBorder(fc.fluidNodeIndicesBorder); + std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid); + + para->setMaxLevel(fc.level + 1); // setMaxLevel resizes parH and parD + para->parH[fc.level] = std::make_shared<LBMSimulationParameter>(); + para->parD[fc.level] = std::make_shared<LBMSimulationParameter>(); + para->getParH(fc.level)->intFC.ICellFCC = &(fc.iCellFCC.front()); + para->getParH(fc.level)->intFC.ICellFCF = &(fc.iCellFCF.front()); + para->getParH(fc.level)->intFC.kFC = fc.sizeOfICellFC; + + return std::make_unique<IndexRearrangementForStreams>(para, builder); + }; + + void SetUp() override + { + para = initParameterClass(); + auto testSubject = createTestSubjectFCBorderBulk(fc, para); + testSubject->splitFineToCoarseIntoBorderAndBulk(fc.level); + } +}; + +TEST_F(IndexRearrangementForStreamsTest_IndicesCFBorderBulkTest, splitCoarseToFineIntoBorderAndBulk) +{ + EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF + para->getParH(cf.level)->intCFBulk.kCF, + testing::Eq(cf.sizeOfICellCf)) + << "The number of interpolation cells from coarse to fine changed during reordering."; + + // check coarse to fine border (coarse nodes) + EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCfcBorder_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFC, cf.iCellCfcBorder_expected)) + << "intCFBorder.ICellCFC does not match the expected border vector"; + // check coarse to fine border (fine nodes) + EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCffBorder_expected.size())); + 
EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFF, cf.iCellCffBorder_expected)) + << "intCFBorder.ICellCFF does not match the expected border vector"; + + // check coarse to fine bulk (coarse nodes) + EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCfcBulk_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFC, cf.iCellCfcBulk_expected)) + << "intCFBulk.ICellCFC does not match the expected bulk vector"; + // check coarse to fine bulk (fine nodes) + EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCffBulk_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFF, cf.iCellCffBulk_expected)) + << "intCFBulk.ICellCFF does not match the expected bulk vector"; + + // check offset cells + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.xOffCF, cf.offsetCFx_Border_expected)); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.xOffCF, cf.offsetCFx_Bulk_expected)); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.yOffCF, cf.offsetCFy_Border_expected)); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.yOffCF, cf.offsetCFy_Bulk_expected)); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.zOffCF, cf.offsetCFz_Border_expected)); + EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.zOffCF, cf.offsetCFz_Bulk_expected)); +} + +TEST_F(IndexRearrangementForStreamsTest_IndicesFCBorderBulkTest, splitFineToCoarseIntoBorderAndBulk) +{ + EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC + para->getParH(fc.level)->intFCBulk.kFC, + testing::Eq(fc.sizeOfICellFC)) + << "The number of interpolation cells from fine to coarse changed during reordering."; + + // check fine to coarse border (coarse nodes) + EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFccBorder_expected.size())); + 
EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCC, fc.iCellFccBorder_expected)) + << "intFCBorder.ICellFCC does not match the expected border vector"; + // check fine to coarse border (fine nodes) + EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFcfBorder_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCF, fc.iCellFcfBorder_expected)) + << "intFCBorder.ICellFCF does not match the expected border vector"; + + // check fine to coarse bulk (coarse nodes) + EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFccBulk_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCC, fc.iCellFccBulk_expected)) + << "intFCBulk.ICellFCC does not match the expected bulk vector"; + // check fine to coarse bulk (fine nodes) + EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFcfBulk_expected.size())); + EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCF, fc.iCellFcfBulk_expected)) + << "intFCBulk.ICellFCF does not match the expected bulk vector"; +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu index da37bb1e2fee675cb07410d483dac21dc84215dc..f57fd9dd9bc2a372c7790bf8f3837e69d1d52beb 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu @@ -26,9 +26,9 @@ // You should have received a copy of the GNU General Public License along // with VirtualFluids (see COPYING.txt). 
If not, see <http://www.gnu.org/licenses/>. // -//! \file Cumulant27chim.cu +//! \file Cumulant27chimStream.cu //! \ingroup GPU -//! \author Martin Schoenherr +//! \author Martin Schoenherr, Anna Wellmann //======================================================================================= /* Device code */ #include "LBM/LB.h" diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 7019eb6cc56a93f21bb6559de74d74a84fcb24ea..bf31cc6d8c95453d1dd05d355a793d49e6864c4f 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -1486,7 +1486,7 @@ bool Parameter::findIndexInSendNodesYZ(int level, int index, int &indexOfProcess return false; } -void Parameter::initNumberOfProcessNeighborsAfterFtoCX(int level) +void Parameter::initProcessNeighborsAfterFtoCX(int level) { this->getParH(level)->sendProcessNeighborsAfterFtoCX.resize(this->getParH(level)->sendProcessNeighborX.size()); this->getParH(level)->recvProcessNeighborsAfterFtoCX.resize(this->getParH(level)->recvProcessNeighborX.size()); @@ -1496,7 +1496,7 @@ void Parameter::initNumberOfProcessNeighborsAfterFtoCX(int level) this->getParH(level)->recvProcessNeighborsAfterFtoCX.size()); } -void Parameter::initNumberOfProcessNeighborsAfterFtoCY(int level) +void Parameter::initProcessNeighborsAfterFtoCY(int level) { this->getParH(level)->sendProcessNeighborsAfterFtoCY.resize(this->getParH(level)->sendProcessNeighborY.size()); this->getParH(level)->recvProcessNeighborsAfterFtoCY.resize(this->getParH(level)->recvProcessNeighborY.size()); @@ -1506,7 +1506,7 @@ void Parameter::initNumberOfProcessNeighborsAfterFtoCY(int level) this->getParH(level)->recvProcessNeighborsAfterFtoCY.size()); } -void Parameter::initNumberOfProcessNeighborsAfterFtoCZ(int level) +void Parameter::initProcessNeighborsAfterFtoCZ(int level) { 
this->getParH(level)->sendProcessNeighborsAfterFtoCZ.resize(this->getParH(level)->sendProcessNeighborZ.size()); this->getParH(level)->recvProcessNeighborsAfterFtoCZ.resize(this->getParH(level)->recvProcessNeighborZ.size()); diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index cdedfa1e2b18d4751bef34141108528c64a61bd3..940b2493f96a8eab73d07d6a19031d708bc865dd 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -899,16 +899,16 @@ private: std::unique_ptr<CudaStreamManager> cudaStreamManager; public: - //! sets whether streams and thus communication hiding should be used - /*! This function is only useful for simulations on multiple GPUs. If there is only one MPI process, the passed value is automatically overwritten with false. */ + //! \brief sets whether streams and thus communication hiding should be used + //! \details This function is only useful for simulations on multiple GPUs. If there is only one MPI process, the passed value is automatically overwritten with false. 
void setUseStreams(bool useStreams); bool getUseStreams(); std::unique_ptr<CudaStreamManager> &getStreamManager(); bool getKernelNeedsFluidNodeIndicesToRun(); - void initNumberOfProcessNeighborsAfterFtoCX(int level); - void initNumberOfProcessNeighborsAfterFtoCY(int level); - void initNumberOfProcessNeighborsAfterFtoCZ(int level); + void initProcessNeighborsAfterFtoCX(int level); + void initProcessNeighborsAfterFtoCY(int level); + void initProcessNeighborsAfterFtoCZ(int level); void findEdgeNodesCommMultiGPU(); bool useReducedCommunicationAfterFtoC{ true }; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp index aa0551632e566768aaa9b087c072f665d6f7bc3d..df77d0fd4b668ccf745c8c1a04ce7beaa4133860 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp @@ -155,4 +155,221 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor) } +TEST(ParameterTest, findEdgeNodesXY_shouldReturnCorrectVector) +{ + + std::filesystem::path filePath = __FILE__; + filePath.replace_filename("parameterTest.cfg"); + vf::basics::ConfigurationFile config; + config.load(filePath.string()); + Parameter para(config, 1, 0); + + para.initLBMSimulationParameter(); + + int level = 0; + para.parH[level]->recvProcessNeighborX.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborY.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborY.push_back(ProcessNeighbor27()); + + int numRecvNeighbor = (int)para.parH[level]->recvProcessNeighborX.size() - 1; + int numSendNeighbor = (int)para.parH[level]->sendProcessNeighborY.size() - 1; + + const int sizeRecv = 6; + const int sizeSend = 10; + para.parH[level]->recvProcessNeighborX[numRecvNeighbor].numberOfNodes = sizeRecv; + para.parH[level]->sendProcessNeighborY[numSendNeighbor].numberOfNodes = sizeSend; + + int recvNeighbors[sizeRecv] = { 1, 2, 3, 4, 5, 6 }; + 
para.parH[level]->recvProcessNeighborX[numRecvNeighbor].index = recvNeighbors; + + int sendNeighbors[sizeSend] = { 20, 1, 21, 22, 6, 23, 5, 24, 25, 26 }; + para.parH[level]->sendProcessNeighborY[numSendNeighbor].index = sendNeighbors; + + + para.findEdgeNodesCommMultiGPU(); + + + std::vector<std::pair<int, int>> expectedEdgeNodesXtoYRecv = { std::pair(numRecvNeighbor, 0), + std::pair(numRecvNeighbor, 4), + std::pair(numRecvNeighbor, 5) }; + + std::vector<std::pair<int, int>> expectedEdgeNodesXtoYSend = { std::pair(numSendNeighbor, 1), + std::pair(numSendNeighbor, 6), + std::pair(numSendNeighbor, 4) }; + + EXPECT_THAT(para.parH[level]->edgeNodesXtoY.size(), testing::Eq(expectedEdgeNodesXtoYRecv.size())); + + bool vectorsAreIdentical = true; + for (int i = 0; i < (int)expectedEdgeNodesXtoYRecv.size(); i++) { + if (para.parH[level]->edgeNodesXtoY[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoYRecv[i].first) { + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesXtoY[i].indexInRecvBuffer != expectedEdgeNodesXtoYRecv[i].second) { + vectorsAreIdentical = false; + break; + } + } + + EXPECT_TRUE(vectorsAreIdentical); + + vectorsAreIdentical = true; + for (int i = 0; i < (int)expectedEdgeNodesXtoYSend.size(); i++) { + if (para.parH[level]->edgeNodesXtoY[i].indexOfProcessNeighborSend != expectedEdgeNodesXtoYSend[i].first) { + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesXtoY[i].indexInSendBuffer != expectedEdgeNodesXtoYSend[i].second) { + vectorsAreIdentical = false; + break; + } + } + + EXPECT_TRUE(vectorsAreIdentical); +} + +TEST(ParameterTest, findEdgeNodesXZ_shouldReturnCorrectVector) +{ + + std::filesystem::path filePath = __FILE__; + filePath.replace_filename("parameterTest.cfg"); + vf::basics::ConfigurationFile config; + config.load(filePath.string()); + Parameter para(config, 1, 0); + + para.initLBMSimulationParameter(); + + int level = 0; + 
para.parH[level]->recvProcessNeighborX.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27()); + + int numRecvNeighbor = (int)para.parH[level]->recvProcessNeighborX.size() - 1; + int numSendNeighbor = (int)para.parH[level]->sendProcessNeighborZ.size() - 1; + + const int sizeRecv = 10; + const int sizeSend = 6; + + para.parH[level]->recvProcessNeighborX[numRecvNeighbor].numberOfNodes = sizeRecv; + para.parH[level]->sendProcessNeighborZ[numSendNeighbor].numberOfNodes = sizeSend; + + int recvNeighbors[sizeRecv] = { 20, 1, 21, 22, 6, 23, 5, 24, 25, 26 }; + para.parH[level]->recvProcessNeighborX[numRecvNeighbor].index = recvNeighbors; + + int sendNeighbors[sizeSend] = { 1, 2, 3, 4, 5, 6 }; + para.parH[level]->sendProcessNeighborZ[numSendNeighbor].index = sendNeighbors; + + + para.findEdgeNodesCommMultiGPU(); + + + std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair(numRecvNeighbor, 1), + std::pair(numRecvNeighbor, 4), + std::pair(numRecvNeighbor, 6) }; + std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair(numSendNeighbor, 0), + std::pair(numSendNeighbor, 5), + std::pair(numSendNeighbor, 4) }; + + EXPECT_THAT(para.parH[level]->edgeNodesXtoZ.size(), testing::Eq(expectedEdgeNodesXtoZRecv.size())); + + bool vectorsAreIdentical = true; + for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) { + if (para.parH[level]->edgeNodesXtoZ[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoZRecv[i].first) { + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesXtoZ[i].indexInRecvBuffer != expectedEdgeNodesXtoZRecv[i].second) { + vectorsAreIdentical = false; + break; + } + } + + EXPECT_TRUE(vectorsAreIdentical); + + vectorsAreIdentical = true; + for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) { + if (para.parH[level]->edgeNodesXtoZ[i].indexOfProcessNeighborSend != 
expectedEdgeNodesXtoZSend[i].first){ + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesXtoZ[i].indexInSendBuffer != expectedEdgeNodesXtoZSend[i].second) { + vectorsAreIdentical = false; + break; + } + } + + EXPECT_TRUE(vectorsAreIdentical); +} + +TEST(ParameterTest, findEdgeNodesYZ_shouldReturnCorrectVector) +{ + + std::filesystem::path filePath = __FILE__; + filePath.replace_filename("parameterTest.cfg"); + vf::basics::ConfigurationFile config; + config.load(filePath.string()); + Parameter para(config, 1, 0); + + para.initLBMSimulationParameter(); + + int level = 0; + + para.parH[level]->recvProcessNeighborY.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27()); + para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27()); + + const int sizeRecv = 10; + const int sizeSend1 = 6; + const int sizeSend2 = 5; + + para.parH[level]->recvProcessNeighborY[0].numberOfNodes = sizeRecv; + para.parH[level]->sendProcessNeighborZ[0].numberOfNodes = sizeSend1; + para.parH[level]->sendProcessNeighborZ[1].numberOfNodes = sizeSend2; + + int recvNeighbors[sizeRecv] = { 20, 1, 9, 22, 6, 23, 5, 24, 11, 26 }; + para.parH[level]->recvProcessNeighborY[0].index = recvNeighbors; + + int sendNeighbors1[sizeSend1] = { 1, 2, 3, 4, 5, 6 }; + int sendNeighbors2[sizeSend2] = { 7, 8, 9, 10, 11 }; + para.parH[level]->sendProcessNeighborZ[0].index = sendNeighbors1; + para.parH[level]->sendProcessNeighborZ[1].index = sendNeighbors2; + + + para.findEdgeNodesCommMultiGPU(); + + + std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair(0, 1), std::pair(0, 2), std::pair(0, 4), + std::pair(0, 6), std::pair(0, 8) }; + std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair(0, 0), std::pair(1, 2), std::pair(0, 5), + std::pair(0, 4), std::pair(1, 4) }; + + EXPECT_THAT(para.parH[level]->edgeNodesYtoZ.size(), testing::Eq(expectedEdgeNodesXtoZRecv.size())); + + bool vectorsAreIdentical = 
true; + for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) { + if (para.parH[level]->edgeNodesYtoZ[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoZRecv[i].first) { + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesYtoZ[i].indexInRecvBuffer != expectedEdgeNodesXtoZRecv[i].second) { + vectorsAreIdentical = false; + break; + } + } + + EXPECT_TRUE(vectorsAreIdentical); + + vectorsAreIdentical = true; + for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) { + if (para.parH[level]->edgeNodesYtoZ[i].indexOfProcessNeighborSend != expectedEdgeNodesXtoZSend[i].first) { + vectorsAreIdentical = false; + break; + } + if (para.parH[level]->edgeNodesYtoZ[i].indexInSendBuffer != expectedEdgeNodesXtoZSend[i].second) { + vectorsAreIdentical = false; + break; + } + } + EXPECT_TRUE(vectorsAreIdentical); +} \ No newline at end of file