From 6768b98908d0375296f6a2695e0b04bbde9298c1 Mon Sep 17 00:00:00 2001 From: Soeren Peters <peters@irmb.tu-bs.de> Date: Mon, 5 Oct 2020 18:11:39 +0200 Subject: [PATCH] Add metis 5.1.1 and use it when METIS_INCLUDEDIR is not set in the config file. Changed in metis CMakeLists: - add cmake required version to 3.0 - removed installation - add METIS options for idx64 and real64 bit. - set target include path - set public target definitions - put metis target into the 3rd folder group. --- 3rdParty/metis/metis-5.1.1/CMakeLists.txt | 50 + 3rdParty/metis/metis-5.1.1/Changelog | 286 + 3rdParty/metis/metis-5.1.1/GKlib/.gitignore | 57 + .../metis/metis-5.1.1/GKlib/CMakeLists.txt | 31 + 3rdParty/metis/metis-5.1.1/GKlib/GKlib.h | 85 + .../metis/metis-5.1.1/GKlib/GKlibSystem.cmake | 137 + 3rdParty/metis/metis-5.1.1/GKlib/LICENSE.txt | 18 + 3rdParty/metis/metis-5.1.1/GKlib/Makefile | 80 + 3rdParty/metis/metis-5.1.1/GKlib/README.md | 54 + 3rdParty/metis/metis-5.1.1/GKlib/b64.c | 95 + 3rdParty/metis/metis-5.1.1/GKlib/blas.c | 37 + 3rdParty/metis/metis-5.1.1/GKlib/cache.c | 126 + .../GKlib/conf/check_thread_storage.c | 5 + 3rdParty/metis/metis-5.1.1/GKlib/csr.c | 3378 +++++ 3rdParty/metis/metis-5.1.1/GKlib/error.c | 214 + 3rdParty/metis/metis-5.1.1/GKlib/evaluate.c | 132 + 3rdParty/metis/metis-5.1.1/GKlib/fkvkselect.c | 142 + 3rdParty/metis/metis-5.1.1/GKlib/fs.c | 225 + 3rdParty/metis/metis-5.1.1/GKlib/getopt.c | 855 ++ 3rdParty/metis/metis-5.1.1/GKlib/gk_arch.h | 68 + 3rdParty/metis/metis-5.1.1/GKlib/gk_defs.h | 87 + 3rdParty/metis/metis-5.1.1/GKlib/gk_externs.h | 25 + 3rdParty/metis/metis-5.1.1/GKlib/gk_getopt.h | 64 + 3rdParty/metis/metis-5.1.1/GKlib/gk_macros.h | 169 + 3rdParty/metis/metis-5.1.1/GKlib/gk_mkblas.h | 203 + .../metis/metis-5.1.1/GKlib/gk_mkmemory.h | 142 + .../metis/metis-5.1.1/GKlib/gk_mkpqueue.h | 440 + .../metis/metis-5.1.1/GKlib/gk_mkpqueue2.h | 215 + .../metis/metis-5.1.1/GKlib/gk_mkrandom.h | 123 + 3rdParty/metis/metis-5.1.1/GKlib/gk_mksort.h | 271 + 
3rdParty/metis/metis-5.1.1/GKlib/gk_mkutils.h | 40 + 3rdParty/metis/metis-5.1.1/GKlib/gk_proto.h | 423 + 3rdParty/metis/metis-5.1.1/GKlib/gk_struct.h | 296 + 3rdParty/metis/metis-5.1.1/GKlib/gk_types.h | 38 + 3rdParty/metis/metis-5.1.1/GKlib/gk_util.c | 107 + 3rdParty/metis/metis-5.1.1/GKlib/gkregex.c | 10704 ++++++++++++++++ 3rdParty/metis/metis-5.1.1/GKlib/gkregex.h | 556 + 3rdParty/metis/metis-5.1.1/GKlib/graph.c | 1940 +++ 3rdParty/metis/metis-5.1.1/GKlib/htable.c | 247 + 3rdParty/metis/metis-5.1.1/GKlib/io.c | 621 + 3rdParty/metis/metis-5.1.1/GKlib/itemsets.c | 210 + 3rdParty/metis/metis-5.1.1/GKlib/mcore.c | 393 + 3rdParty/metis/metis-5.1.1/GKlib/memory.c | 282 + .../metis/metis-5.1.1/GKlib/ms_inttypes.h | 301 + 3rdParty/metis/metis-5.1.1/GKlib/ms_stat.h | 22 + 3rdParty/metis/metis-5.1.1/GKlib/ms_stdint.h | 222 + 3rdParty/metis/metis-5.1.1/GKlib/pqueue.c | 25 + 3rdParty/metis/metis-5.1.1/GKlib/random.c | 136 + 3rdParty/metis/metis-5.1.1/GKlib/rw.c | 103 + .../metis-5.1.1/GKlib/scripts/gexpand.pl | 53 + 3rdParty/metis/metis-5.1.1/GKlib/seq.c | 174 + 3rdParty/metis/metis-5.1.1/GKlib/sort.c | 437 + 3rdParty/metis/metis-5.1.1/GKlib/string.c | 525 + .../metis-5.1.1/GKlib/test/CMakeLists.txt | 20 + .../metis/metis-5.1.1/GKlib/test/cmpnbrs.c | 301 + .../metis/metis-5.1.1/GKlib/test/csrcnv.c | 397 + 3rdParty/metis/metis-5.1.1/GKlib/test/fis.c | 286 + .../metis/metis-5.1.1/GKlib/test/gkgraph.c | 845 ++ .../metis/metis-5.1.1/GKlib/test/gksort.c | 346 + 3rdParty/metis/metis-5.1.1/GKlib/test/grKx.c | 256 + .../metis/metis-5.1.1/GKlib/test/m2mnbrs.c | 304 + 3rdParty/metis/metis-5.1.1/GKlib/test/rw.c | 306 + .../metis/metis-5.1.1/GKlib/test/splatt2svd.c | 98 + .../metis/metis-5.1.1/GKlib/test/strings.c | 82 + 3rdParty/metis/metis-5.1.1/GKlib/timers.c | 52 + 3rdParty/metis/metis-5.1.1/GKlib/tokenizer.c | 77 + .../metis/metis-5.1.1/GKlib/win32/adapt.c | 11 + .../metis/metis-5.1.1/GKlib/win32/adapt.h | 14 + 3rdParty/metis/metis-5.1.1/LICENSE | 19 + 
3rdParty/metis/metis-5.1.1/README.md | 171 + .../metis/metis-5.1.1/include/CMakeLists.txt | 3 + 3rdParty/metis/metis-5.1.1/include/metis.h | 358 + .../metis/metis-5.1.1/libmetis/CMakeLists.txt | 16 + 3rdParty/metis/metis-5.1.1/libmetis/auxapi.c | 43 + 3rdParty/metis/metis-5.1.1/libmetis/balance.c | 498 + .../metis/metis-5.1.1/libmetis/bucketsort.c | 44 + .../metis/metis-5.1.1/libmetis/checkgraph.c | 266 + 3rdParty/metis/metis-5.1.1/libmetis/coarsen.c | 1971 +++ .../metis/metis-5.1.1/libmetis/compress.c | 229 + 3rdParty/metis/metis-5.1.1/libmetis/contig.c | 699 + 3rdParty/metis/metis-5.1.1/libmetis/debug.c | 461 + 3rdParty/metis/metis-5.1.1/libmetis/defs.h | 60 + 3rdParty/metis/metis-5.1.1/libmetis/fm.c | 543 + 3rdParty/metis/metis-5.1.1/libmetis/fortran.c | 142 + 3rdParty/metis/metis-5.1.1/libmetis/frename.c | 136 + 3rdParty/metis/metis-5.1.1/libmetis/gklib.c | 120 + .../metis/metis-5.1.1/libmetis/gklib_defs.h | 53 + .../metis/metis-5.1.1/libmetis/gklib_rename.h | 122 + 3rdParty/metis/metis-5.1.1/libmetis/graph.c | 423 + .../metis/metis-5.1.1/libmetis/initpart.c | 630 + 3rdParty/metis/metis-5.1.1/libmetis/kmetis.c | 619 + 3rdParty/metis/metis-5.1.1/libmetis/kwayfm.c | 2548 ++++ .../metis/metis-5.1.1/libmetis/kwayrefine.c | 677 + 3rdParty/metis/metis-5.1.1/libmetis/macros.h | 258 + 3rdParty/metis/metis-5.1.1/libmetis/mcutil.c | 330 + 3rdParty/metis/metis-5.1.1/libmetis/mesh.c | 412 + .../metis/metis-5.1.1/libmetis/meshpart.c | 262 + .../metis/metis-5.1.1/libmetis/metislib.h | 41 + 3rdParty/metis/metis-5.1.1/libmetis/minconn.c | 729 ++ .../metis/metis-5.1.1/libmetis/mincover.c | 259 + 3rdParty/metis/metis-5.1.1/libmetis/mmd.c | 597 + 3rdParty/metis/metis-5.1.1/libmetis/ometis.c | 701 + 3rdParty/metis/metis-5.1.1/libmetis/options.c | 541 + .../metis/metis-5.1.1/libmetis/parmetis.c | 817 ++ 3rdParty/metis/metis-5.1.1/libmetis/pmetis.c | 387 + 3rdParty/metis/metis-5.1.1/libmetis/proto.h | 357 + 3rdParty/metis/metis-5.1.1/libmetis/refine.c | 216 + 
3rdParty/metis/metis-5.1.1/libmetis/rename.h | 269 + .../metis/metis-5.1.1/libmetis/separator.c | 176 + 3rdParty/metis/metis-5.1.1/libmetis/sfm.c | 612 + 3rdParty/metis/metis-5.1.1/libmetis/srefine.c | 165 + 3rdParty/metis/metis-5.1.1/libmetis/stat.c | 179 + .../metis/metis-5.1.1/libmetis/stdheaders.h | 29 + 3rdParty/metis/metis-5.1.1/libmetis/struct.h | 219 + 3rdParty/metis/metis-5.1.1/libmetis/timing.c | 63 + 3rdParty/metis/metis-5.1.1/libmetis/util.c | 138 + 3rdParty/metis/metis-5.1.1/libmetis/wspace.c | 214 + CMake/cmake_config_files/BILBO.config.cmake | 8 - CMake/cmake_config_files/ELLADAN.config.cmake | 7 +- cpu.cmake | 4 + src/cpu/VirtualFluidsCore/CMakeLists.txt | 9 +- 121 files changed, 47897 insertions(+), 17 deletions(-) create mode 100644 3rdParty/metis/metis-5.1.1/CMakeLists.txt create mode 100644 3rdParty/metis/metis-5.1.1/Changelog create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/.gitignore create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/CMakeLists.txt create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/GKlib.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/LICENSE.txt create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/Makefile create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/README.md create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/b64.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/blas.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/cache.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/conf/check_thread_storage.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/csr.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/error.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/evaluate.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/fkvkselect.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/fs.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/getopt.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_arch.h create 
mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_defs.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_externs.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_getopt.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_macros.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkblas.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkmemory.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue2.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkrandom.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mksort.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_mkutils.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_proto.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_struct.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_types.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gk_util.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gkregex.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/gkregex.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/graph.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/htable.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/io.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/itemsets.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/mcore.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/memory.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/ms_inttypes.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/ms_stat.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/ms_stdint.h create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/pqueue.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/random.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/rw.c create mode 100755 3rdParty/metis/metis-5.1.1/GKlib/scripts/gexpand.pl create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/seq.c create mode 100644 
3rdParty/metis/metis-5.1.1/GKlib/sort.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/string.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/CMakeLists.txt create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/cmpnbrs.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/csrcnv.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/fis.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/gkgraph.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/gksort.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/grKx.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/m2mnbrs.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/rw.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/splatt2svd.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/test/strings.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/timers.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/tokenizer.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.c create mode 100644 3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.h create mode 100644 3rdParty/metis/metis-5.1.1/LICENSE create mode 100644 3rdParty/metis/metis-5.1.1/README.md create mode 100644 3rdParty/metis/metis-5.1.1/include/CMakeLists.txt create mode 100644 3rdParty/metis/metis-5.1.1/include/metis.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/auxapi.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/balance.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/bucketsort.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/checkgraph.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/coarsen.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/compress.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/contig.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/debug.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/defs.h create mode 
100644 3rdParty/metis/metis-5.1.1/libmetis/fm.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/fortran.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/frename.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/gklib.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/gklib_defs.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/gklib_rename.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/graph.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/initpart.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/kmetis.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/kwayfm.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/kwayrefine.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/macros.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/mcutil.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/mesh.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/meshpart.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/metislib.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/minconn.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/mincover.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/mmd.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/ometis.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/options.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/parmetis.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/pmetis.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/proto.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/refine.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/rename.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/separator.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/sfm.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/srefine.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/stat.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/stdheaders.h create 
mode 100644 3rdParty/metis/metis-5.1.1/libmetis/struct.h create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/timing.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/util.c create mode 100644 3rdParty/metis/metis-5.1.1/libmetis/wspace.c
diff --git a/3rdParty/metis/metis-5.1.1/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/CMakeLists.txt
new file mode 100644
index 000000000..88f60da78
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/CMakeLists.txt
@@ -0,0 +1,49 @@
+cmake_minimum_required(VERSION 3.0...3.10)
+project(METIS C)
+
+# Root of the bundled GKlib; GKlibSystem.cmake globs its sources from here.
+set(GKLIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}/GKlib")
+
+# Configure libmetis library.
+if(BUILD_SHARED_LIBS)
+  set(METIS_LIBRARY_TYPE SHARED)
+else()
+  set(METIS_LIBRARY_TYPE STATIC)
+endif()
+
+include("${GKLIB_PATH}/GKlibSystem.cmake")
+
+# METIS' custom options
+option(METIS_IDX64 "enable 64 bit ints" OFF)
+option(METIS_REAL64 "enable 64 bit floats (i.e., double)" OFF)
+
+if(METIS_IDX64)
+  set(METIS_COPTIONS_IDX "-DIDXTYPEWIDTH=64")
+else()
+  set(METIS_COPTIONS_IDX "-DIDXTYPEWIDTH=32")
+endif()
+
+if(METIS_REAL64)
+  set(METIS_COPTIONS_REAL "-DREALTYPEWIDTH=64")
+else()
+  set(METIS_COPTIONS_REAL "-DREALTYPEWIDTH=32")
+endif()
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${METIS_COPTIONS_IDX} ${METIS_COPTIONS_REAL}")
+
+
+add_subdirectory("libmetis")
+
+# Headers needed only to build metis itself.
+target_include_directories(metis PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libmetis)
+target_include_directories(metis PRIVATE ${GKLIB_PATH})
+
+# Usage requirements propagated to every target that links against metis.
+target_include_directories(metis PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_compile_definitions(metis PUBLIC ${METIS_COPTIONS_IDX})
+target_compile_definitions(metis PUBLIC ${METIS_COPTIONS_REAL})
+
+# groupTarget() is not defined in this file; it is expected to come from the
+# including project, so call it only when it is actually available.
+if(COMMAND groupTarget)
+  groupTarget(metis ${thirdFolder})
+endif()
diff --git a/3rdParty/metis/metis-5.1.1/Changelog b/3rdParty/metis/metis-5.1.1/Changelog new file mode 100644 index 000000000..4b2db8865 --- /dev/null +++
b/3rdParty/metis/metis-5.1.1/Changelog @@ -0,0 +1,286 @@ + +metis-5.1.0 +------------------------------------------------------------------------ +r13937 | karypis | 2013-03-29 23:08:21 -0500 (Fri, 29 Mar 2013) + +- Further extended the 2-hop coarsening scheme introduced in 5.0.2 for + for graphs with highly variable degree distribution (e.g., power-law). + This coarsening scheme is automatically used when the standard + 1-hop-based scheme leaves a large fraction of the vertices of the + graph unmatched. It leads to better quality partitionings, lower + memory utilization, and faster execution time. In principle, this + scheme will never be triggered for graphs/matrices appearing in + scientific computations derived from FE meshes. However, if you + notice that the quality of the solutions is significantly worse, + this 2-hop matching can be turned off by using the '-no2hop' command + line option and the associated options[] parameter (as described + in the manual). +- Fixed 0/1 numbering issue with mesh partitioning routines (flyspray + issue #109) + + +metis-5.0.3 +------------------------------------------------------------------------ +r13822 | karypis | 2013-03-11 14:40:11 -0500 (Mon, 11 Mar 2013) + +- Fixed the bug that was introduced in 5.x for creating nodal graphs + from meshes (flyspray issue #107). +- Changed the license to Apache Version 2. + + +metis-5.0.2 +------------------------------------------------------------------------ +r10974 | karypis | 2011-10-29 18:24:32 -0500 (Sat, 29 Oct 2011) + +- Fixed issue with high-degree vertices and mask-based compression. +- Fixed issue with wrong COARSENING_FRACTION. +- Modified coarsening schemes to better support non FE graphs. + + +metis-5.0.1 +------------------------------------------------------------------------ +r10709 | karypis | 2011-08-31 16:07:57 -0500 (Wed, 31 Aug 2011) + +- Fixed critical bug in the mesh partitioning routines. 
+ + +metis-5.0 +------------------------------------------------------------------------ +r10667 | karypis | 2011-08-04 00:35:30 -0500 (Thu, 04 Aug 2011) + +- Updated/corrected error messages. +- Addressed some build issues. + + +metis-5.0rc3 +------------------------------------------------------------------------ +r10560 | karypis | 2011-07-13 08:19:10 -0500 (Wed, 13 Jul 2011) + +- Fixed various bugs that were identified by testers. +- Some minor performance and quality improvements. +- Addressed some build issues. + + +metis-5.0rc2 +------------------------------------------------------------------------ +r10496 | karypis | 2011-07-06 11:04:45 -0500 (Wed, 06 Jul 2011) + +- Various run-time and quality optimizations. +- Option error-checking. +- Signal-based heap cleanup on error. Metis API routines will not + return nicely and cleanup all memory that may have allocated. +- Reduced memory requirements. +- Fixed various bugs identified in rc1. +- Added back Fortran support in the form of alternate API names + (see libmetis/frename.h). +- Minor code changes to accommodate ParMetis 4.0. + + +metis-5.0rc1 +------------------------------------------------------------------------ +r10227 | karypis | 2011-06-13 23:35:05 -0500 (Mon, 13 Jun 2011) + +- A nearly complete re-write of Metis' code-based that changed expanded + the functionality of the command-line programs and API routines. +- Multi-constraint partitioning can be used in conjunction with + minimization of the total communication volume. +- All graph and mesh partitioning routines take as input the target + sizes of the partitions, which among others, allow them to compute + partitioning solutions that are well-suited for parallel architectures + with heterogeneous computing capabilities. +- When multi-constraint partitioning is used, the target sizes of the + partitions are specified on a per partition-constraint pair. 
+- The multilevel k-way partitioning algorithms can compute a + partitioning solution in which each partition is contiguous. +- All partitioning and ordering routines can compute multiple different + solutions and select the best as the final solution. +- The mesh partitioning and mesh-to-graph conversion routines can + operate on mixed element meshes. +- The command-line programs provide full access to the entire set of + capabilities provided by Metis' API. +- Re-written the memory management subsystem to reduce overall memory + requirements. + + + +metis-5.0pre2 +------------------------------------------------------------------------ +r1437 | karypis | 2007-04-07 23:16:16 -0500 (Sat, 07 Apr 2007) + +- Added installation instructions and change-logs. +- Tested 32bit & 64bit on 64bit architectures and passed tests. +- Tested 32bit on 32bit architectures and passed tests. +- strtoidx() addition for portable input file parsing +- Restructured the internal memory allocation schemes for graph and + refinement data. This should enhance portability and make the code + easier to maintain. +- Fixed some bad memory allocation calls (i.e., sizeof(x)/sizeof(idxtype). + However, there are tons of those and need to be corrected once and for + all by eliminating workspace and the associated mallocs. +- Added mprint/mscanf family of functions for portable formated I/O + of the idxtype datatype. The specifier for this datatype is %D. + All library routines use this function for printing. + The implementation of these routines is not very efficient, but + that should do for now (in principle these routines should not be + used unless debugging). +- Incorporated GKlib into METIS, which replaced many of its internal + functions. GKlib's malloc interface will enable graceful and clean + aborts (i.e., free all internally allocated memory) on fatal errors. + This will probably be available in the next pre-release. 
+- Fixed the problems associated with metis.h that were identified by + David (flyspray Issue #9). + + +METIS 4.0.2, 3/10/04 +------------------------------------------------------------------------------ +- Fixed a problem with weighted graphs and ometis.c + + +METIS 4.0.1, 11/29/98 +------------------------------------------------------------------------------ +This is mostly a bug-fix release + + - Fixed some bugs in the multi-constraint partitioning routines + - Fixed some bugs in the volume-minimization routines + + + +METIS 4.0.0, 9/20/98 +------------------------------------------------------------------------------ +METIS 4.0 contains a number of changes over the previous major release (ver +3.0.x). Most of these changes are concentrated on the graph and mesh +partitioning routines and they do not affect the sparse matrix re-ordering +routines. Here is a list of the major changes: + + Multi-Constraint Partitioning + ----------------------------- + METIS now includes partitioning routines that can be used to a partition + a graph in the presence of multiple balancing constraints. + + Minimizing the Total Communication Volume + ----------------------------------------- + METIS now includes partitioning routines whose objective is to minimize + the total communication volume (as opposed to minimizing the edge-cut). + + Minimizing the Maximum Connectivity of the Subdomains + ----------------------------------------------------- + The k-way partitioning routines in METIS can now directly minimize the number + of adjacent subdomains. For most graphs corresponding to finite element + meshes, METIS is able to significantly reduce the maximum (and total) number of + adjacent subdomains. 
+ + + + +METIS 3.0.6, 1/28/98 +------------------------------------------------------------------------------- + - Fixed some problems when too many partitions were asked, and each partition + end up having 0 vertices + - Fixed some bugs in the I/O routines + - Added support for the g77 compiler under Linux + + +METIS 3.0.5, 12/22/97 +------------------------------------------------------------------------------- + - Fixed problems on 64-bit architectures (eg., -64 option on SGIs). + - Added some options in Makefile.in + + +METIS 3.0.4, 12/1/97 +------------------------------------------------------------------------------- + Fixed a memory leak in the ordering code. + + +METIS 3.0.3, 11/5/97 +------------------------------------------------------------------------------- + This is mostly a bug-fix release with just a few additions + + Added functionality + - Added support for quadrilateral elements. + - Added a routine METIS_EstimateMemory that estimates the amount of + memory that will be allocated by METIS. This is useful in determining + if a problem can run on your system. + - Added hooks to allow PARMETIS to use the orderings produced by METIS. + This is hidden from the user but it will be used in the next release + of PARMETIS. + + Bug-fixes + - Fixed a bug related to memory allocation. This should somewhat reduce the + overall memory used by METIS. + - Fixed some bugs in the 'graphchk' program in the case of weighted graphs. + - Removed some code corresponding to unused options. + - Fixed some minor bugs in the node-refinement code + + + +------------------------------------------------------------------------------- +METIS 3.0 contains a number of changes over METIS 2.0. +The major changes are the following: + + General Changes + --------------- + 1. Added code to directly partition finite element meshes. + + 2. Added code to convert finite element meshes into graphs so they + can be used by METIS. + + 1. 
The names, calling sequences, and options of the routines in + METISlib have been changed. + + 2. Better support has been added for Fortran programs. + + 3. Eliminated the 'metis' program. The only way to tune METIS's + behavior is to use METISlib. + + 4. Improved memory management. METIS should now only abort if truly + there is no more memory left in the system. + + + Graph Partitioning + ------------------ + 1. Added partitioning routines that can be used to compute a partition + with prescribed partition weights. For example, they can be used to + compute a 3-way partition such that partition 1 has 50% of the weight, + partition 2 has 20% of the way, and partition 3 has 30% of the weight. + + 2. Improved the speed of the k-way partitioning algorithm (kmetis). The + new code has better cache locality which dramatically improves the + speed for large graphs. A factor of 4 speedup can be obtained for + certain graphs. METIS can now partition a 4 million node graph + in well under a minute on a MIPS R10000. + + 3. Eliminated some of the options that were seldom used. + + + Fill-Reducing Orderings + ---------------------- + 1. Added a node based ordering code `onmetis' that greatly improves + ordering quality. + + 2. Improved the quality of the orderings produced by the original + edge-based ordering code (it is now called 'oemetis'). + + 3. METIS can now analyze the graph and try to compress together + nodes with identical sparsity pattern. For some problems, this + significantly reduces ordering time + + 4. METIS can now prune dense columns prior to ordering. This can be + helpful for LP matrices. + + + Mesh Partitioning + ----------------- + 1. METIS can now directly partition the element node array of finite + element meshes. It produces two partitioning vectors. One for the + elements and one for the nodes. METIS supports the following + elements: triangles, tetrahedra, hexahedra + + + Mesh-To-Graph Conversion Routines + --------------------------------- + 1. 
METIS now includes a number of mesh conversion functions that can + be used to create the dual and nodal graphs directly from the + element connectivity arrays. These are highly optimized routines. + + + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/.gitignore b/3rdParty/metis/metis-5.1.1/GKlib/.gitignore new file mode 100644 index 000000000..80d93c4c3 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/.gitignore @@ -0,0 +1,57 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +# GK things +build/ +lib/ +.svn/
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/GKlib/CMakeLists.txt
new file mode 100644
index 000000000..9cd1b4bfa
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 2.8...3.10)
+project(GKlib C)
+
+option(BUILD_SHARED_LIBS "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" OFF)
+
+# GKlibSystem.cmake finds the sources and its TLS probe file through GKLIB_PATH.
+set(GKLIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
+include("${GKLIB_PATH}/GKlibSystem.cmake")
+
+include_directories(".")
+if(MSVC)
+  include_directories("win32")
+  # Collect the Windows-only sources that live in the win32/ subdirectory.
+  file(GLOB win32_sources "${GKLIB_PATH}/win32/*.c")
+else()
+  set(win32_sources "")
+endif()
+
+add_library(GKlib ${GKlib_sources} ${win32_sources})
+
+if(UNIX)
+  # Link against the C math library.
+  target_link_libraries(GKlib m)
+endif()
+
+include_directories("test")
+add_subdirectory("test")
+
+install(TARGETS GKlib
+  ARCHIVE DESTINATION lib/${LINSTALL_PATH}
+  LIBRARY DESTINATION lib/${LINSTALL_PATH})
+install(FILES ${GKlib_includes} DESTINATION include/${HINSTALL_PATH})
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/GKlib.h b/3rdParty/metis/metis-5.1.1/GKlib/GKlib.h new file mode 100644 index 000000000..9278fe414 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/GKlib.h @@ -0,0 +1,85 @@ +/* + * GKlib.h + * + * George's library of most frequently used routines + * + * $Id: GKlib.h 14866 2013-08-03 16:40:04Z karypis $ + * + */ + +#ifndef _GKLIB_H_ +#define _GKLIB_H_ 1 + +#define GKMSPACE + +#if defined(_MSC_VER) +#define __MSC__ +#endif +#if defined(__ICC) +#define __ICC__ +#endif + + +#include "gk_arch.h" /*!< This should be here, prior to the includes */ + + +/************************************************************************* +* Header file inclusion section +**************************************************************************/ +#include <stddef.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <memory.h> +#include <errno.h> +#include <ctype.h> +#include <math.h> +#include <float.h> +#include <time.h> +#include <string.h> +#include <limits.h> +#include <signal.h> +#include <setjmp.h> +#include <assert.h> +#include <sys/stat.h> + +#if defined(__WITHPCRE__) + #include <pcreposix.h> +#else + #if defined(USE_GKREGEX) + #include "gkregex.h" + #else + #include <regex.h> + #endif /* defined(USE_GKREGEX) */ +#endif /* defined(__WITHPCRE__) */ + + + +#if defined(__OPENMP__) +#include <omp.h> +#endif + + + +
+#include <gk_types.h>
+#include <gk_struct.h>
+#include <gk_externs.h>
+#include <gk_defs.h>
+#include <gk_macros.h>
+#include <gk_getopt.h>
+
+#include <gk_mksort.h>
+#include <gk_mkblas.h>
+#include <gk_mkmemory.h>
+#include <gk_mkpqueue.h>
+#include <gk_mkpqueue2.h>
+#include <gk_mkrandom.h>
+#include <gk_mkutils.h>
+
+#include <gk_proto.h>
+
+
+#endif /* GKlib.h */
+
+
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake b/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake
new file mode 100644
index 000000000..d83b2083c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/GKlibSystem.cmake
@@ -0,0 +1,137 @@
+# Helper modules.
+include(CheckFunctionExists)
+include(CheckIncludeFile)
+
+# Setup options.
+option(GDB "enable use of GDB" OFF)
+option(ASSERT "turn asserts on" OFF)
+option(ASSERT2 "additional assertions" OFF)
+option(DEBUG "add debugging support" OFF)
+option(GPROF "add gprof support" OFF)
+option(OPENMP "enable OpenMP support" OFF)
+option(PCRE "enable PCRE support" OFF)
+option(GKREGEX "enable GKREGEX support" OFF)
+option(GKRAND "enable GKRAND support" OFF)
+
+
+# Add compiler flags.
+if(MSVC)
+  set(GKlib_COPTS "/Ox")
+  set(GKlib_COPTIONS "-DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE -DUSE_GKREGEX")
+elseif(MINGW)
+  set(GKlib_COPTIONS "-DUSE_GKREGEX")  # not GKlib_COPTS: that variable is overwritten further down
+else()
+  set(GKlib_COPTIONS "-DLINUX -D_FILE_OFFSET_BITS=64")
+endif(MSVC)
+if(CYGWIN)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DCYGWIN")
+endif(CYGWIN)
+if(CMAKE_COMPILER_IS_GNUCC)
+# GCC opts.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -std=c99 -fno-strict-aliasing")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=native")
+  if(NOT MINGW)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -fPIC")
+  endif(NOT MINGW)
+# GCC warnings.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror -Wall -pedantic -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unknown-pragmas -Wno-unused-label")
+elseif(${CMAKE_C_COMPILER_ID} MATCHES "Sun")
+# Sun insists on -xc99.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xc99")
+endif(CMAKE_COMPILER_IS_GNUCC)
+
+# Intel compiler
+if(${CMAKE_C_COMPILER_ID} MATCHES "Intel")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xHost -std=c99")
+endif()
+
+# Find OpenMP if it is requested.
+if(OPENMP)
+  include(FindOpenMP)
+  if(OPENMP_FOUND)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__OPENMP__ ${OpenMP_C_FLAGS}")
+  else()
+    message(WARNING "OpenMP was requested but support was not found")
+  endif(OPENMP_FOUND)
+endif(OPENMP)
+
+
+# Add various definitions.
+if(GDB)
+  set(GKlib_COPTS "${GKlib_COPTS} -g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror")
+else()
+  set(GKlib_COPTS "-O3")
+endif(GDB)
+
+
+if(DEBUG)
+  set(GKlib_COPTS "-g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DDEBUG")
+endif(DEBUG)
+
+if(GPROF)
+  set(GKlib_COPTS "-pg")
+endif(GPROF)
+
+if(NOT ASSERT)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG")
+endif(NOT ASSERT)
+
+if(NOT ASSERT2)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG2")
+endif(NOT ASSERT2)
+
+
+# Add various options
+if(PCRE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__WITHPCRE__")
+endif(PCRE)
+
+if(GKREGEX)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKREGEX")
+endif(GKREGEX)
+
+if(GKRAND)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKRAND")
+endif(GKRAND)
+
+
+# Check for features.
+check_include_file(execinfo.h HAVE_EXECINFO_H)
+if(HAVE_EXECINFO_H)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_EXECINFO_H")
+endif(HAVE_EXECINFO_H)
+
+check_function_exists(getline HAVE_GETLINE)
+if(HAVE_GETLINE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_GETLINE")
+endif(HAVE_GETLINE)
+
+
+# Custom check for TLS.
+if(MSVC)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=__declspec(thread)")
+
+  # Probe the compiler only once: try_compile() caches its result variable, so skip the probe when it is already defined.
+  if(NOT DEFINED HAVE_THREADLOCALSTORAGE)
+    try_compile(HAVE_THREADLOCALSTORAGE
+      ${CMAKE_BINARY_DIR}
+      ${GKLIB_PATH}/conf/check_thread_storage.c)
+    if(HAVE_THREADLOCALSTORAGE)
+      message(STATUS "checking for thread-local storage - found")
+    else()
+      message(STATUS "checking for thread-local storage - not found")
+    endif()
+  endif()
+  if(NOT HAVE_THREADLOCALSTORAGE)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=")
+  endif()
+endif()
+
+# Finally set the official C flags.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GKlib_COPTIONS} ${GKlib_COPTS}")
+
+# Find GKlib sources.
+file(GLOB GKlib_sources ${GKLIB_PATH}/*.c)
+file(GLOB GKlib_includes ${GKLIB_PATH}/*.h)
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/LICENSE.txt b/3rdParty/metis/metis-5.1.1/GKlib/LICENSE.txt
new file mode 100644
index 000000000..b61ca6f49
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/LICENSE.txt
@@ -0,0 +1,18 @@
+
+Copyright & License Notice
+---------------------------
+
+Copyright 1995-2018, Regents of the University of Minnesota
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied. See the License for the specific language governing
+permissions and limitations under the License.
+
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/Makefile b/3rdParty/metis/metis-5.1.1/GKlib/Makefile
new file mode 100644
index 000000000..c9543d44c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/Makefile
@@ -0,0 +1,80 @@
+# Configuration options.
+cc = gcc
+prefix = ~/local
+openmp = not-set
+gdb = not-set
+assert = not-set
+assert2 = not-set
+debug = not-set
+gprof = not-set
+pcre = not-set
+gkregex = not-set
+gkrand = not-set
+
+
+# Basically proxies everything to the builddir cmake.
+cputype = $(shell uname -m | sed "s/\\ /_/g")
+systype = $(shell uname -s)
+
+BUILDDIR = build/$(systype)-$(cputype)
+
+# Process configuration options: each option that was set is forwarded to cmake as -D<OPTION>=<its own value>.
+CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+ifneq ($(gdb), not-set)
+    CONFIG_FLAGS += -DGDB=$(gdb)
+endif
+ifneq ($(assert), not-set)
+    CONFIG_FLAGS += -DASSERT=$(assert)
+endif
+ifneq ($(assert2), not-set)
+    CONFIG_FLAGS += -DASSERT2=$(assert2)
+endif
+ifneq ($(debug), not-set)
+    CONFIG_FLAGS += -DDEBUG=$(debug)
+endif
+ifneq ($(gprof), not-set)
+    CONFIG_FLAGS += -DGPROF=$(gprof)
+endif
+ifneq ($(openmp), not-set)
+    CONFIG_FLAGS += -DOPENMP=$(openmp)
+endif
+ifneq ($(pcre), not-set)
+    CONFIG_FLAGS += -DPCRE=$(pcre)
+endif
+ifneq ($(gkregex), not-set)
+    CONFIG_FLAGS += -DGKREGEX=$(gkregex)
+endif
+ifneq ($(gkrand), not-set)
+    CONFIG_FLAGS += -DGKRAND=$(gkrand)
+endif
+ifneq ($(prefix), not-set)
+    CONFIG_FLAGS += -DCMAKE_INSTALL_PREFIX=$(prefix)
+endif
+ifneq ($(cc), not-set)
+    CONFIG_FLAGS += -DCMAKE_C_COMPILER=$(cc)
+endif
+
+define run-config
+mkdir -p $(BUILDDIR)
+cd $(BUILDDIR) && cmake $(CURDIR) $(CONFIG_FLAGS)
+endef
+
+all clean install: $(BUILDDIR)
+	make -C $(BUILDDIR) $@
+
+uninstall:
+	xargs rm < $(BUILDDIR)/install_manifest.txt
+
+$(BUILDDIR):
+	$(run-config)
+
+config: distclean
+	$(run-config)
+
+distclean:
+	rm -rf $(BUILDDIR)
+
+remake:
+	find . 
-name CMakeLists.txt -exec touch {} ';' + +.PHONY: config distclean all clean install uninstall remake diff --git a/3rdParty/metis/metis-5.1.1/GKlib/README.md b/3rdParty/metis/metis-5.1.1/GKlib/README.md new file mode 100644 index 000000000..f94eeea36 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/README.md @@ -0,0 +1,54 @@ +# GKlib +A library of various helper routines and frameworks used by many of the lab's software + +## Build requirements + - CMake 2.8, found at http://www.cmake.org/, as well as GNU make. + +Assuming that the above are available, two commands should suffice to +build the software: +``` +make config +make +``` + +## Configuring the build +It is primarily configured by passing options to make config. For example: +``` +make config cc=icc +``` + +would configure it to be built using icc. + +Configuration options are: +``` +cc=[compiler] - The C compiler to use [default: gcc] +prefix=[PATH] - Set the installation prefix [default: ~/local] +openmp=set - To build a version with OpenMP support +``` + + +## Building and installing +To build and install, run the following +``` +make +make install +``` + +By default, the library file, header file, and binaries will be installed in +``` +~/local/lib +~/local/include +~/local/bin +``` + +## Other make commands + make uninstall + Removes all files installed by 'make install'. + + make clean + Removes all object files but retains the configuration options. + + make distclean + Performs clean and completely removes the build directory. + + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/b64.c b/3rdParty/metis/metis-5.1.1/GKlib/b64.c new file mode 100644 index 000000000..afacd68a1 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/b64.c @@ -0,0 +1,95 @@ +/*! +\file b64.c +\brief This file contains some simple 8bit-to-6bit encoding/deconding routines + +Most of these routines are outdated and should be converted using glibc's equivalent +routines. 
+ +\date Started 2/22/05 +\author George +\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim + +\verbatim +$Copyright$ +$License$ +\endverbatim + +*/ + + +#include "GKlib.h" + +#define B64OFFSET 48 /* This is the '0' number */ + + +/****************************************************************************** +* Encode 3 '8-bit' binary bytes as 4 '6-bit' characters +*******************************************************************************/ +void encodeblock(unsigned char *in, unsigned char *out) +{ + out[0] = (in[0] >> 2); + out[1] = (((in[0] & 0x03) << 4) | (in[1] >> 4)); + out[2] = (((in[1] & 0x0f) << 2) | (in[2] >> 6)); + out[3] = (in[2] & 0x3f); + + out[0] += B64OFFSET; + out[1] += B64OFFSET; + out[2] += B64OFFSET; + out[3] += B64OFFSET; + +// printf("%c %c %c %c %2x %2x %2x %2x %2x %2x %2x\n", out[0], out[1], out[2], out[3], out[0], out[1], out[2], out[3], in[0], in[1], in[2]); +} + +/****************************************************************************** +* Decode 4 '6-bit' characters into 3 '8-bit' binary bytes +*******************************************************************************/ +void decodeblock(unsigned char *in, unsigned char *out) +{ + in[0] -= B64OFFSET; + in[1] -= B64OFFSET; + in[2] -= B64OFFSET; + in[3] -= B64OFFSET; + + out[0] = (in[0] << 2 | in[1] >> 4); + out[1] = (in[1] << 4 | in[2] >> 2); + out[2] = (in[2] << 6 | in[3]); +} + + +/****************************************************************************** +* This function encodes an input array of bytes into a base64 encoding. Memory +* for the output array is assumed to have been allocated by the calling program +* and be sufficiently large. The output string is NULL terminated. 
+*******************************************************************************/ +void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer) +{ + int i, j; + + if (nbytes%3 != 0) + gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes); + + for (j=0, i=0; i<nbytes; i+=3, j+=4) + encodeblock(inbuffer+i, outbuffer+j); + +//printf("%d %d\n", nbytes, j); + outbuffer[j] = '\0'; +} + + + +/****************************************************************************** +* This function decodes an input array of base64 characters into their actual +* 8-bit codes. Memory * for the output array is assumed to have been allocated +* by the calling program and be sufficiently large. The padding is discarded. +*******************************************************************************/ +void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer) +{ + int i, j; + + if (nbytes%4 != 0) + gk_errexit(SIGERR, "GKDecodeBase64: Input buffer size should be a multiple of 4! (%d)\n", nbytes); + + for (j=0, i=0; i<nbytes; i+=4, j+=3) + decodeblock(inbuffer+i, outbuffer+j); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/blas.c b/3rdParty/metis/metis-5.1.1/GKlib/blas.c new file mode 100644 index 000000000..a0b95ca7a --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/blas.c @@ -0,0 +1,37 @@ +/*! +\file blas.c +\brief This file contains GKlib's implementation of BLAS-like routines + +The BLAS routines that are currently implemented are mostly level-one. +They follow a naming convention of the type gk_[type][name], where +[type] is one of c, i, f, and d, based on C's four standard scalar +datatypes of characters, integers, floats, and doubles. + +These routines are implemented using a generic macro template, +which is used for code generation. 
+ +\date Started 9/28/95 +\author George +\version\verbatim $Id: blas.c 14330 2013-05-18 12:15:15Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + + + +/*************************************************************************/ +/*! Use the templates to generate BLAS routines for the scalar data types */ +/*************************************************************************/ +GK_MKBLAS(gk_c, char, int) +GK_MKBLAS(gk_i, int, int) +GK_MKBLAS(gk_i32, int32_t, int32_t) +GK_MKBLAS(gk_i64, int64_t, int64_t) +GK_MKBLAS(gk_z, ssize_t, ssize_t) +GK_MKBLAS(gk_zu, size_t, size_t) +GK_MKBLAS(gk_f, float, float) +GK_MKBLAS(gk_d, double, double) +GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t) + + + + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/cache.c b/3rdParty/metis/metis-5.1.1/GKlib/cache.c new file mode 100644 index 000000000..932e36d91 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/cache.c @@ -0,0 +1,126 @@ +/*! +\file +\brief Functions dealing with simulating cache behavior for performance + modeling and analysis; + +\date Started 4/13/18 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version $Id: cache.c 21991 2018-04-16 03:08:12Z karypis $ +*/ + +#include <GKlib.h> + + +/*************************************************************************/ +/*! 
This function creates a cache + */ +/*************************************************************************/ +gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits) +{ + gk_cache_t *cache; + + cache = (gk_cache_t *)gk_malloc(sizeof(gk_cache_t), "gk_cacheCreate: cache"); + memset(cache, 0, sizeof(gk_cache_t)); + + cache->nway = nway; + cache->lnbits = lnbits; + cache->cnbits = cnbits; + cache->csize = 1<<cnbits; + cache->cmask = cache->csize-1; + + cache->latimes = gk_ui64smalloc(cache->csize*nway, 0, "gk_cacheCreate: latimes"); + cache->clines = gk_zusmalloc(cache->csize*nway, 0, "gk_cacheCreate: clines"); + + return cache; +} + + +/*************************************************************************/ +/*! This function resets a cache + */ +/*************************************************************************/ +void gk_cacheReset(gk_cache_t *cache) +{ + cache->nhits = 0; + cache->nmisses = 0; + + gk_ui64set(cache->csize*cache->nway, 0, cache->latimes); + gk_zuset(cache->csize*cache->nway, 0, cache->clines); + + return; +} + + +/*************************************************************************/ +/*! This function destroys a cache. + */ +/*************************************************************************/ +void gk_cacheDestroy(gk_cache_t **r_cache) +{ + gk_cache_t *cache = *r_cache; + + if (cache == NULL) + return; + + gk_free((void **)&cache->clines, &cache->latimes, &cache, LTERM); + + *r_cache = NULL; +} + + +/*************************************************************************/ +/*! This function simulates a load(ptr) operation. 
+ */ +/*************************************************************************/ +int gk_cacheLoad(gk_cache_t *cache, size_t addr) +{ + uint32_t i, nway=cache->nway; + size_t lru=0; + + //printf("%16"PRIx64" ", (uint64_t)addr); + addr = addr>>(cache->lnbits); + //printf("%16"PRIx64" %16"PRIx64" %16"PRIx64" ", (uint64_t)addr, (uint64_t)addr&(cache->cmask), (uint64_t)cache->cmask); + + size_t *clines = cache->clines + (addr&(cache->cmask)); + uint64_t *latimes = cache->latimes + (addr&(cache->cmask)); + + cache->clock++; + for (i=0; i<nway; i++) { /* look for hits */ + if (clines[i] == addr) { + cache->nhits++; + latimes[i] = cache->clock; + goto DONE; + } + } + + for (i=0; i<nway; i++) { /* look for empty spots or the lru spot */ + if (clines[i] == 0) { + lru = i; + break; + } + else if (latimes[i] < latimes[lru]) { + lru = i; + } + } + + /* initial fill or replace */ + cache->nmisses++; + clines[lru] = addr; + latimes[lru] = cache->clock; + +DONE: + //printf(" %"PRIu64" %"PRIu64"\n", cache->nhits, cache->clock); + return 1; +} + + +/*************************************************************************/ +/*! This function returns the cache's hitrate + */ +/*************************************************************************/ +double gk_cacheGetHitRate(gk_cache_t *cache) +{ + return ((double)cache->nhits)/((double)(cache->clock+1)); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/conf/check_thread_storage.c b/3rdParty/metis/metis-5.1.1/GKlib/conf/check_thread_storage.c new file mode 100644 index 000000000..e6e1e980e --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/conf/check_thread_storage.c @@ -0,0 +1,5 @@ +extern __thread int x; + +int main(int argc, char **argv) { + return 0; +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/csr.c b/3rdParty/metis/metis-5.1.1/GKlib/csr.c new file mode 100644 index 000000000..7e92a0c31 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/csr.c @@ -0,0 +1,3378 @@ +/*! 
+ * \file + * + * \brief Various routines with dealing with CSR matrices + * + * \author George Karypis + * \version\verbatim $Id: csr.c 21044 2017-05-24 22:50:32Z karypis $ \endverbatim + */ + +#include <GKlib.h> + +#define OMPMINOPS 50000 + +/*************************************************************************/ +/*! Allocate memory for a CSR matrix and initializes it + \returns the allocated matrix. The various fields are set to NULL. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Create() +{ + gk_csr_t *mat=NULL; + + if ((mat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat"))) + gk_csr_Init(mat); + + return mat; +} + + +/*************************************************************************/ +/*! Initializes the matrix + \param mat is the matrix to be initialized. +*/ +/*************************************************************************/ +void gk_csr_Init(gk_csr_t *mat) +{ + memset(mat, 0, sizeof(gk_csr_t)); + mat->nrows = mat->ncols = 0; +} + + +/*************************************************************************/ +/*! Frees all the memory allocated for matrix. + \param mat is the matrix to be freed. +*/ +/*************************************************************************/ +void gk_csr_Free(gk_csr_t **mat) +{ + if (*mat == NULL) + return; + gk_csr_FreeContents(*mat); + gk_free((void **)mat, LTERM); +} + + +/*************************************************************************/ +/*! Frees only the memory allocated for the matrix's different fields and + sets them to NULL. + \param mat is the matrix whose contents will be freed. 
+*/ +/*************************************************************************/ +void gk_csr_FreeContents(gk_csr_t *mat) +{ + gk_free((void *)&mat->rowptr, &mat->rowind, &mat->rowval, + &mat->rowids, &mat->rlabels, &mat->rmap, + &mat->colptr, &mat->colind, &mat->colval, + &mat->colids, &mat->clabels, &mat->cmap, + &mat->rnorms, &mat->cnorms, &mat->rsums, &mat->csums, + &mat->rsizes, &mat->csizes, &mat->rvols, &mat->cvols, + &mat->rwgts, &mat->cwgts, + LTERM); +} + + +/*************************************************************************/ +/*! Returns a copy of a matrix. + \param mat is the matrix to be duplicated. + \returns the newly created copy of the matrix. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Dup(gk_csr_t *mat) +{ + gk_csr_t *nmat; + + nmat = gk_csr_Create(); + + nmat->nrows = mat->nrows; + nmat->ncols = mat->ncols; + + /* copy the row structure */ + if (mat->rowptr) + nmat->rowptr = gk_zcopy(mat->nrows+1, mat->rowptr, + gk_zmalloc(mat->nrows+1, "gk_csr_Dup: rowptr")); + if (mat->rowids) + nmat->rowids = gk_icopy(mat->nrows, mat->rowids, + gk_imalloc(mat->nrows, "gk_csr_Dup: rowids")); + if (mat->rlabels) + nmat->rlabels = gk_icopy(mat->nrows, mat->rlabels, + gk_imalloc(mat->nrows, "gk_csr_Dup: rlabels")); + if (mat->rnorms) + nmat->rnorms = gk_fcopy(mat->nrows, mat->rnorms, + gk_fmalloc(mat->nrows, "gk_csr_Dup: rnorms")); + if (mat->rsums) + nmat->rsums = gk_fcopy(mat->nrows, mat->rsums, + gk_fmalloc(mat->nrows, "gk_csr_Dup: rsums")); + if (mat->rsizes) + nmat->rsizes = gk_fcopy(mat->nrows, mat->rsizes, + gk_fmalloc(mat->nrows, "gk_csr_Dup: rsizes")); + if (mat->rvols) + nmat->rvols = gk_fcopy(mat->nrows, mat->rvols, + gk_fmalloc(mat->nrows, "gk_csr_Dup: rvols")); + if (mat->rwgts) + nmat->rwgts = gk_fcopy(mat->nrows, mat->rwgts, + gk_fmalloc(mat->nrows, "gk_csr_Dup: rwgts")); + if (mat->rowind) + nmat->rowind = gk_icopy(mat->rowptr[mat->nrows], mat->rowind, + 
gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowind")); + if (mat->rowval) + nmat->rowval = gk_fcopy(mat->rowptr[mat->nrows], mat->rowval, + gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowval")); + + /* copy the col structure */ + if (mat->colptr) + nmat->colptr = gk_zcopy(mat->ncols+1, mat->colptr, + gk_zmalloc(mat->ncols+1, "gk_csr_Dup: colptr")); + if (mat->colids) + nmat->colids = gk_icopy(mat->ncols, mat->colids, + gk_imalloc(mat->ncols, "gk_csr_Dup: colids")); + if (mat->clabels) + nmat->clabels = gk_icopy(mat->ncols, mat->clabels, + gk_imalloc(mat->ncols, "gk_csr_Dup: clabels")); + if (mat->cnorms) + nmat->cnorms = gk_fcopy(mat->ncols, mat->cnorms, + gk_fmalloc(mat->ncols, "gk_csr_Dup: cnorms")); + if (mat->csums) + nmat->csums = gk_fcopy(mat->ncols, mat->csums, + gk_fmalloc(mat->ncols, "gk_csr_Dup: csums")); + if (mat->csizes) + nmat->csizes = gk_fcopy(mat->ncols, mat->csizes, + gk_fmalloc(mat->ncols, "gk_csr_Dup: csizes")); + if (mat->cvols) + nmat->cvols = gk_fcopy(mat->ncols, mat->cvols, + gk_fmalloc(mat->ncols, "gk_csr_Dup: cvols")); + if (mat->cwgts) + nmat->cwgts = gk_fcopy(mat->ncols, mat->cwgts, + gk_fmalloc(mat->ncols, "gk_csr_Dup: cwgts")); + if (mat->colind) + nmat->colind = gk_icopy(mat->colptr[mat->ncols], mat->colind, + gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colind")); + if (mat->colval) + nmat->colval = gk_fcopy(mat->colptr[mat->ncols], mat->colval, + gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colval")); + + return nmat; +} + + +/*************************************************************************/ +/*! Returns a submatrix containint a set of consecutive rows. + \param mat is the original matrix. + \param rstart is the starting row. + \param nrows is the number of rows from rstart to extract. + \returns the row structure of the newly created submatrix. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
+{
+  ssize_t i;
+  gk_csr_t *nmat;
+  /* reject a missing row structure and any row range that falls outside the matrix */
+  if (mat->rowptr == NULL || rstart < 0 || nrows < 0 || rstart+nrows > mat->nrows)
+    return NULL;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  /* copy the row structure */
+  if (mat->rowptr)
+    nmat->rowptr = gk_zcopy(nrows+1, mat->rowptr+rstart,
+                            gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr"));
+  for (i=nrows; i>=0; i--)
+    nmat->rowptr[i] -= nmat->rowptr[0];
+  ASSERT(nmat->rowptr[0] == 0);
+
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nrows, mat->rowids+rstart,
+                            gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart,
+                            gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
+
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart,
+                           gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
+
+  ASSERT(nmat->rowptr[nrows] == mat->rowptr[rstart+nrows]-mat->rowptr[rstart]);
+  if (mat->rowind)
+    nmat->rowind = gk_icopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                            mat->rowind+mat->rowptr[rstart],
+                            gk_imalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                                       "gk_csr_ExtractSubmatrix: rowind"));
+  if (mat->rowval)
+    nmat->rowval = gk_fcopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                            mat->rowval+mat->rowptr[rstart],
+                            gk_fmalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                                       "gk_csr_ExtractSubmatrix: rowval"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a certain set of rows.
+    \param mat is the original matrix.
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created submatrix. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind) +{ + ssize_t i, ii, j, nnz; + gk_csr_t *nmat; + + nmat = gk_csr_Create(); + + nmat->nrows = nrows; + nmat->ncols = mat->ncols; + + for (nnz=0, i=0; i<nrows; i++) + nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]]; + + nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr"); + nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind"); + nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval"); + + nmat->rowptr[0] = 0; + for (nnz=0, j=0, ii=0; ii<nrows; ii++) { + i = rind[ii]; + gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz); + gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz); + nnz += mat->rowptr[i+1]-mat->rowptr[i]; + nmat->rowptr[++j] = nnz; + } + ASSERT(j == nmat->nrows); + + return nmat; +} + + +/*************************************************************************/ +/*! Returns a submatrix corresponding to a specified partitioning of rows. + \param mat is the original matrix. + \param part is the partitioning vector of the rows. + \param pid is the partition ID that will be extracted. + \returns the row structure of the newly created submatrix. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid) +{ + ssize_t i, j, nnz; + gk_csr_t *nmat; + + nmat = gk_csr_Create(); + + nmat->nrows = 0; + nmat->ncols = mat->ncols; + + for (nnz=0, i=0; i<mat->nrows; i++) { + if (part[i] == pid) { + nmat->nrows++; + nnz += mat->rowptr[i+1]-mat->rowptr[i]; + } + } + + nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr"); + nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind"); + nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval"); + + nmat->rowptr[0] = 0; + for (nnz=0, j=0, i=0; i<mat->nrows; i++) { + if (part[i] == pid) { + gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz); + gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz); + nnz += mat->rowptr[i+1]-mat->rowptr[i]; + nmat->rowptr[++j] = nnz; + } + } + ASSERT(j == nmat->nrows); + + return nmat; +} + + +/*************************************************************************/ +/*! Splits the matrix into multiple sub-matrices based on the provided + color array. + \param mat is the original matrix. + \param color is an array of size equal to the number of non-zeros + in the matrix (row-wise structure). The matrix is split into + as many parts as the number of colors. For meaningfull results, + the colors should be numbered consecutively starting from 0. + \returns an array of matrices for each supplied color number. 
+*/ +/**************************************************************************/ +gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color) +{ + ssize_t i, j; + int nrows, ncolors; + ssize_t *rowptr; + int *rowind; + float *rowval; + gk_csr_t **smats; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + ncolors = gk_imax(rowptr[nrows], color, 1)+1; + + smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats"); + for (i=0; i<ncolors; i++) { + smats[i] = gk_csr_Create(); + smats[i]->nrows = mat->nrows; + smats[i]->ncols = mat->ncols; + smats[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr"); + } + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + smats[color[j]]->rowptr[i]++; + } + for (i=0; i<ncolors; i++) + MAKECSR(j, nrows, smats[i]->rowptr); + + for (i=0; i<ncolors; i++) { + smats[i]->rowind = gk_imalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowind"); + smats[i]->rowval = gk_fmalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowval"); + } + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + smats[color[j]]->rowind[smats[color[j]]->rowptr[i]] = rowind[j]; + smats[color[j]]->rowval[smats[color[j]]->rowptr[i]] = rowval[j]; + smats[color[j]]->rowptr[i]++; + } + } + + for (i=0; i<ncolors; i++) + SHIFTCSR(j, nrows, smats[i]->rowptr); + + return smats; +} + + +/**************************************************************************/ +/*! Determines the format of the CSR matrix based on the extension. + \param filename is the name of the file. + \param the user-supplied format. + \returns the type. The extension of the file directly maps to the + name of the format. 
+*/ +/**************************************************************************/ +int gk_csr_DetermineFormat(char *filename, int format) +{ + if (format != GK_CSR_FMT_AUTO) + return format; + + format = GK_CSR_FMT_CSR; + char *extension = gk_getextname(filename); + + if (!strcmp(extension, "csr")) + format = GK_CSR_FMT_CSR; + else if (!strcmp(extension, "ijv")) + format = GK_CSR_FMT_IJV; + else if (!strcmp(extension, "cluto")) + format = GK_CSR_FMT_CLUTO; + else if (!strcmp(extension, "metis")) + format = GK_CSR_FMT_METIS; + else if (!strcmp(extension, "binrow")) + format = GK_CSR_FMT_BINROW; + else if (!strcmp(extension, "bincol")) + format = GK_CSR_FMT_BINCOL; + else if (!strcmp(extension, "bijv")) + format = GK_CSR_FMT_BIJV; + + gk_free((void **)&extension, LTERM); + + return format; +} + + +/**************************************************************************/ +/*! Reads a CSR matrix from the supplied file and stores it the matrix's + forward structure. + \param filename is the file that stores the data. + \param format is either GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO, + GK_CSR_FMT_CSR, GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL + specifying the type of the input format. + The GK_CSR_FMT_CSR does not contain a header + line, whereas the GK_CSR_FMT_BINROW is a binary format written + by gk_csr_Write() using the same format specifier. + \param readvals is either 1 or 0, indicating if the CSR file contains + values or it does not. It only applies when GK_CSR_FMT_CSR is + used. + \param numbering is either 1 or 0, indicating if the numbering of the + indices start from 1 or 0, respectively. If they start from 1, + they are automatically decreamented during input so that they + will start from 0. It only applies when GK_CSR_FMT_CSR is + used. + \returns the matrix that was read. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering) +{ + ssize_t i, k, l; + size_t nfields, nrows, ncols, nnz, fmt, ncon; + size_t lnlen; + ssize_t *rowptr; + int *rowind, *iinds, *jinds, ival; + float *rowval=NULL, *vals, fval; + int readsizes, readwgts; + char *line=NULL, *head, *tail, fmtstr[256]; + FILE *fpin; + gk_csr_t *mat=NULL; + + format = gk_csr_DetermineFormat(filename, format); + + if (!gk_fexists(filename)) + gk_errexit(SIGERR, "File %s does not exist!\n", filename); + + switch (format) { + case GK_CSR_FMT_BINROW: + mat = gk_csr_Create(); + + fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin"); + if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr"); + if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1) + gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename); + mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind"); + if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows]) + gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename); + if (readvals == 1) { + mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval"); + if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows]) + gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename); + } + + gk_fclose(fpin); + return mat; + + break; + + case GK_CSR_FMT_BINCOL: + mat = gk_csr_Create(); + + fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin"); + if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to 
read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr"); + if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1) + gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename); + mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind"); + if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols]) + gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename); + if (readvals) { + mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval"); + if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols]) + gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename); + } + + gk_fclose(fpin); + return mat; + + break; + + + case GK_CSR_FMT_IJV: + gk_getfilestats(filename, &nrows, &nnz, NULL, NULL); + + if (readvals == 1 && 3*nrows != nnz) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 3.\n", nnz, readvals); + if (readvals == 0 && 2*nrows != nnz) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 2.\n", nnz, readvals); + + nnz = nrows; + numbering = (numbering ? - 1 : 0); + + /* read the data into three arrays */ + iinds = gk_i32malloc(nnz, "iinds"); + jinds = gk_i32malloc(nnz, "jinds"); + vals = (readvals ? 
gk_fmalloc(nnz, "vals") : NULL); + + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + for (nrows=0, ncols=0, i=0; i<nnz; i++) { + if (readvals) { + if (fscanf(fpin, "%d %d %f", &iinds[i], &jinds[i], &vals[i]) != 3) + gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nnz: %zd.\n", i); + } + else { + if (fscanf(fpin, "%d %d", &iinds[i], &jinds[i]) != 2) + gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nnz: %zd.\n", i); + } + iinds[i] += numbering; + jinds[i] += numbering; + + if (nrows < iinds[i]) + nrows = iinds[i]; + if (ncols < jinds[i]) + ncols = jinds[i]; + } + nrows++; + ncols++; + gk_fclose(fpin); + + /* convert (i, j, v) into a CSR matrix */ + mat = gk_csr_Create(); + mat->nrows = nrows; + mat->ncols = ncols; + rowptr = mat->rowptr = gk_zsmalloc(nrows+1, 0, "rowptr"); + rowind = mat->rowind = gk_i32malloc(nnz, "rowind"); + if (readvals) + rowval = mat->rowval = gk_fmalloc(nnz, "rowval"); + + for (i=0; i<nnz; i++) + rowptr[iinds[i]]++; + MAKECSR(i, nrows, rowptr); + + for (i=0; i<nnz; i++) { + rowind[rowptr[iinds[i]]] = jinds[i]; + if (readvals) + rowval[rowptr[iinds[i]]] = vals[i]; + rowptr[iinds[i]]++; + } + SHIFTCSR(i, nrows, rowptr); + + gk_free((void **)&iinds, &jinds, &vals, LTERM); + + return mat; + + break; + + case GK_CSR_FMT_BIJV: + mat = gk_csr_Create(); + + fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin"); + + if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename); + if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename); + if (fread(&nnz, sizeof(size_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the nnz from file %s!\n", filename); + if (fread(&readvals, sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read the readvals from file %s!\n", filename); + + /* read the data into three arrays */ + iinds = gk_i32malloc(nnz, "iinds"); + jinds = 
gk_i32malloc(nnz, "jinds"); + vals = (readvals ? gk_fmalloc(nnz, "vals") : NULL); + + for (i=0; i<nnz; i++) { + if (fread(&(iinds[i]), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read iinds[i] from file %s!\n", filename); + if (fread(&(jinds[i]), sizeof(int32_t), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read jinds[i] from file %s!\n", filename); + if (readvals) { + if (fread(&(vals[i]), sizeof(float), 1, fpin) != 1) + gk_errexit(SIGERR, "Failed to read vals[i] from file %s!\n", filename); + } + //printf("%d %d\n", iinds[i], jinds[i]); + } + gk_fclose(fpin); + + /* convert (i, j, v) into a CSR matrix */ + rowptr = mat->rowptr = gk_zsmalloc(mat->nrows+1, 0, "rowptr"); + rowind = mat->rowind = gk_i32malloc(nnz, "rowind"); + if (readvals) + rowval = mat->rowval = gk_fmalloc(nnz, "rowval"); + + for (i=0; i<nnz; i++) + rowptr[iinds[i]]++; + MAKECSR(i, mat->nrows, rowptr); + + for (i=0; i<nnz; i++) { + rowind[rowptr[iinds[i]]] = jinds[i]; + if (readvals) + rowval[rowptr[iinds[i]]] = vals[i]; + rowptr[iinds[i]]++; + } + SHIFTCSR(i, mat->nrows, rowptr); + + gk_free((void **)&iinds, &jinds, &vals, LTERM); + + return mat; + + break; + + + /* the following are handled by a common input code, that comes after the switch */ + + case GK_CSR_FMT_CLUTO: + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + do { + if (gk_getline(&line, &lnlen, fpin) <= 0) + gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); + } while (line[0] == '%'); + + if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3) + gk_errexit(SIGERR, "Header line must contain 3 integers.\n"); + + readsizes = 0; + readwgts = 0; + readvals = 1; + numbering = 1; + + break; + + case GK_CSR_FMT_METIS: + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + do { + if (gk_getline(&line, &lnlen, fpin) <= 0) + gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename); + } while (line[0] == '%'); + + fmt = ncon = 0; + nfields = sscanf(line, "%zu %zu %zu %zu", 
&nrows, &nnz, &fmt, &ncon); + if (nfields < 2) + gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n"); + + ncols = nrows; + nnz *= 2; + + if (fmt > 111) + gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt); + + sprintf(fmtstr, "%03zu", fmt%1000); + readsizes = (fmtstr[0] == '1'); + readwgts = (fmtstr[1] == '1'); + readvals = (fmtstr[2] == '1'); + numbering = 1; + ncon = (ncon == 0 ? 1 : ncon); + + break; + + case GK_CSR_FMT_CSR: + readsizes = 0; + readwgts = 0; + + gk_getfilestats(filename, &nrows, &nnz, NULL, NULL); + + if (readvals == 1 && nnz%2 == 1) + gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not even.\n", nnz, readvals); + if (readvals == 1) + nnz = nnz/2; + fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin"); + + break; + + default: + gk_errexit(SIGERR, "Unknown csr format.\n"); + return NULL; + } + + mat = gk_csr_Create(); + + mat->nrows = nrows; + + rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr"); + rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind"); + if (readvals != 2) + rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval"); + + if (readsizes) + mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes"); + + if (readwgts) + mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts"); + + /*---------------------------------------------------------------------- + * Read the sparse matrix file + *---------------------------------------------------------------------*/ + numbering = (numbering ? 
-1 : 0); + for (ncols=0, rowptr[0]=0, k=0, i=0; i<nrows; i++) { + do { + if (gk_getline(&line, &lnlen, fpin) == -1) + gk_errexit(SIGERR, "Premature end of input file: file while reading row %d\n", i); + } while (line[0] == '%'); + + head = line; + tail = NULL; + + /* Read vertex sizes */ + if (readsizes) { +#ifdef __MSC__ + mat->rsizes[i] = (float)strtod(head, &tail); +#else + mat->rsizes[i] = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1); + if (mat->rsizes[i] < 0) + errexit("The size for vertex %zd must be >= 0\n", i+1); + head = tail; + } + + /* Read vertex weights */ + if (readwgts) { + for (l=0; l<ncon; l++) { +#ifdef __MSC__ + mat->rwgts[i*ncon+l] = (float)strtod(head, &tail); +#else + mat->rwgts[i*ncon+l] = strtof(head, &tail); +#endif + if (tail == head) + errexit("The line for vertex %zd does not have enough weights " + "for the %d constraints.\n", i+1, ncon); + if (mat->rwgts[i*ncon+l] < 0) + errexit("The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l); + head = tail; + } + } + + + /* Read the rest of the row */ + while (1) { + ival = (int)strtol(head, &tail, 0); + if (tail == head) + break; + head = tail; + + if ((rowind[k] = ival + numbering) < 0) + gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i); + + ncols = gk_max(rowind[k], ncols); + + if (readvals == 1) { +#ifdef __MSC__ + fval = (float)strtod(head, &tail); +#else + fval = strtof(head, &tail); +#endif + if (tail == head) + gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k); + head = tail; + + rowval[k] = fval; + } + k++; + } + rowptr[i+1] = k; + } + + if (format == GK_CSR_FMT_METIS) { + ASSERT(ncols+1 == mat->nrows); + mat->ncols = mat->nrows; + } + else { + mat->ncols = ncols+1; + } + + if (k != nnz) + gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in " + "the input file. 
NNZ=%zd, ActualNNZ=%zd.\n", nnz, k); + + gk_fclose(fpin); + + gk_free((void **)&line, LTERM); + + return mat; +} + + +/**************************************************************************/ +/*! Writes the row-based structure of a matrix into a file. + \param mat is the matrix to be written, + \param filename is the name of the output file. + \param format is one of: GK_CSR_FMT_CLUTO, GK_CSR_FMT_CSR, + GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL, GK_CSR_FMT_BIJV. + \param writevals is either 1 or 0 indicating if the values will be + written or not. This is only applicable when GK_CSR_FMT_CSR + is used. + \param numbering is either 1 or 0 indicating if the internal 0-based + numbering will be shifted by one or not during output. This + is only applicable when GK_CSR_FMT_CSR is used. +*/ +/**************************************************************************/ +void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering) +{ + ssize_t i, j; + int32_t edge[2]; + FILE *fpout; + + format = gk_csr_DetermineFormat(filename, format); + + switch (format) { + case GK_CSR_FMT_METIS: + if (mat->nrows != mat->ncols || mat->rowptr[mat->nrows]%2 == 1) + gk_errexit(SIGERR, "METIS output format requires a square symmetric matrix.\n"); + + if (filename) + fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + else + fpout = stdout; + + fprintf(fpout, "%d %zd\n", mat->nrows, mat->rowptr[mat->nrows]/2); + for (i=0; i<mat->nrows; i++) { + for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) + fprintf(fpout, " %d", mat->rowind[j]+1); + fprintf(fpout, "\n"); + } + if (filename) + gk_fclose(fpout); + break; + + case GK_CSR_FMT_BINROW: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpout); + 
fwrite(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpout); + if (writevals) + fwrite(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpout); + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_BINCOL: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpout); + fwrite(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpout); + if (writevals) + fwrite(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpout); + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_IJV: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + + numbering = (numbering ? 1 : 0); + for (i=0; i<mat->nrows; i++) { + for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { + if (writevals) + fprintf(fpout, "%zd %d %.8f\n", i+numbering, mat->rowind[j]+numbering, mat->rowval[j]); + else + fprintf(fpout, "%zd %d\n", i+numbering, mat->rowind[j]+numbering); + } + } + + gk_fclose(fpout); + return; + + break; + + case GK_CSR_FMT_BIJV: + if (filename == NULL) + gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n"); + fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout"); + + fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout); + fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout); + fwrite(&(mat->rowptr[mat->nrows]), sizeof(size_t), 1, fpout); + fwrite(&writevals, sizeof(int32_t), 1, fpout); + + for (i=0; i<mat->nrows; i++) { + edge[0] = i; + for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { + edge[1] = mat->rowind[j]; + fwrite(edge, sizeof(int32_t), 2, fpout); + if (writevals) + fwrite(&(mat->rowval[j]), sizeof(float), 1, fpout); + } + } + + gk_fclose(fpout); + return; + + break; + + default: + if (filename) + 
fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout"); + else + fpout = stdout; + + if (format == GK_CSR_FMT_CLUTO) { + fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]); + writevals = 1; + numbering = 1; + } + + for (i=0; i<mat->nrows; i++) { + for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { + fprintf(fpout, " %d", mat->rowind[j]+(numbering ? 1 : 0)); + if (writevals) + fprintf(fpout, " %f", mat->rowval[j]); + } + fprintf(fpout, "\n"); + } + if (filename) + gk_fclose(fpout); + } +} + + +/*************************************************************************/ +/*! Prunes certain rows/columns of the matrix. The prunning takes place + by analyzing the row structure of the matrix. The prunning takes place + by removing rows/columns but it does not affect the numbering of the + remaining rows/columns. + + \param mat the matrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the matrix will be prunned, + \param minf is the minimum number of rows (columns) that a column (row) must + be present in order to be kept, + \param maxf is the maximum number of rows (columns) that a column (row) must + be present at in order to be kept. + \returns the prunned matrix consisting only of its row-based structure. + The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* The output can never hold more entries than the input, so the input's
+     nonzero count is used as the allocation size. A matrix without values
+     (rowval == NULL) is pruned structurally and yields a result that has
+     no values either. */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval") : NULL);
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      /* count in how many rows each column appears */
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen");
+
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      /* turn the counts into keep (1) / drop (0) flags */
+      for (i=0; i<ncols; i++)
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            if (rowval)
+              nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      /* a row is copied whole when its length lies in [minf, maxf];
+         otherwise it is left empty so that the row numbering is unchanged */
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            if (rowval)
+              nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix.
The + filtering takes place by keeping only the highest weight entries whose + sum accounts for a certain fraction of the overall weight of the + row/column. + + \param mat the matrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the matrix will be prunned, + \param norm indicates the norm that will be used to aggregate the weights + and possible values are 1 or 2, + \param fraction is the fraction of the overall norm that will be retained + by the kept entries. + \returns the filtered matrix consisting only of its row-based structure. + The input matrix is not modified. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction) +{ + ssize_t i, j, nnz; + int nrows, ncols, ncand, maxlen=0; + ssize_t *rowptr, *colptr, *nrowptr; + int *rowind, *colind, *nrowind; + float *rowval, *colval, *nrowval, rsum, tsum; + gk_csr_t *nmat; + gk_fkv_t *cand; + + nmat = gk_csr_Create(); + + nrows = nmat->nrows = mat->nrows; + ncols = nmat->ncols = mat->ncols; + + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + colptr = mat->colptr; + colind = mat->colind; + colval = mat->colval; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval"); + + + switch (what) { + case GK_CSR_COL: + if (mat->colptr == NULL) + gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n"); + + gk_zcopy(nrows+1, rowptr, nrowptr); + + for (i=0; i<ncols; i++) + maxlen = gk_max(maxlen, colptr[i+1]-colptr[i]); + + #pragma omp parallel private(i, j, ncand, rsum, tsum, cand) + { + cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand"); + + #pragma omp for schedule(static) + for (i=0; i<ncols; i++) { + for (tsum=0.0, 
ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) { + cand[ncand].val = colind[j]; + cand[ncand].key = colval[j]; + tsum += (norm == 1 ? colval[j] : colval[j]*colval[j]); + } + gk_fkvsortd(ncand, cand); + + for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) { + rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key); + nrowind[nrowptr[cand[j].val]] = i; + nrowval[nrowptr[cand[j].val]] = cand[j].key; + nrowptr[cand[j].val]++; + } + } + + gk_free((void **)&cand, LTERM); + } + + /* compact the nrowind/nrowval */ + for (nnz=0, i=0; i<nrows; i++) { + for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) { + nrowind[nnz] = nrowind[j]; + nrowval[nnz] = nrowval[j]; + } + nrowptr[i] = nnz; + } + SHIFTCSR(i, nrows, nrowptr); + + break; + + case GK_CSR_ROW: + if (mat->rowptr == NULL) + gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n"); + + for (i=0; i<nrows; i++) + maxlen = gk_max(maxlen, rowptr[i+1]-rowptr[i]); + + #pragma omp parallel private(i, j, ncand, rsum, tsum, cand) + { + cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand"); + + #pragma omp for schedule(static) + for (i=0; i<nrows; i++) { + for (tsum=0.0, ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) { + cand[ncand].val = rowind[j]; + cand[ncand].key = rowval[j]; + tsum += (norm == 1 ? rowval[j] : rowval[j]*rowval[j]); + } + gk_fkvsortd(ncand, cand); + + for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) { + rsum += (norm == 1 ? 
cand[j].key : cand[j].key*cand[j].key); + nrowind[rowptr[i]+j] = cand[j].val; + nrowval[rowptr[i]+j] = cand[j].key; + } + nrowptr[i+1] = rowptr[i]+j; + } + + gk_free((void **)&cand, LTERM); + } + + /* compact nrowind/nrowval */ + nrowptr[0] = nnz = 0; + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<nrowptr[i+1]; j++, nnz++) { + nrowind[nnz] = nrowind[j]; + nrowval[nnz] = nrowval[j]; + } + nrowptr[i+1] = nnz; + } + + break; + + default: + gk_csr_Free(&nmat); + gk_errexit(SIGERR, "Unknown prunning type of %d\n", what); + return NULL; + } + + return nmat; +} + + +/*************************************************************************/ +/*! Eliminates certain entries from the rows/columns of the matrix. The + filtering takes place by keeping only the highest weight top-K entries + along each row/column and those entries whose weight is greater than + a specified value. + + \param mat the matrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the matrix will be prunned, + \param topk is the number of the highest weight entries to keep. + \param keepval is the weight of a term above which will be kept. This + is used to select additional terms past the first topk. + \returns the filtered matrix consisting only of its row-based structure. + The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
+{
+  ssize_t i, j, k, nnz;
+  int nrows, ncols, ncand;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  nmat = gk_csr_Create();
+
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  /* the labels name this routine so that allocation diagnostics point at
+     the right caller */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_TopKPlusFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) {
+        gk_csr_Free(&nmat);
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+        return NULL;
+      }
+
+      cand = gk_fkvmalloc(nrows, "gk_csr_TopKPlusFilter: cand");
+
+      /* nrowptr[r] starts at the beginning of row r's original extent and
+         advances as kept entries are appended to that row */
+      gk_zcopy(nrows+1, rowptr, nrowptr);
+      for (i=0; i<ncols; i++) {
+        for (ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+          cand[ncand].val = colind[j];
+          cand[ncand].key = colval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        /* the first topk entries are kept unconditionally ... */
+        k = gk_min(topk, ncand);
+        for (j=0; j<k; j++) {
+          nrowind[nrowptr[cand[j].val]] = i;
+          nrowval[nrowptr[cand[j].val]] = cand[j].key;
+          nrowptr[cand[j].val]++;
+        }
+        /* ... and the following ones for as long as they reach keepval */
+        for (; j<ncand; j++) {
+          if (cand[j].key < keepval)
+            break;
+
+          nrowind[nrowptr[cand[j].val]] = i;
+          nrowval[nrowptr[cand[j].val]] = cand[j].key;
+          nrowptr[cand[j].val]++;
+        }
+      }
+
+      /* compact the nrowind/nrowval */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz;
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) {
+        gk_csr_Free(&nmat);
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+        return NULL;
+      }
+
+      cand = gk_fkvmalloc(ncols, "gk_csr_TopKPlusFilter: cand");
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+          cand[ncand].val = rowind[j];
+          cand[ncand].key = rowval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        /* the first topk entries are kept unconditionally ... */
+        k = gk_min(topk, ncand);
+        for (j=0; j<k; j++, nnz++) {
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        /* ... and the following ones for as long as they reach keepval */
+        for (; j<ncand; j++, nnz++) {
+          if (cand[j].key < keepval)
+            break;
+
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The
+    filtering takes place by keeping only the terms whose contribution to
+    the total length of the document is greater than a user-supplied multiple
+    over the average.
+
+    This routine assumes that the vectors are normalized to be unit length.
+
+    \param mat the matrix to be prunned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be prunned,
+    \param zscore is the multiplicative factor over the average contribution
+           to the length of the document.
+    \returns the filtered matrix consisting only of its row-based structure.
+             The input matrix is not modified.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
+{
+  ssize_t i, j, nnz;
+  int nrows;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  float *rowval, *nrowval, avgwgt;
+  gk_csr_t *nmat;
+
+  /* Reject requests that cannot be served before anything is allocated, so
+     that a returning error handler never leaves a half-built matrix behind
+     and the row structure is not touched before it has been checked. */
+  switch (what) {
+    case GK_CSR_COL:
+      gk_errexit(SIGERR, "This has not been implemented yet.\n");
+      return NULL;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) {
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+        return NULL;
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval");
+
+  /* keep the entries of each row that exceed zscore times the row's
+     average share, i.e. zscore divided by the row length */
+  nrowptr[0] = 0;
+  for (nnz=0, i=0; i<nrows; i++) {
+    avgwgt = zscore/(rowptr[i+1]-rowptr[i]);
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (rowval[j] > avgwgt) {
+        nrowind[nnz] = rowind[j];
+        nrowval[nnz] = rowval[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Compacts the column-space of the matrix by removing empty columns.
+    As a result of the compaction, the column numbers are renumbered.
+    The compaction operation is done in place and only affects the row-based
+    representation of the matrix.
+    The new columns are ordered in decreasing frequency.
+
+    \param mat the matrix whose empty columns will be removed.
+*/
+/**************************************************************************/
+void gk_csr_CompactColumns(gk_csr_t *mat)
+{
+  ssize_t pos;
+  int nrows = mat->nrows;
+  int ncols = mat->ncols;
+  int nkept = 0;
+  ssize_t *rowptr = mat->rowptr;
+  int *rowind = mat->rowind;
+  int *newid;
+  gk_ikv_t *freq;
+
+  newid = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap");
+  freq  = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens");
+
+  /* freq[c] = (number of entries in column c, c) */
+  for (pos=0; pos<ncols; pos++) {
+    freq[pos].val = pos;
+    freq[pos].key = 0;
+  }
+  for (pos=0; pos<rowptr[nrows]; pos++)
+    freq[rowind[pos]].key += 1;
+
+  /* most frequent columns first; the empty ones end up at the tail */
+  gk_ikvsortd(ncols, freq);
+
+  /* hand out the new ids in sorted order and stop at the first empty column */
+  pos = 0;
+  while (pos < ncols && freq[pos].key > 0) {
+    newid[freq[pos].val] = nkept;
+    nkept++;
+    pos++;
+  }
+
+  /* relabel every stored column index; only non-empty columns occur here */
+  for (pos=0; pos<rowptr[nrows]; pos++)
+    rowind[pos] = newid[rowind[pos]];
+
+  mat->ncols = nkept;
+
+  gk_free((void **)&newid, &freq, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Sorts the indices in increasing order
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which set of
+           indices to sort.
+*/ +/**************************************************************************/ +void gk_csr_SortIndices(gk_csr_t *mat, int what) +{ + int n, nn=0; + ssize_t *ptr; + int *ind; + float *val; + + switch (what) { + case GK_CSR_ROW: + if (!mat->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + + n = mat->nrows; + ptr = mat->rowptr; + ind = mat->rowind; + val = mat->rowval; + break; + + case GK_CSR_COL: + if (!mat->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + + n = mat->ncols; + ptr = mat->colptr; + ind = mat->colind; + val = mat->colval; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return; + } + + #pragma omp parallel if (n > 100) + { + ssize_t i, j, k; + gk_ikv_t *cand; + float *tval; + + #pragma omp single + for (i=0; i<n; i++) + nn = gk_max(nn, ptr[i+1]-ptr[i]); + + cand = gk_ikvmalloc(nn, "gk_csr_SortIndices: cand"); + tval = gk_fmalloc(nn, "gk_csr_SortIndices: tval"); + + #pragma omp for schedule(static) + for (i=0; i<n; i++) { + for (k=0, j=ptr[i]; j<ptr[i+1]; j++) { + if (j > ptr[i] && ind[j] < ind[j-1]) + k = 1; /* an inversion */ + cand[j-ptr[i]].val = j-ptr[i]; + cand[j-ptr[i]].key = ind[j]; + tval[j-ptr[i]] = val[j]; + } + if (k) { + gk_ikvsorti(ptr[i+1]-ptr[i], cand); + for (j=ptr[i]; j<ptr[i+1]; j++) { + ind[j] = cand[j-ptr[i]].key; + val[j] = tval[cand[j-ptr[i]].val]; + } + } + } + + gk_free((void **)&cand, &tval, LTERM); + } + +} + + +/*************************************************************************/ +/*! Creates a row/column index from the column/row data. + \param mat the matrix itself, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating which index + will be created. 
+*/
+/**************************************************************************/
+void gk_csr_CreateIndex(gk_csr_t *mat, int what)
+{
+  /* "src" is the existing view that gets traversed, "dst" the view being built */
+  ssize_t u, e, slot, nsrc, ndst;
+  ssize_t *srcptr, *dstptr;
+  int *srcind, *dstind;
+  float *srcval, *dstval;
+
+  if (what == GK_CSR_COL) {
+    nsrc   = mat->nrows;
+    srcptr = mat->rowptr;
+    srcind = mat->rowind;
+    srcval = mat->rowval;
+
+    /* drop whatever column view is currently attached */
+    gk_free((void **)&mat->colptr, &mat->colind, &mat->colval, LTERM);
+
+    ndst   = mat->ncols;
+    dstptr = mat->colptr = gk_zsmalloc(ndst+1, 0, "gk_csr_CreateIndex: rptr");
+    dstind = mat->colind = gk_imalloc(srcptr[nsrc], "gk_csr_CreateIndex: rind");
+    dstval = mat->colval = (srcval ? gk_fmalloc(srcptr[nsrc], "gk_csr_CreateIndex: rval") : NULL);
+  }
+  else if (what == GK_CSR_ROW) {
+    nsrc   = mat->ncols;
+    srcptr = mat->colptr;
+    srcind = mat->colind;
+    srcval = mat->colval;
+
+    /* drop whatever row view is currently attached */
+    gk_free((void **)&mat->rowptr, &mat->rowind, &mat->rowval, LTERM);
+
+    ndst   = mat->nrows;
+    dstptr = mat->rowptr = gk_zsmalloc(ndst+1, 0, "gk_csr_CreateIndex: rptr");
+    dstind = mat->rowind = gk_imalloc(srcptr[nsrc], "gk_csr_CreateIndex: rind");
+    dstval = mat->rowval = (srcval ? gk_fmalloc(srcptr[nsrc], "gk_csr_CreateIndex: rval") : NULL);
+  }
+  else {
+    gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+    return;
+  }
+
+
+  /* count the entries of every destination row/column and turn the counts
+     into start offsets */
+  for (u=0; u<nsrc; u++) {
+    for (e=srcptr[u]; e<srcptr[u+1]; e++)
+      dstptr[srcind[e]]++;
+  }
+  MAKECSR(u, ndst, dstptr);
+
+  if (dstptr[ndst] <= 6*ndst) {
+    /* few entries per destination: indices and values go in together in a
+       single sweep */
+    for (u=0; u<nsrc; u++) {
+      for (e=srcptr[u]; e<srcptr[u+1]; e++) {
+        slot = dstptr[srcind[e]]++;
+        dstind[slot] = u;
+        if (srcval)
+          dstval[slot] = srcval[e];
+      }
+    }
+    SHIFTCSR(u, ndst, dstptr);
+  }
+  else {
+    /* many entries per destination: one sweep for the indices and a second
+       one for the values, restoring the offsets after each */
+    for (u=0; u<nsrc; u++) {
+      for (e=srcptr[u]; e<srcptr[u+1]; e++)
+        dstind[dstptr[srcind[e]]++] = u;
+    }
+    SHIFTCSR(u, ndst, dstptr);
+
+    if (srcval) {
+      for (u=0; u<nsrc; u++) {
+        for (e=srcptr[u]; e<srcptr[u+1]; e++)
+          dstval[dstptr[srcind[e]]++] = srcval[e];
+      }
+      SHIFTCSR(u, ndst, dstptr);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the matrix to be unit
+    length.
+    \param mat the matrix itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL.
+ \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm +*/ +/**************************************************************************/ +void gk_csr_Normalize(gk_csr_t *mat, int what, int norm) +{ + ssize_t i, j; + int n; + ssize_t *ptr; + float *val, sum; + + + if (what&GK_CSR_ROW && mat->rowval) { + n = mat->nrows; + ptr = mat->rowptr; + val = mat->rowval; + + #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static) + for (i=0; i<n; i++) { + sum = 0.0; + if (norm == 1) { + for (j=ptr[i]; j<ptr[i+1]; j++) + sum += val[j]; /* assume val[j] > 0 */ + if (sum > 0) + sum = 1.0/sum; + } + else if (norm == 2) { + for (j=ptr[i]; j<ptr[i+1]; j++) + sum += val[j]*val[j]; + if (sum > 0) + sum = 1.0/sqrt(sum); + } + for (j=ptr[i]; j<ptr[i+1]; j++) + val[j] *= sum; + } + } + + if (what&GK_CSR_COL && mat->colval) { + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static) + for (i=0; i<n; i++) { + sum = 0.0; + if (norm == 1) { + for (j=ptr[i]; j<ptr[i+1]; j++) + sum += val[j]; /* assume val[j] > 0 */ + if (sum > 0) + sum = 1.0/sum; + } + else if (norm == 2) { + for (j=ptr[i]; j<ptr[i+1]; j++) + sum += val[j]*val[j]; + if (sum > 0) + sum = 1.0/sqrt(sum); + } + for (j=ptr[i]; j<ptr[i+1]; j++) + val[j] *= sum; + } + } + +} + + +/*************************************************************************/ +/*! Applies different row scaling methods. + \param mat the matrix itself, + \param type indicates the type of row scaling. Possible values are: + GK_CSR_MAXTF, GK_CSR_SQRT, GK_CSR_LOG, GK_CSR_IDF, GK_CSR_MAXTF2. 
+*/ +/**************************************************************************/ +void gk_csr_Scale(gk_csr_t *mat, int type) +{ + ssize_t i, j; + int nrows, ncols, nnzcols, bgfreq; + ssize_t *rowptr; + int *rowind, *collen; + float *rowval, *cscale, maxtf; + double logscale = 1.0/log(2.0); + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + switch (type) { + case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static) + for (i=0; i<nrows; i++) { + maxtf = fabs(rowval[rowptr[i]]); + for (j=rowptr[i]; j<rowptr[i+1]; j++) + maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf); + + for (j=rowptr[i]; j<rowptr[i+1]; j++) + rowval[j] = .5 + .5*rowval[j]/maxtf; + } + break; + + case GK_CSR_MAXTF2: /* TF' = .1 + .9*TF/MAX(TF) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static) + for (i=0; i<nrows; i++) { + maxtf = fabs(rowval[rowptr[i]]); + for (j=rowptr[i]; j<rowptr[i+1]; j++) + maxtf = (maxtf < fabs(rowval[j]) ? 
fabs(rowval[j]) : maxtf); + + for (j=rowptr[i]; j<rowptr[i+1]; j++) + rowval[j] = .1 + .9*rowval[j]/maxtf; + } + break; + + case GK_CSR_SQRT: /* TF' = .1+SQRT(TF) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = .1+sign(rowval[j], sqrt(fabs(rowval[j]))); + } + } + + break; + + case GK_CSR_POW25: /* TF' = .1+POW(TF,.25) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = .1+sign(rowval[j], sqrt(sqrt(fabs(rowval[j])))); + } + } + break; + + case GK_CSR_POW65: /* TF' = .1+POW(TF,.65) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .65)); + } + } + break; + + case GK_CSR_POW75: /* TF' = .1+POW(TF,.75) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .75)); + } + } + break; + + case GK_CSR_POW85: /* TF' = .1+POW(TF,.85) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .85)); + } + } + break; + + case GK_CSR_LOG: /* TF' = 1+log_2(TF) */ + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) schedule(static,32) + for (i=0; i<rowptr[nrows]; i++) { + if (rowval[i] != 0.0) + rowval[i] = 1+(rowval[i]>0.0 ? 
log(rowval[i]) : -log(-rowval[i]))*logscale; + } +#ifdef XXX + #pragma omp parallel for private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (rowval[j] != 0.0) + rowval[j] = 1+(rowval[j]>0.0 ? log(rowval[j]) : -log(-rowval[j]))*logscale; + //rowval[j] = 1+sign(rowval[j], log(fabs(rowval[j]))*logscale); + } + } +#endif + break; + + case GK_CSR_IDF: /* TF' = TF*IDF */ + ncols = mat->ncols; + cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale"); + collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen"); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + collen[rowind[j]]++; + } + + #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) + for (i=0; i<ncols; i++) + cscale[i] = (collen[i] > 0 ? log(1.0*nrows/collen[i]) : 0.0); + + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + rowval[j] *= cscale[rowind[j]]; + } + + gk_free((void **)&cscale, &collen, LTERM); + break; + + case GK_CSR_IDF2: /* TF' = TF*IDF */ + ncols = mat->ncols; + cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale"); + collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen"); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + collen[rowind[j]]++; + } + + nnzcols = 0; + #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) reduction(+:nnzcols) + for (i=0; i<ncols; i++) + nnzcols += (collen[i] > 0 ? 1 : 0); + + bgfreq = gk_max(10, (ssize_t)(.5*rowptr[nrows]/nnzcols)); + printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq); + + #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) + for (i=0; i<ncols; i++) + cscale[i] = (collen[i] > 0 ? 
log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0); + + #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static) + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + rowval[j] *= cscale[rowind[j]]; + } + + gk_free((void **)&cscale, &collen, LTERM); + break; + + default: + gk_errexit(SIGERR, "Unknown scaling type of %d\n", type); + } +} + + +/*************************************************************************/ +/*! Computes the sums of the rows/columns + \param mat the matrix itself, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating which + sums to compute. +*/ +/**************************************************************************/ +void gk_csr_ComputeSums(gk_csr_t *mat, int what) +{ + ssize_t i; + int n; + ssize_t *ptr; + float *val, *sums; + + switch (what) { + case GK_CSR_ROW: + n = mat->nrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rsums) + gk_free((void **)&mat->rsums, LTERM); + + sums = mat->rsums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->csums) + gk_free((void **)&mat->csums, LTERM); + + sums = mat->csums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums"); + break; + default: + gk_errexit(SIGERR, "Invalid sum type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + sums[i] = gk_fsum(ptr[i+1]-ptr[i], val+ptr[i], 1); + } + else { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + sums[i] = ptr[i+1]-ptr[i]; + } +} + + +/*************************************************************************/ +/*! Computes the norms of the rows/columns + + \param mat the matrix itself, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating which + squared norms to compute. 
+ + \note If the rowval/colval arrays are NULL, the matrix is assumed + to be binary and the norms are computed accordingly. +*/ +/**************************************************************************/ +void gk_csr_ComputeNorms(gk_csr_t *mat, int what) +{ + ssize_t i; + int n; + ssize_t *ptr; + float *val, *norms; + + switch (what) { + case GK_CSR_ROW: + n = mat->nrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM); + + norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM); + + norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + default: + gk_errexit(SIGERR, "Invalid norm type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + norms[i] = sqrt(gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1)); + } + else { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + norms[i] = sqrt(ptr[i+1]-ptr[i]); + } +} + + +/*************************************************************************/ +/*! Computes the squared of the norms of the rows/columns + + \param mat the matrix itself, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating which + squared norms to compute. + + \note If the rowval/colval arrays are NULL, the matrix is assumed + to be binary and the norms are computed accordingly. 
+*/ +/**************************************************************************/ +void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what) +{ + ssize_t i; + int n; + ssize_t *ptr; + float *val, *norms; + + switch (what) { + case GK_CSR_ROW: + n = mat->nrows; + ptr = mat->rowptr; + val = mat->rowval; + + if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM); + + norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + case GK_CSR_COL: + n = mat->ncols; + ptr = mat->colptr; + val = mat->colval; + + if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM); + + norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: norms"); + break; + default: + gk_errexit(SIGERR, "Invalid norm type of %d.\n", what); + return; + } + + if (val) { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + norms[i] = gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1); + } + else { + #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static) + for (i=0; i<n; i++) + norms[i] = ptr[i+1]-ptr[i]; + } +} + + +/*************************************************************************/ +/*! Returns a new matrix whose rows/columns are shuffled. + + \param mat the matrix to be shuffled, + \param what indicates if the rows (GK_CSR_ROW), columns (GK_CSR_COL), + or both (GK_CSR_ROWCOL) will be shuffled, + \param symmetric indicates if the same shuffling will be applied to + both rows and columns. This is valid with nrows==ncols and + GK_CSR_ROWCOL was specified. + \returns the shuffled matrix. + The input matrix is not modified. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int symmetric) +{ + ssize_t i, j; + int nrows, ncols; + ssize_t *rowptr, *nrowptr; + int *rowind, *nrowind; + int *rperm, *cperm; + float *rowval, *nrowval; + gk_csr_t *nmat; + + if (what == GK_CSR_ROWCOL && symmetric && mat->nrows != mat->ncols) + gk_errexit(SIGERR, "The matrix is not square for a symmetric rowcol shuffling.\n"); + + nrows = mat->nrows; + ncols = mat->ncols; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + rperm = gk_imalloc(nrows, "gk_csr_Shuffle: rperm"); + cperm = gk_imalloc(ncols, "gk_csr_Shuffle: cperm"); + + switch (what) { + case GK_CSR_ROW: + gk_RandomPermute(nrows, rperm, 1); + for (i=0; i<20; i++) + gk_RandomPermute(nrows, rperm, 0); + + for (i=0; i<ncols; i++) + cperm[i] = i; + break; + + case GK_CSR_COL: + gk_RandomPermute(ncols, cperm, 1); + for (i=0; i<20; i++) + gk_RandomPermute(ncols, cperm, 0); + + for (i=0; i<nrows; i++) + rperm[i] = i; + break; + + case GK_CSR_ROWCOL: + gk_RandomPermute(nrows, rperm, 1); + for (i=0; i<20; i++) + gk_RandomPermute(nrows, rperm, 0); + + if (symmetric) + gk_icopy(nrows, rperm, cperm); + else { + gk_RandomPermute(ncols, cperm, 1); + for (i=0; i<20; i++) + gk_RandomPermute(ncols, cperm, 0); + } + break; + + default: + gk_free((void **)&rperm, &cperm, LTERM); + gk_errexit(SIGERR, "Unknown shuffling type of %d\n", what); + return NULL; + } + + nmat = gk_csr_Create(); + nmat->nrows = nrows; + nmat->ncols = ncols; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Shuffle: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Shuffle: nrowind"); + nrowval = nmat->rowval = (rowval ? 
gk_fmalloc(rowptr[nrows], "gk_csr_Shuffle: nrowval") : NULL) ; + + for (i=0; i<nrows; i++) + nrowptr[rperm[i]] = rowptr[i+1]-rowptr[i]; + MAKECSR(i, nrows, nrowptr); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + nrowind[nrowptr[rperm[i]]] = cperm[rowind[j]]; + if (nrowval) + nrowval[nrowptr[rperm[i]]] = rowval[j]; + nrowptr[rperm[i]]++; + } + } + SHIFTCSR(i, nrows, nrowptr); + + gk_free((void **)&rperm, &cperm, LTERM); + + return nmat; + +} + + +/*************************************************************************/ +/*! Returns the transpose of the matrix. + + \param mat the matrix to be transposed, + \returns the transposed matrix. + The input matrix is not modified. +*/ +/**************************************************************************/ +gk_csr_t *gk_csr_Transpose(gk_csr_t *mat) +{ + int nrows, ncols; + ssize_t *colptr; + int32_t *colind; + float *colval; + gk_csr_t *nmat; + + colptr = mat->colptr; + colind = mat->colind; + colval = mat->colval; + + mat->colptr = NULL; + mat->colind = NULL; + mat->colval = NULL; + + gk_csr_CreateIndex(mat, GK_CSR_COL); + + nmat = gk_csr_Create(); + nmat->nrows = mat->ncols; + nmat->ncols = mat->nrows; + nmat->rowptr = mat->colptr; + nmat->rowind = mat->colind; + nmat->rowval = mat->colval; + + mat->colptr = colptr; + mat->colind = colind; + mat->colval = colval; + + return nmat; + +} + + +/*************************************************************************/ +/*! Computes the similarity between two rows/columns + + \param mat the matrix itself. The routine assumes that the indices + are sorted in increasing order. + \param i1 is the first row/column, + \param i2 is the second row/column, + \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of + objects between the similarity will be computed, + \param simtype is the type of similarity and is one of GK_CSR_COS, + GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN + \returns the similarity between the two rows/columns. 
+*/ +/**************************************************************************/ +float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, + int simtype) +{ + int nind1, nind2; + int *ind1, *ind2; + float *val1, *val2, stat1, stat2, sim; + + switch (what) { + case GK_CSR_ROW: + if (!mat->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + nind1 = mat->rowptr[i1+1]-mat->rowptr[i1]; + nind2 = mat->rowptr[i2+1]-mat->rowptr[i2]; + ind1 = mat->rowind + mat->rowptr[i1]; + ind2 = mat->rowind + mat->rowptr[i2]; + val1 = mat->rowval + mat->rowptr[i1]; + val2 = mat->rowval + mat->rowptr[i2]; + break; + + case GK_CSR_COL: + if (!mat->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + nind1 = mat->colptr[i1+1]-mat->colptr[i1]; + nind2 = mat->colptr[i2+1]-mat->colptr[i2]; + ind1 = mat->colind + mat->colptr[i1]; + ind2 = mat->colind + mat->colptr[i2]; + val1 = mat->colval + mat->colptr[i1]; + val2 = mat->colval + mat->colptr[i2]; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return 0.0; + } + + + switch (simtype) { + case GK_CSR_COS: + case GK_CSR_JAC: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]*val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]*val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else { + sim += val1[i1]*val2[i2]; + stat1 += val1[i1]*val1[i1]; + stat2 += val2[i2]*val2[i2]; + i1++; + i2++; + } + } + if (simtype == GK_CSR_COS) + sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0); + else + sim = (stat1+stat2-sim > 0.0 ? 
sim/(stat1+stat2-sim) : 0.0); + break; + + case GK_CSR_MIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0); + + break; + + case GK_CSR_AMIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1 > 0.0 ? sim/stat1 : 0.0); + + break; + + default: + gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype); + return -1; + } + + return sim; + +} + + +/*************************************************************************/ +/*! Computes the similarity between two rows/columns + + \param mat_a the first matrix. The routine assumes that the indices + are sorted in increasing order. + \param mat_b the second matrix. The routine assumes that the indices + are sorted in increasing order. + \param i1 is the row/column from the first matrix (mat_a), + \param i2 is the row/column from the second matrix (mat_b), + \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of + objects between the similarity will be computed, + \param simtype is the type of similarity and is one of GK_CSR_COS, + GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN + \returns the similarity between the two rows/columns. 
+*/ +/**************************************************************************/ +float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, + int i1, int i2, int what, int simtype) +{ + int nind1, nind2; + int *ind1, *ind2; + float *val1, *val2, stat1, stat2, sim; + + switch (what) { + case GK_CSR_ROW: + if (!mat_a->rowptr || !mat_b->rowptr) + gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n"); + nind1 = mat_a->rowptr[i1+1]-mat_a->rowptr[i1]; + nind2 = mat_b->rowptr[i2+1]-mat_b->rowptr[i2]; + ind1 = mat_a->rowind + mat_a->rowptr[i1]; + ind2 = mat_b->rowind + mat_b->rowptr[i2]; + val1 = mat_a->rowval + mat_a->rowptr[i1]; + val2 = mat_b->rowval + mat_b->rowptr[i2]; + break; + + case GK_CSR_COL: + if (!mat_a->colptr || !mat_b->colptr) + gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n"); + nind1 = mat_a->colptr[i1+1]-mat_a->colptr[i1]; + nind2 = mat_b->colptr[i2+1]-mat_b->colptr[i2]; + ind1 = mat_a->colind + mat_a->colptr[i1]; + ind2 = mat_b->colind + mat_b->colptr[i2]; + val1 = mat_a->colval + mat_a->colptr[i1]; + val2 = mat_b->colval + mat_b->colptr[i2]; + break; + + default: + gk_errexit(SIGERR, "Invalid index type of %d.\n", what); + return 0.0; + } + + + switch (simtype) { + case GK_CSR_COS: + case GK_CSR_JAC: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]*val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]*val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]*val2[i2]; + i2++; + } + else { + sim += val1[i1]*val2[i2]; + stat1 += val1[i1]*val1[i1]; + stat2 += val2[i2]*val2[i2]; + i1++; + i2++; + } + } + if (simtype == GK_CSR_COS) + sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0); + else + sim = (stat1+stat2-sim > 0.0 ? 
sim/(stat1+stat2-sim) : 0.0); + break; + + case GK_CSR_MIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0); + + break; + + case GK_CSR_AMIN: + sim = stat1 = stat2 = 0.0; + i1 = i2 = 0; + while (i1<nind1 && i2<nind2) { + if (i1 == nind1) { + stat2 += val2[i2]; + i2++; + } + else if (i2 == nind2) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] < ind2[i2]) { + stat1 += val1[i1]; + i1++; + } + else if (ind1[i1] > ind2[i2]) { + stat2 += val2[i2]; + i2++; + } + else { + sim += gk_min(val1[i1],val2[i2]); + stat1 += val1[i1]; + stat2 += val2[i2]; + i1++; + i2++; + } + } + sim = (stat1 > 0.0 ? sim/stat1 : 0.0); + + break; + + default: + gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype); + return -1; + } + + return sim; + +} + +/*************************************************************************/ +/*! Finds the n most similar rows (neighbors) to the query. + + \param mat the matrix itself + \param nqterms is the number of columns in the query + \param qind is the list of query columns + \param qval is the list of correspodning query weights + \param simtype is the type of similarity and is one of GK_CSR_DOTP, + GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN. In case of + GK_CSR_COS, the rows and the query are assumed to be of unit + length. + \param nsim is the maximum number of requested most similar rows. + If -1 is provided, then everything is returned unsorted. + \param minsim is the minimum similarity of the requested most + similar rows + \param hits is the result set. 
This array should be at least + of length nsim. + \param i_marker is an array of size equal to the number of rows + whose values are initialized to -1. If NULL is provided + then this array is allocated and freed internally. + \param i_cand is an array of size equal to the number of rows. + If NULL is provided then this array is allocated and freed + internally. + \returns The number of identified most similar rows, which can be + smaller than the requested number of nnbrs in those cases + in which there are no sufficiently many neighbors. +*/ +/**************************************************************************/ +int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, + float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, + int *i_marker, gk_fkv_t *i_cand) +{ + ssize_t i, ii, j, k; + int nrows, ncols, ncand; + ssize_t *colptr; + int *colind, *marker; + float *colval, *rnorms, mynorm, *rsums, mysum; + gk_fkv_t *cand; + + if (nqterms == 0) + return 0; + + nrows = mat->nrows; + ncols = mat->ncols; + GKASSERT((colptr = mat->colptr) != NULL); + GKASSERT((colind = mat->colind) != NULL); + GKASSERT((colval = mat->colval) != NULL); + + marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker")); + cand = (i_cand ? 
i_cand : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand")); + + switch (simtype) { + case GK_CSR_DOTP: + case GK_CSR_COS: + for (ncand=0, ii=0; ii<nqterms; ii++) { + i = qind[ii]; + if (i < ncols) { + for (j=colptr[i]; j<colptr[i+1]; j++) { + k = colind[j]; + if (marker[k] == -1) { + cand[ncand].val = k; + cand[ncand].key = 0; + marker[k] = ncand++; + } + cand[marker[k]].key += colval[j]*qval[ii]; + } + } + } + break; + + case GK_CSR_JAC: + for (ncand=0, ii=0; ii<nqterms; ii++) { + i = qind[ii]; + if (i < ncols) { + for (j=colptr[i]; j<colptr[i+1]; j++) { + k = colind[j]; + if (marker[k] == -1) { + cand[ncand].val = k; + cand[ncand].key = 0; + marker[k] = ncand++; + } + cand[marker[k]].key += colval[j]*qval[ii]; + } + } + } + + GKASSERT((rnorms = mat->rnorms) != NULL); + mynorm = gk_fdot(nqterms, qval, 1, qval, 1); + + for (i=0; i<ncand; i++) + cand[i].key = cand[i].key/(rnorms[cand[i].val]+mynorm-cand[i].key); + break; + + case GK_CSR_MIN: + for (ncand=0, ii=0; ii<nqterms; ii++) { + i = qind[ii]; + if (i < ncols) { + for (j=colptr[i]; j<colptr[i+1]; j++) { + k = colind[j]; + if (marker[k] == -1) { + cand[ncand].val = k; + cand[ncand].key = 0; + marker[k] = ncand++; + } + cand[marker[k]].key += gk_min(colval[j], qval[ii]); + } + } + } + + GKASSERT((rsums = mat->rsums) != NULL); + mysum = gk_fsum(nqterms, qval, 1); + + for (i=0; i<ncand; i++) + cand[i].key = cand[i].key/(rsums[cand[i].val]+mysum-cand[i].key); + break; + + /* Assymetric MIN similarity */ + case GK_CSR_AMIN: + for (ncand=0, ii=0; ii<nqterms; ii++) { + i = qind[ii]; + if (i < ncols) { + for (j=colptr[i]; j<colptr[i+1]; j++) { + k = colind[j]; + if (marker[k] == -1) { + cand[ncand].val = k; + cand[ncand].key = 0; + marker[k] = ncand++; + } + cand[marker[k]].key += gk_min(colval[j], qval[ii]); + } + } + } + + mysum = gk_fsum(nqterms, qval, 1); + + for (i=0; i<ncand; i++) + cand[i].key = cand[i].key/mysum; + break; + + default: + gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype); + return -1; 
+ } + + /* go and prune the hits that are bellow minsim */ + for (j=0, i=0; i<ncand; i++) { + marker[cand[i].val] = -1; + if (cand[i].key >= minsim) + cand[j++] = cand[i]; + } + ncand = j; + + if (nsim == -1 || nsim >= ncand) { + nsim = ncand; + } + else { + nsim = gk_min(nsim, ncand); + gk_dfkvkselect(ncand, nsim, cand); + gk_fkvsortd(nsim, cand); + } + + gk_fkvcopy(nsim, cand, hits); + + if (i_marker == NULL) + gk_free((void **)&marker, LTERM); + if (i_cand == NULL) + gk_free((void **)&cand, LTERM); + + return nsim; +} + + +/*************************************************************************/ +/*! Returns a symmetric version of a square matrix. The symmetric version + is constructed by applying an A op A^T operation, where op is one of + GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG. + + \param mat the matrix to be symmetrized, + \param op indicates the operation to be performed. The possible values are + GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG. + + \returns the symmetrized matrix consisting only of its row-based structure. + The input matrix is not modified. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op) +{ + ssize_t i, j, k, nnz; + int nrows, nadj, hasvals; + ssize_t *rowptr, *colptr, *nrowptr; + int *rowind, *colind, *nrowind, *marker, *ids; + float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL; + gk_csr_t *nmat; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_MakeSymmetric: The matrix needs to be square.\n"); + return NULL; + } + + hasvals = (mat->rowval != NULL); + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + if (hasvals) + rowval = mat->rowval; + + /* create the column view for efficient processing */ + colptr = gk_zsmalloc(nrows+1, 0, "colptr"); + colind = gk_i32malloc(rowptr[nrows], "colind"); + if (hasvals) + colval = gk_fmalloc(rowptr[nrows], "colval"); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + colptr[rowind[j]]++; + } + MAKECSR(i, nrows, colptr); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + colind[colptr[rowind[j]]] = i; + if (hasvals) + colval[colptr[rowind[j]]] = rowval[j]; + colptr[rowind[j]]++; + } + } + SHIFTCSR(i, nrows, colptr); + + + nmat = gk_csr_Create(); + + nmat->nrows = mat->nrows; + nmat->ncols = mat->ncols; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr"); + nrowind = nmat->rowind = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind"); + if (hasvals) + nrowval = nmat->rowval = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + + marker = gk_ismalloc(nrows, -1, "marker"); + ids = gk_imalloc(nrows, "ids"); + if (hasvals) + wgts = gk_fmalloc(nrows, "wgts"); + + nrowptr[0] = nnz = 0; + for (i=0; i<nrows; i++) { + nadj = 0; + /* out-edges */ + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + ids[nadj] = rowind[j]; + if (hasvals) + wgts[nadj] = (op == GK_CSR_SYM_AVG ? 
0.5*rowval[j] : rowval[j]); + marker[rowind[j]] = nadj++; + } + + /* in-edges */ + for (j=colptr[i]; j<colptr[i+1]; j++) { + if (marker[colind[j]] == -1) { + if (op != GK_CSR_SYM_MIN) { + ids[nadj] = colind[j]; + if (hasvals) + wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]); + nadj++; + } + } + else { + if (hasvals) { + switch (op) { + case GK_CSR_SYM_MAX: + wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_MIN: + wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_SUM: + wgts[marker[colind[j]]] += colval[j]; + break; + case GK_CSR_SYM_AVG: + wgts[marker[colind[j]]] = 0.5*(wgts[marker[colind[j]]] + colval[j]); + break; + default: + errexit("Unsupported op for MakeSymmetric!\n"); + } + } + marker[colind[j]] = -1; + } + } + + /* go over out edges again to resolve any edges that were not found in the in + * edges */ + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (marker[rowind[j]] != -1) { + if (op == GK_CSR_SYM_MIN) + ids[marker[rowind[j]]] = -1; + marker[rowind[j]] = -1; + } + } + + /* put the non '-1' entries in ids[] into i's row */ + for (j=0; j<nadj; j++) { + if (ids[j] != -1) { + nrowind[nnz] = ids[j]; + if (hasvals) + nrowval[nnz] = wgts[j]; + nnz++; + } + } + nrowptr[i+1] = nnz; + } + + gk_free((void **)&colptr, &colind, &colval, &marker, &ids, &wgts, LTERM); + + return nmat; +} + + +/*************************************************************************/ +/*! This function finds the connected components in a graph stored in + CSR format. + + \param mat is the graph structure in CSR format + \param cptr is the ptr structure of the CSR representation of the + components. The length of this vector must be mat->nrows+1. + \param cind is the indices structure of the CSR representation of + the components. The length of this vector must be mat->nrows. + \param cids is an array that stores the component # of each vertex + of the graph. 
The length of this vector must be mat->nrows. + + \returns the number of components that it found. + + \note The cptr, cind, and cids parameters can be NULL, in which case + only the number of connected components is returned. +*/ +/*************************************************************************/ +int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, + int32_t *cids) +{ + ssize_t i, ii, j, jj, k, nvtxs, first, last, ntodo, ncmps; + ssize_t *xadj; + int32_t *adjncy, *pos, *todo; + int32_t mustfree_ccsr=0, mustfree_where=0; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_FindComponents: The matrix needs to be square.\n"); + return -1; + } + + nvtxs = mat->nrows; + xadj = mat->rowptr; + adjncy = mat->rowind; + + /* Deal with NULL supplied cptr/cind vectors */ + if (cptr == NULL) { + cptr = gk_i32malloc(nvtxs+1, "gk_csr_FindComponents: cptr"); + cind = gk_i32malloc(nvtxs, "gk_csr_FindComponents: cind"); + mustfree_ccsr = 1; + } + + /* The list of vertices that have not been touched yet. + The valid entries are from [0..ntodo). */ + todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: todo")); + + /* For a vertex that has not been visited, pos[i] is the position in the + todo list that this vertex is stored. + If a vertex has been visited, pos[i] = -1. */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: pos")); + + + /* Find the connected componends */ + ncmps = -1; + ntodo = nvtxs; /* All vertices have not been visited */ + first = last = 0; /* Point to the first and last vertices that have been touched + but not explored. + These vertices are stored in cind[first]...cind[last-1]. 
*/ + + while (first < last || ntodo > 0) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; /* Mark the end of the current CC */ + + /* put the first vertex in the todo list as the start of the new CC */ + ASSERT(pos[todo[0]] != -1); + cind[last++] = todo[0]; + + pos[todo[0]] = -1; + todo[0] = todo[--ntodo]; + pos[todo[0]] = 0; + } + + i = cind[first++]; /* Get the first visited but unexplored vertex */ + + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (pos[k] != -1) { + cind[last++] = k; + + /* Remove k from the todo list and put the last item in the todo + list at the position that k was so that the todo list will be + consequtive. The pos[] array is updated accordingly to keep track + the location of the vertices in the todo[] list. */ + todo[pos[k]] = todo[--ntodo]; + pos[todo[pos[k]]] = pos[k]; + pos[k] = -1; + } + } + } + cptr[++ncmps] = first; + + /* see if we need to return cids */ + if (cids != NULL) { + for (i=0; i<ncmps; i++) { + for (j=cptr[i]; j<cptr[i+1]; j++) + cids[cind[j]] = i; + } + } + + if (mustfree_ccsr) + gk_free((void **)&cptr, &cind, LTERM); + + gk_free((void **)&pos, &todo, LTERM); + + return (int) ncmps; +} + + +/*************************************************************************/ +/*! Returns a matrix that has been reordered according to the provided + row/column permutation. The matrix is required to be square and the same + permutation is applied to both rows and columns. + + \param[IN] mat is the matrix to be re-ordered. + \param[IN] perm is the new ordering of the rows & columns + \param[IN] iperm is the original ordering of the re-ordered matrix's rows & columns + \returns the newly created reordered matrix. + + \note Either perm or iperm can be NULL but not both. 
+*/ +/**************************************************************************/ +gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm) +{ + ssize_t j, jj; + ssize_t *rowptr, *nrowptr; + int i, k, u, v, nrows; + int freeperm=0, freeiperm=0; + int32_t *rowind, *nrowind; + float *rowval, *nrowval; + gk_csr_t *nmat; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_ReorderSymmetric: The matrix needs to be square.\n"); + return NULL; + } + + if (perm == NULL && iperm == NULL) + return NULL; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + nmat = gk_csr_Create(); + + nmat->nrows = nrows; + nmat->ncols = nrows; + + nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ReorderSymmetric: rowptr"); + nrowind = nmat->rowind = gk_i32malloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowind"); + nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowval"); + + /* allocate memory for the different structures present in the matrix */ + if (mat->rlabels) + nmat->rlabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rlabels"); + if (mat->rmap) + nmat->rmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rmap"); + if (mat->rnorms) + nmat->rnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rnorms"); + if (mat->rsums) + nmat->rsums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsums"); + if (mat->rsizes) + nmat->rsizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsizes"); + if (mat->rvols) + nmat->rvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rvols"); + if (mat->rwgts) + nmat->rwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rwgts"); + + if (mat->clabels) + nmat->clabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: clabels"); + if (mat->cmap) + nmat->cmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: cmap"); + if (mat->cnorms) + nmat->cnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cnorms"); + if (mat->csums) + nmat->csums = gk_fmalloc(nrows, 
"gk_csr_ReorderSymmetric: csums"); + if (mat->csizes) + nmat->csizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csizes"); + if (mat->cvols) + nmat->cvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cvols"); + if (mat->cwgts) + nmat->cwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cwgts"); + + + + /* create perm/iperm if not provided */ + if (perm == NULL) { + freeperm = 1; + perm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: perm"); + for (i=0; i<nrows; i++) + perm[iperm[i]] = i; + } + if (iperm == NULL) { + freeiperm = 1; + iperm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: iperm"); + for (i=0; i<nrows; i++) + iperm[perm[i]] = i; + } + + /* fill-in the information of the re-ordered matrix */ + nrowptr[0] = jj = 0; + for (v=0; v<nrows; v++) { + u = iperm[v]; + for (j=rowptr[u]; j<rowptr[u+1]; j++, jj++) { + nrowind[jj] = perm[rowind[j]]; + nrowval[jj] = rowval[j]; + } + + if (mat->rlabels) + nmat->rlabels[v] = mat->rlabels[u]; + if (mat->rmap) + nmat->rmap[v] = mat->rmap[u]; + if (mat->rnorms) + nmat->rnorms[v] = mat->rnorms[u]; + if (mat->rsums) + nmat->rsums[v] = mat->rsums[u]; + if (mat->rsizes) + nmat->rsizes[v] = mat->rsizes[u]; + if (mat->rvols) + nmat->rvols[v] = mat->rvols[u]; + if (mat->rwgts) + nmat->rwgts[v] = mat->rwgts[u]; + + if (mat->clabels) + nmat->clabels[v] = mat->clabels[u]; + if (mat->cmap) + nmat->cmap[v] = mat->cmap[u]; + if (mat->cnorms) + nmat->cnorms[v] = mat->cnorms[u]; + if (mat->csums) + nmat->csums[v] = mat->csums[u]; + if (mat->csizes) + nmat->csizes[v] = mat->csizes[u]; + if (mat->cvols) + nmat->cvols[v] = mat->cvols[u]; + if (mat->cwgts) + nmat->cwgts[v] = mat->cwgts[u]; + + nrowptr[v+1] = jj; + } + + + /* free memory */ + if (freeperm) + gk_free((void **)&perm, LTERM); + if (freeiperm) + gk_free((void **)&iperm, LTERM); + + return nmat; +} + + +/*************************************************************************/ +/*! 
This function computes a permutation of the rows/columns of a symmetric + matrix based on a breadth-first-traversal. It can be used for re-ordering + the matrix to reduce its bandwidth for better cache locality. + + \param[IN] mat is the matrix whose ordering to be computed. + \param[IN] maxdegree is the maximum number of nonzeros of the rows that + will participate in the BFS ordering. Rows with more nonzeros + will be put at the front of the ordering in decreasing degree + order. + \param[IN] v is the starting row of the BFS. A value of -1 indicates that + a randomly selected row will be used. + \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix. + \param[OUT] iperm[i] stores the ID of the row that corresponds to + the ith vertex in the re-ordered matrix. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, + int32_t **r_perm, int32_t **r_iperm) +{ + int i, k, nrows, first, last; + ssize_t j, *rowptr; + int32_t *rowind, *cot, *pos; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: The matrix needs to be square.\n"); + return; + } + if (maxdegree < mat->nrows && v != -1) { + fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: Since maxdegree node renumbering is requested the starting row should be -1.\n"); + return; + } + if (mat->nrows <= 0) + return; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + + /* This array will function like pos + touched of the CC method */ + pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: pos")); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. 
+ Positions from [0...first) is the current iperm[] vector of the explored rows; + Positions from [first...last) is the OPEN list (i.e., visited rows); + Positions from [last...nrows) is the todo list. */ + cot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: cot")); + + first = last = 0; + + /* deal with maxdegree handling */ + if (maxdegree < nrows) { + last = nrows; + for (i=nrows-1; i>=0; i--) { + if (rowptr[i+1]-rowptr[i] < maxdegree) { + cot[--last] = i; + pos[i] = last; + } + else { + cot[first++] = i; + pos[i] = -1; + } + } + GKASSERT(first == last); + + if (last > 0) { /* reorder them in degree decreasing order */ + gk_ikv_t *cand = gk_ikvmalloc(first, "gk_csr_ComputeBFSOrderingSymmetric: cand"); + + for (i=0; i<first; i++) { + k = cot[i]; + cand[i].key = (int)(rowptr[k+1]-rowptr[k]); + cand[i].val = k; + } + + gk_ikvsortd(first, cand); + for (i=0; i<first; i++) + cot[i] = cand[i].val; + + gk_free((void **)&cand, LTERM); + } + + v = cot[last + RandomInRange(nrows-last)]; + } + + + /* swap v with the front of the todo list */ + cot[pos[v]] = cot[last]; + pos[cot[last]] = pos[v]; + + cot[last] = v; + pos[v] = last; + + + /* start processing the nodes */ + while (first < nrows) { + if (first == last) { /* find another starting row */ + k = cot[last]; + GKASSERT(pos[k] != -1); + pos[k] = -1; /* mark node as being visited */ + last++; + } + + i = cot[first++]; /* the ++ advances the explored rows */ + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + k = rowind[j]; + /* if a node has already been visited, its perm[] will be -1 */ + if (pos[k] != -1) { + /* pos[k] is the location within iperm of where k resides (it is in the 'todo' part); + It is placed in that location cot[last] (end of OPEN list) that we + are about to overwrite and update pos[cot[last]] to reflect that. 
*/ + cot[pos[k]] = cot[last]; /* put the head of the todo list to + where k was in the todo list */ + pos[cot[last]] = pos[k]; /* update perm to reflect the move */ + + cot[last++] = k; /* put node at the end of the OPEN list */ + pos[k] = -1; /* mark node as being visited */ + } + } + } + + /* time to decide what to return */ + if (r_perm != NULL) { + /* use the 'pos' array to build the perm array */ + for (i=0; i<nrows; i++) + pos[cot[i]] = i; + + *r_perm = pos; + pos = NULL; + } + + if (r_iperm != NULL) { + *r_iperm = cot; + cot = NULL; + } + + /* cleanup memory */ + gk_free((void **)&pos, &cot, LTERM); + +} + + +/*************************************************************************/ +/*! This function computes a permutation of the rows of a symmetric matrix + based on a best-first-traversal. It can be used for re-ordering the matrix + to reduce its bandwidth for better cache locality. + + \param[IN] mat is the matrix structure. + \param[IN] v is the starting row of the best-first traversal. + \param[IN] type indicates the criteria to use to measure the 'bestness' + of a row. + \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix. + \param[OUT] iperm[i] stores the ID of the row that corresponds to + the ith row in the re-ordered matrix. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). 
+*/ +/*************************************************************************/ +void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type, + int32_t **r_perm, int32_t **r_iperm) +{ + ssize_t j, jj, *rowptr; + int i, k, u, nrows, nopen, ntodo; + int32_t *rowind, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos; + gk_i32pq_t *queue; + + if (mat->nrows != mat->ncols) { + fprintf(stderr, "gk_csr_ComputeBestFOrderingSymmetric: The matrix needs to be square.\n"); + return; + } + if (mat->nrows <= 0) + return; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: degrees"); + + /* the weighted degree of the vertices in the closed list for type==3 */ + wdegrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: wdegrees"); + + /* the sum of differences for type==4 */ + sod = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: sod"); + + /* the encountering level of a vertex type==5 */ + level = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: level"); + + /* The open+todo list of vertices. + The vertices from [0..nopen) are the open vertices. + The vertices from [nopen..ntodo) are the todo vertices. + */ + ot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: ot")); + + /* For a vertex that has not been explored, pos[i] is the position in the ot list. */ + pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: pos")); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1.
*/ + perm = gk_i32smalloc(nrows, -1, "gk_csr_ComputeBestFOrderingSymmetric: perm"); + + /* create the queue and put the starting vertex in it */ + queue = gk_i32pqCreate(nrows); + gk_i32pqInsert(queue, v, 1); + + /* put v at the front of the open list */ + pos[0] = ot[0] = v; + pos[v] = ot[v] = 0; + nopen = 1; + ntodo = nrows; + + /* start processing the nodes */ + for (i=0; i<nrows; i++) { + if (nopen == 0) { /* deal with non-connected graphs */ + gk_i32pqInsert(queue, ot[0], 1); + nopen++; + } + + if ((v = gk_i32pqGetTop(queue)) == -1) + gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i); + + if (perm[v] != -1) + gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v); + perm[v] = i; + + if (ot[pos[v]] != v) + gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v); + if (pos[v] >= nopen) + gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen); + + /* remove v from the open list and re-arrange the todo part of the list */ + ot[pos[v]] = ot[nopen-1]; + pos[ot[nopen-1]] = pos[v]; + if (ntodo > nopen) { + ot[nopen-1] = ot[ntodo-1]; + pos[ot[ntodo-1]] = nopen-1; + } + nopen--; + ntodo--; + + for (j=rowptr[v]; j<rowptr[v+1]; j++) { + u = rowind[j]; + if (perm[u] == -1) { + /* update ot list, if u is not in the open list by putting it at the end + of the open list. 
*/ + if (degrees[u] == 0) { + ot[pos[u]] = ot[nopen]; + pos[ot[nopen]] = pos[u]; + ot[nopen] = u; + pos[u] = nopen; + nopen++; + + level[u] = level[v]+1; + gk_i32pqInsert(queue, u, 0); + } + + + /* update the in-closed degree */ + degrees[u]++; + + /* update the queues based on the type */ + switch (type) { + case 1: /* DFS */ + gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]); + break; + + case 2: /* Max in closed degree */ + gk_i32pqUpdate(queue, u, degrees[u]); + break; + + case 3: /* Sum of orders in closed list */ + wdegrees[u] += i; + gk_i32pqUpdate(queue, u, wdegrees[u]); + break; + + case 4: /* Sum of order-differences */ + /* this is handled at the end of the loop */ + ; + break; + + case 5: /* BFS with in degree priority */ + gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u])); + break; + + case 6: /* Hybrid of 1+2 */ + gk_i32pqUpdate(queue, u, (i+1)*degrees[u]); + break; + + default: + ; + } + } + } + + if (type == 4) { /* update all the vertices in the open list */ + for (j=0; j<nopen; j++) { + u = ot[j]; + if (perm[u] != -1) + gk_errexit(SIGERR, "For i=%d, the open list contains a closed row: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]); + sod[u] += degrees[u]; + if (i<1000 || i%25==0) + gk_i32pqUpdate(queue, u, sod[u]); + } + } + + /* + for (j=0; j<ntodo; j++) { + if (pos[ot[j]] != j) + gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j); + } + */ + + } + + + /* time to decide what to return */ + if (r_iperm != NULL) { + /* use the 'degrees' array to build the iperm array */ + for (i=0; i<nrows; i++) + degrees[perm[i]] = i; + + *r_iperm = degrees; + degrees = NULL; + } + + if (r_perm != NULL) { + *r_perm = perm; + perm = NULL; + } + + + + + /* cleanup memory */ + gk_i32pqDestroy(queue); + gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM); + +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/error.c b/3rdParty/metis/metis-5.1.1/GKlib/error.c new file mode 100644 index 000000000..e2a18cf03 --- /dev/null +++
b/3rdParty/metis/metis-5.1.1/GKlib/error.c @@ -0,0 +1,214 @@ +/*! +\file error.c +\brief Various error-handling functions + +This file contains functions dealing with error reporting and termination + +\author George +\date 1/1/2007 +\version\verbatim $Id: error.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#define _GK_ERROR_C_ /* this is needed to properly declare the gk_jbuf* variables + as an extern function in GKlib.h */ + +#include <GKlib.h> + + +/* These are the jmp_buf for the graceful exit in case of severe errors. + Multiple buffers are defined to allow for recursive invocation. */ +#define MAX_JBUFS 128 +__thread int gk_cur_jbufs=-1; +__thread jmp_buf gk_jbufs[MAX_JBUFS]; +__thread jmp_buf gk_jbuf; + +typedef void (*gksighandler_t)(int); + +/* These are the holders of the old signal handlers for the trapped signals */ +static __thread gksighandler_t old_SIGMEM_handler; /* Custom signal */ +static __thread gksighandler_t old_SIGERR_handler; /* Custom signal */ +static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS]; /* Custom signal */ +static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS]; /* Custom signal */ + +/* The following is used to control if the gk_errexit() will actually abort or not. + There is always a single copy of this variable */ +static int gk_exit_on_error = 1; + + +/*************************************************************************/ +/*! This function sets the gk_exit_on_error variable + */ +/*************************************************************************/ +void gk_set_exit_on_error(int value) +{ + gk_exit_on_error = value; +} + + + +/*************************************************************************/ +/*! This function prints an error message and exits + */ +/*************************************************************************/ +void errexit(char *f_str,...)
+{ + va_list argp; + + va_start(argp, f_str); + vfprintf(stderr, f_str, argp); + va_end(argp); + + if (strlen(f_str) == 0 || f_str[strlen(f_str)-1] != '\n') + fprintf(stderr,"\n"); + fflush(stderr); + + if (gk_exit_on_error) + exit(-2); + + /* abort(); */ +} + + +/*************************************************************************/ +/*! This function prints an error message and raises a signum signal + */ +/*************************************************************************/ +void gk_errexit(int signum, char *f_str,...) +{ + va_list argp; + + va_start(argp, f_str); + vfprintf(stderr, f_str, argp); + va_end(argp); + + fprintf(stderr,"\n"); + fflush(stderr); + + if (gk_exit_on_error) + raise(signum); +} + + +/***************************************************************************/ +/*! This function sets a number of signal handlers and sets the return point + of a longjmp +*/ +/***************************************************************************/ +int gk_sigtrap() +{ + if (gk_cur_jbufs+1 >= MAX_JBUFS) + return 0; + + gk_cur_jbufs++; + + old_SIGMEM_handlers[gk_cur_jbufs] = signal(SIGMEM, gk_sigthrow); + old_SIGERR_handlers[gk_cur_jbufs] = signal(SIGERR, gk_sigthrow); + + return 1; +} + + +/***************************************************************************/ +/*! This function sets the handlers for the signals to their default handlers + */ +/***************************************************************************/ +int gk_siguntrap() +{ + if (gk_cur_jbufs == -1) + return 0; + + signal(SIGMEM, old_SIGMEM_handlers[gk_cur_jbufs]); + signal(SIGERR, old_SIGERR_handlers[gk_cur_jbufs]); + + gk_cur_jbufs--; + + return 1; +} + + +/*************************************************************************/ +/*! 
This function is the custom signal handler; all it does is + perform a longjmp to the most recently saved environment + */ +/*************************************************************************/ +void gk_sigthrow(int signum) +{ + longjmp(gk_jbufs[gk_cur_jbufs], signum); +} + + +/*************************************************************************** +* This function installs gk_NonLocalExit_Handler for SIGMEM and SIGERR and +* saves the previously installed handlers +****************************************************************************/ +void gk_SetSignalHandlers() +{ + old_SIGMEM_handler = signal(SIGMEM, gk_NonLocalExit_Handler); + old_SIGERR_handler = signal(SIGERR, gk_NonLocalExit_Handler); +} + + +/*************************************************************************** +* This function restores the signal handlers saved by gk_SetSignalHandlers() +****************************************************************************/ +void gk_UnsetSignalHandlers() +{ + signal(SIGMEM, old_SIGMEM_handler); + signal(SIGERR, old_SIGERR_handler); +} + + +/************************************************************************* +* This function is the handler for SIGUSR1 that implements the cleaning up +* process prior to a non-local exit. +**************************************************************************/ +void gk_NonLocalExit_Handler(int signum) +{ + longjmp(gk_jbuf, signum); +} + + +/*************************************************************************/ +/*!
\brief Thread-safe implementation of strerror() */ +/**************************************************************************/ +char *gk_strerror(int errnum) +{ +#if defined(WIN32) || defined(__MINGW32__) + return strerror(errnum); +#else +#ifndef SUNOS + static __thread char buf[1024]; + + strerror_r(errnum, buf, 1024); + + buf[1023] = '\0'; + return buf; +#else + return strerror(errnum); +#endif +#endif +} + + + +/************************************************************************* +* This function prints a backtrace of calling functions +**************************************************************************/ +void PrintBackTrace() +{ +#ifdef HAVE_EXECINFO_H + void *array[10]; + int i, size; + char **strings; + + size = backtrace(array, 10); + strings = backtrace_symbols(array, size); + + printf("Obtained %d stack frames.\n", size); + for (i=0; i<size; i++) { + printf("%s\n", strings[i]); + } + free(strings); +#endif +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/evaluate.c b/3rdParty/metis/metis-5.1.1/GKlib/evaluate.c new file mode 100644 index 000000000..ce805ced9 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/evaluate.c @@ -0,0 +1,132 @@ +/*! + \file evaluate.c + \brief Various routines to evaluate classification performance + + \author George + \date 9/23/2008 + \version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +/********************************************************************** + * This function computes the max accuracy score of a ranked list, + * given +1/-1 class list + **********************************************************************/ +float ComputeAccuracy(int n, gk_fkv_t *list) +{ + int i, P, N, TP, FN = 0; + float bAccuracy = 0.0; + float acc; + + for (P=0, i=0;i<n;i++) + P += (list[i].val == 1? 
1 : 0); + N = n - P; + + TP = FN = 0; + + for(i=0; i<n; i++){ + if (list[i].val == 1) + TP++; + else + FN++; + + acc = (TP + N - FN) * 100.0/ (P + N) ; + if (acc > bAccuracy) + bAccuracy = acc; + } + + return bAccuracy; +} + + +/***************************************************************************** + * This function computes the ROC score of a ranked list, given a +1/-1 class + * list. + ******************************************************************************/ +float ComputeROCn(int n, int maxN, gk_fkv_t *list) +{ + int i, P, TP, FP, TPprev, FPprev, AUC; + float prev; + + FP = TP = FPprev = TPprev = AUC = 0; + prev = list[0].key -1; + + for (P=0, i=0; i<n; i++) + P += (list[i].val == 1 ? 1 : 0); + + for (i=0; i<n && FP < maxN; i++) { + if (list[i].key != prev) { + AUC += (TP+TPprev)*(FP-FPprev)/2; + prev = list[i].key; + FPprev = FP; + TPprev = TP; + } + if (list[i].val == 1) + TP++; + else { + FP++; + } + } + AUC += (TP+TPprev)*(FP-FPprev)/2; + + return (TP*FP > 0 ? (float)(1.0*AUC/(P*FP)) : 0.0); +} + + +/***************************************************************************** +* This function computes the median rate of false positive for each positive +* instance. 
+******************************************************************************/ +float ComputeMedianRFP(int n, gk_fkv_t *list) +{ + int i, P, N, TP, FP; + + P = N = 0; + for (i=0; i<n; i++) { + if (list[i].val == 1) + P++; + else + N++; + } + + FP = TP = 0; + for (i=0; i<n && TP < (P+1)/2; i++) { + if (list[i].val == 1) + TP++; + else + FP++; + } + + return 1.0*FP/N; +} + +/********************************************************* + * Compute the mean + ********************************************************/ +float ComputeMean (int n, float *values) +{ + int i; + float mean = 0.0; + + for(i=0; i < n; i++) + mean += values[i]; + + return 1.0 * mean/ n; +} + +/******************************************************** + * Compute the standard deviation + ********************************************************/ +float ComputeStdDev(int n, float *values) +{ + int i; + float mean = ComputeMean(n, values); + float stdDev = 0; + + for(i=0;i<n;i++){ + stdDev += (values[i] - mean)* (values[i] - mean); + } + + return sqrt(1.0 * stdDev/n); +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/fkvkselect.c b/3rdParty/metis/metis-5.1.1/GKlib/fkvkselect.c new file mode 100644 index 000000000..b1238ce65 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/fkvkselect.c @@ -0,0 +1,142 @@ +/*! +\file dfkvkselect.c +\brief Sorts only the largest k values + +\date Started 7/14/00 +\author George +\version\verbatim $Id: fkvkselect.c 10711 2011-08-31 22:23:04Z karypis $\endverbatim +*/ + + +#include <GKlib.h> + +/* Byte-wise swap two items of size SIZE. */ +#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0) + + +/******************************************************************************/ +/*! 
This function puts the 'topk' largest values in the beginning of the array */ +/*******************************************************************************/ +int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand) +{ + int i, j, lo, hi, mid; + gk_fkv_t stmp; + float pivot; + + if (n <= topk) + return n; /* return if the array has fewer elements than we want */ + + for (lo=0, hi=n-1; lo < hi;) { + mid = lo + ((hi-lo) >> 1); + + /* select the median */ + if (cand[lo].key < cand[mid].key) + mid = lo; + if (cand[hi].key > cand[mid].key) + mid = hi; + else + goto jump_over; + if (cand[lo].key < cand[mid].key) + mid = lo; + +jump_over: + QSSWAP(cand[mid], cand[hi], stmp); + pivot = cand[hi].key; + + /* the partitioning algorithm */ + for (i=lo-1, j=lo; j<hi; j++) { + if (cand[j].key >= pivot) { + i++; + QSSWAP(cand[i], cand[j], stmp); + } + } + i++; + QSSWAP(cand[i], cand[hi], stmp); + + + if (i > topk) + hi = i-1; + else if (i < topk) + lo = i+1; + else + break; + } + +/* + if (cand[lo].key < cand[hi].key) + printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key); + + + for (i=topk; i<n; i++) { + for (j=0; j<topk; j++) + if (cand[i].key > cand[j].key) + printf("Hmm Error: %d %d %f %f %d %d\n", i, j, cand[i].key, cand[j].key, lo, hi); + } +*/ + + return topk; +} + + +/******************************************************************************/ +/*! 
This function puts the 'topk' smallest values in the beginning of the array */ +/*******************************************************************************/ +int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand) +{ + int i, j, lo, hi, mid; + gk_fkv_t stmp; + float pivot; + + if (n <= topk) + return n; /* return if the array has fewer elements than we want */ + + for (lo=0, hi=n-1; lo < hi;) { + mid = lo + ((hi-lo) >> 1); + + /* select the median */ + if (cand[lo].key > cand[mid].key) + mid = lo; + if (cand[hi].key < cand[mid].key) + mid = hi; + else + goto jump_over; + if (cand[lo].key > cand[mid].key) + mid = lo; + +jump_over: + QSSWAP(cand[mid], cand[hi], stmp); + pivot = cand[hi].key; + + /* the partitioning algorithm */ + for (i=lo-1, j=lo; j<hi; j++) { + if (cand[j].key <= pivot) { + i++; + QSSWAP(cand[i], cand[j], stmp); + } + } + i++; + QSSWAP(cand[i], cand[hi], stmp); + + + if (i > topk) + hi = i-1; + else if (i < topk) + lo = i+1; + else + break; + } + +/* + if (cand[lo].key > cand[hi].key) + printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key); + + + for (i=topk; i<n; i++) { + for (j=0; j<topk; j++) + if (cand[i].key < cand[j].key) + printf("Hmm Error: %d %d %f %f %d %d\n", i, j, cand[i].key, cand[j].key, lo, hi); + } +*/ + + return topk; +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/fs.c b/3rdParty/metis/metis-5.1.1/GKlib/fs.c new file mode 100644 index 000000000..31e6d816b --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/fs.c @@ -0,0 +1,225 @@ +/*! +\file fs.c +\brief Various file-system functions. + +This file contains various functions that deal with interfacing with +the filesystem in a portable way. 
+ +\date Started 4/10/95 +\author George +\version\verbatim $Id: fs.c 14332 2013-05-18 12:22:57Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + + + +/************************************************************************* +* This function checks if a file exists +**************************************************************************/ +int gk_fexists(char *fname) +{ + struct stat status; + + if (stat(fname, &status) == -1) + return 0; + + return S_ISREG(status.st_mode); +} + + +/************************************************************************* +* This function checks if a directory exists +**************************************************************************/ +int gk_dexists(char *dirname) +{ + struct stat status; + + if (stat(dirname, &status) == -1) + return 0; + + return S_ISDIR(status.st_mode); +} + + +/*************************************************************************/ +/*! \brief Returns the size of the file in bytes + +This function returns the size of a file as a 64 bit integer. If there +were any errors in stat'ing the file, -1 is returned. +\note That due to the -1 return code, the maximum file size is limited to + 63 bits (which I guess is okay for now). +*/ +/**************************************************************************/ +ssize_t gk_getfsize(char *filename) +{ + struct stat status; + + if (stat(filename, &status) == -1) + return -1; + + return (size_t)(status.st_size); +} + + +/*************************************************************************/ +/*! This function gets some basic statistics about the file. + \param fname is the name of the file + \param r_nlines is the number of lines in the file. If it is NULL, + this information is not returned. + \param r_ntokens is the number of tokens in the file. If it is NULL, + this information is not returned. + \param r_max_nlntokens is the maximum number of tokens in any line + in the file. If it is NULL this information is not returned. 
+ \param r_nbytes is the number of bytes in the file. If it is NULL, + this information is not returned. +*/ +/*************************************************************************/ +void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, + size_t *r_max_nlntokens, size_t *r_nbytes) +{ + size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread; + int intoken=0; + char buffer[2049], *cptr; + FILE *fpin; + + fpin = gk_fopen(fname, "r", "gk_GetFileStats"); + + while (!feof(fpin)) { + nread = fread(buffer, sizeof(char), 2048, fpin); + nbytes += nread; + + buffer[nread] = '\0'; /* There is space for this one */ + for (cptr=buffer; *cptr!='\0'; cptr++) { + if (*cptr == '\n') { + nlines++; + ntokens += intoken; + intoken = 0; + if (max_nlntokens < ntokens-oldntokens) + max_nlntokens = ntokens-oldntokens; + oldntokens = ntokens; + } + else if (*cptr == ' ' || *cptr == '\t') { + ntokens += intoken; + intoken = 0; + } + else { + intoken = 1; + } + } + } + ntokens += intoken; + if (max_nlntokens < ntokens-oldntokens) + max_nlntokens = ntokens-oldntokens; + + gk_fclose(fpin); + + if (r_nlines != NULL) + *r_nlines = nlines; + if (r_ntokens != NULL) + *r_ntokens = ntokens; + if (r_max_nlntokens != NULL) + *r_max_nlntokens = max_nlntokens; + if (r_nbytes != NULL) + *r_nbytes = nbytes; +} + + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string containing just the basename of the file. +* The basename is derived from the actual filename by stripping the last +* .ext part. 
+**************************************************************************/ +char *gk_getbasename(char *path) +{ + char *startptr, *endptr; + char *basename; + + if ((startptr = strrchr(path, '/')) == NULL) + startptr = path; + else + startptr = startptr+1; + + basename = gk_strdup(startptr); + + if ((endptr = strrchr(basename, '.')) != NULL) + *endptr = '\0'; + + return basename; +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string corresponding to its file extension. The +* extension of a file is considered to be the string right after the +* last '.' character. +**************************************************************************/ +char *gk_getextname(char *path) +{ + char *startptr; + + if ((startptr = strrchr(path, '.')) == NULL) + return gk_strdup(path); + else + return gk_strdup(startptr+1); +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and just returns a string containing just the filename. +**************************************************************************/ +char *gk_getfilename(char *path) +{ + char *startptr; + + if ((startptr = strrchr(path, '/')) == NULL) + return gk_strdup(path); + else + return gk_strdup(startptr+1); +} + +/************************************************************************* +* This function takes in a potentially full path specification of a file +* and extracts the directory path component if it exists, otherwise it +* returns "." as the path. The memory for it is dynamically allocated.
+**************************************************************************/ +char *getpathname(char *path) +{ + char *endptr, *tmp; + + if ((endptr = strrchr(path, '/')) == NULL) { + return gk_strdup("."); + } + else { + tmp = gk_strdup(path); + *(strrchr(tmp, '/')) = '\0'; + return tmp; + } +} + + + +/************************************************************************* +* This function creates a path +**************************************************************************/ +int gk_mkpath(char *pathname) +{ + char tmp[2048]; + + sprintf(tmp, "mkdir -p %s", pathname); + return system(tmp); +} + + +/************************************************************************* +* This function deletes a directory tree and all of its contents +**************************************************************************/ +int gk_rmpath(char *pathname) +{ + char tmp[2048]; + + sprintf(tmp, "rm -r %s", pathname); + return system(tmp); +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/getopt.c b/3rdParty/metis/metis-5.1.1/GKlib/getopt.c new file mode 100644 index 000000000..2e7e042e4 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/getopt.c @@ -0,0 +1,855 @@ +/*************************************************************************/ +/*! \file getopt.c +\brief Command line parsing + +This file contains a implementation of GNU's Getopt facility. The purpose +for including it here is to ensure portability across different unix- and +windows-based systems. + +\warning +The implementation provided here uses the \c gk_ prefix for all variables +used by the standard Getopt facility to communicate with the program. +So, do read the documentation here. + +\verbatim + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. +\endverbatim +*/ +/*************************************************************************/ + + +#include <GKlib.h> + +/*************************************************************************/ +/* Local function prototypes */ +/*************************************************************************/ +static void exchange (char **); +static char *gk_getopt_initialize (int, char **, char *); +static int gk_getopt_internal(int argc, char **argv, char *optstring, + struct gk_option *longopts, int *longind, int long_only); + + + +/*************************************************************************/ +/*! \brief For communicating arguments to the caller. + +This variable is set by getopt to point at the value of the option argument, +for those options that accept arguments. +*/ +/*************************************************************************/ +char *gk_optarg; + + +/*************************************************************************/ +/*! \brief Index in ARGV of the next element to be scanned. + +This variable is set by getopt to the index of the next element of the argv +array to be processed. 
Once getopt has found all of the option arguments, +you can use this variable to determine where the remaining non-option arguments +begin. +*/ +/*************************************************************************/ +int gk_optind = 1; + + +/*************************************************************************/ +/*! \brief Controls error reporting for unrecognized options. + +If the value of this variable is nonzero, then getopt prints an error +message to the standard error stream if it encounters an unknown option +character or an option with a missing required argument. This is the default +behavior. If you set this variable to zero, getopt does not print any messages, +but it still returns the character ? to indicate an error. +*/ +/*************************************************************************/ +int gk_opterr = 1; + + +/*************************************************************************/ +/*! \brief Stores unknown option characters + +When getopt encounters an unknown option character or an option with a +missing required argument, it stores that option character in this +variable. You can use this for providing your own diagnostic messages. +*/ +/*************************************************************************/ +int gk_optopt = '?'; + + +/*************************************************************************/ +/* +Records that the getopt facility has been initialized. Set to 1 by gk_getopt_internal() after its first-call setup; nothing in this file resets it. +*/ +/*************************************************************************/ +int gk_getopt_initialized; + + +/*************************************************************************/ +/* +The next char to be scanned in the option-element in which the last option +character we returned was found. This allows us to pick up the scan where +we left off. + +If this is zero, or a null string, it means resume the scan by advancing +to the next ARGV-element. 
+*/ +/*************************************************************************/ +static char *nextchar; + + +/*************************************************************************/ +/* +Value of POSIXLY_CORRECT environment variable. +*/ +/*************************************************************************/ +static char *posixly_correct; + + +/*************************************************************************/ +/* +Describe how to deal with options that follow non-option ARGV-elements. + +If the caller did not specify anything, the default is REQUIRE_ORDER if +the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise. + +REQUIRE_ORDER means don't recognize them as options; stop option processing +when the first non-option is seen. This is what Unix does. This mode of +operation is selected by either setting the environment variable +POSIXLY_CORRECT, or using `+' as the first character of the list of +option characters. + +PERMUTE is the default. We permute the contents of ARGV as we scan, so +that eventually all the non-options are at the end. This allows options +to be given in any order, even with programs that were not written to +expect this. + +RETURN_IN_ORDER is an option available to programs that were written +to expect options and other ARGV-elements in any order and that care +about the ordering of the two. We describe each non-option ARGV-element +as if it were the argument of an option with character code 1. +Using `-' as the first character of the list of option characters +selects this mode of operation. + +The special argument `--' forces an end of option-scanning regardless +of the value of `ordering'. In the case of RETURN_IN_ORDER, only +`--' can cause `getopt' to return -1 with `gk_optind' != ARGC. 
+*/ +/*************************************************************************/ +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + + + +/*************************************************************************/ +/* +Describe the part of ARGV that contains non-options that have +been skipped. `first_nonopt' is the index in ARGV of the first of them; +`last_nonopt' is the index after the last of them. +*/ +/*************************************************************************/ +static int first_nonopt; +static int last_nonopt; + + + + + +/*************************************************************************/ +/* +Handle permutation of arguments. + +Exchange two adjacent subsequences of ARGV. +One subsequence is elements [first_nonopt,last_nonopt) +which contains all the non-options that have been skipped so far. +The other is elements [last_nonopt,gk_optind), which contains all +the options processed since those non-options were skipped. + +`first_nonopt' and `last_nonopt' are relocated so that they describe +the new indices of the non-options in ARGV after they are moved. +*/ +/*************************************************************************/ +static void exchange (char **argv) +{ + int bottom = first_nonopt; /* start of the skipped non-options */ + int middle = last_nonopt; /* end of the non-options, start of the processed options */ + int top = gk_optind; /* one past the processed options */ + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) { + if (top - middle > middle - bottom) { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. 
*/ + for (i = 0; i < len; i++) { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (gk_optind - last_nonopt); + last_nonopt = gk_optind; +} + + + +/*************************************************************************/ +/* +Initialize the internal data when the first call is made. Returns optstring advanced past a leading '-' or '+' mode character, if there is one. The argc and argv parameters are not used. +*/ +/*************************************************************************/ +static char *gk_getopt_initialize (int argc, char **argv, char *optstring) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = gk_optind; + + nextchar = NULL; /* makes gk_getopt_internal() start at a fresh ARGV-element */ + + posixly_correct = getenv("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + if (optstring[0] == '-') { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + + return optstring; +} + + +/*************************************************************************/ +/* + Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. 
The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `gk_optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `gk_optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `gk_opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `gk_optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. 
+ When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + LONGOPTS is a vector of `struct gk_option' terminated by an + element containing a name which is zero. LONGOPTS may also be a null pointer (gk_getopt() passes one); then no long-option lookup is done, including for `W;'. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. +*/ +/*************************************************************************/ +static int gk_getopt_internal(int argc, char **argv, char *optstring, + struct gk_option *longopts, int *longind, int long_only) +{ + int print_errors = gk_opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + gk_optarg = NULL; + + if (gk_optind == 0 || !gk_getopt_initialized) { + if (gk_optind == 0) + gk_optind = 1; /* Don't scan ARGV[0], the program name. */ + + optstring = gk_getopt_initialize (argc, argv, optstring); + gk_getopt_initialized = 1; + } + + /* Test whether ARGV[gk_optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The latter information + is only used when used in the GNU libc. */ +# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0') + + if (nextchar == NULL || *nextchar == '\0') { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > gk_optind) + last_nonopt = gk_optind; + if (first_nonopt > gk_optind) + first_nonopt = gk_optind; + + if (ordering == PERMUTE) { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. 
*/ + + if (first_nonopt != last_nonopt && last_nonopt != gk_optind) + exchange ((char **) argv); + else if (last_nonopt != gk_optind) + first_nonopt = gk_optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (gk_optind < argc && NONOPTION_P) + gk_optind++; + + last_nonopt = gk_optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) { + gk_optind++; + + if (first_nonopt != last_nonopt && last_nonopt != gk_optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = gk_optind; + last_nonopt = argc; + + gk_optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (gk_optind == argc) { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + gk_optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) { + if (ordering == REQUIRE_ORDER) + return -1; + gk_optarg = argv[gk_optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. 
+ + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) { + char *nameend; + struct gk_option *p; + struct gk_option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) { + if (!strncmp (p->name, nextchar, nameend - nextchar)) { + if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + } + + if (ambig && !exact) { + if (print_errors) + fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]); + + nextchar += strlen (nextchar); + gk_optind++; + gk_optopt = 0; + return '?'; + } + + if (pfound != NULL) { + option_index = indfound; + gk_optind++; + if (*nameend) { + /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. 
*/ + if (pfound->has_arg) + gk_optarg = nameend + 1; + else { + if (print_errors) { + if (argv[gk_optind - 1][1] == '-') + /* --option */ + fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name); + else + /* +option or -option */ + fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name); + } + + nextchar += strlen (nextchar); + + gk_optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) { + if (gk_optind < argc) + gk_optarg = argv[gk_optind++]; + else { + if (print_errors) + fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]); + nextchar += strlen (nextchar); + gk_optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. Otherwise interpret it as a short option. */ + if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) { + if (print_errors) { + if (argv[gk_optind][1] == '-') + /* --option */ + fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar); + else + /* +option or -option */ + fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar); + } + nextchar = (char *) ""; + gk_optind++; + gk_optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + { + char c = *nextchar++; + char *temp = strchr(optstring, c); + + /* Increment `gk_optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++gk_optind; + + if (temp == NULL || c == ':') { + if (print_errors) { + if (posixly_correct) + /* 1003.2 specifies the format of this message. 
*/ + fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c); + else + fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c); + } + gk_optopt = c; + return '?'; + } + + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (longopts != NULL && temp[0] == 'W' && temp[1] == ';') { /* with no longopts table (plain gk_getopt) the loop below would dereference NULL; 'W' is then returned as an ordinary short option */ + char *nameend; + struct gk_option *p; + struct gk_option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') { + gk_optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + gk_optind++; + } + else if (gk_optind == argc) { + if (print_errors) { + /* 1003.2 specifies the format of this message. */ + fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c); + } + gk_optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument. */ + gk_optarg = argv[gk_optind++]; + + /* gk_optarg is now the argument, see if it's in the table of longopts. */ + + for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) { + if (!strncmp (p->name, nextchar, nameend - nextchar)) { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + } + if (ambig && !exact) { + if (print_errors) + fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], gk_optarg); /* report the offending name; argv[gk_optind] is the element after it and may be argv[argc] (NULL) */ + nextchar += strlen (nextchar); + /* gk_optind already points past the -W argument; advancing again would silently skip the next ARGV-element */ + return '?'; + } + if (pfound != NULL) { + option_index = indfound; + if (*nameend) { + /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. */ + if (pfound->has_arg) + gk_optarg = nameend + 1; + else { + if (print_errors) + fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) { + if (gk_optind < argc) + gk_optarg = argv[gk_optind++]; + else { + if (print_errors) + fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + + if (temp[1] == ':') { + if (temp[2] == ':') { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') { + gk_optarg = nextchar; + gk_optind++; + } + else + gk_optarg = NULL; + nextchar = NULL; + } + else { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') { + gk_optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now. */ + gk_optind++; + } + else if (gk_optind == argc) { + if (print_errors) { + /* 1003.2 specifies the format of this message. 
*/ + fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c); + } + gk_optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument. */ + gk_optarg = argv[gk_optind++]; + nextchar = NULL; + } + } + return c; + } +} + + + +/*************************************************************************/ +/*! \brief Parse command-line arguments + +The gk_getopt() function gets the next option argument from the argument +list specified by the \c argv and \c argc arguments. Normally these values +come directly from the arguments received by main(). + +\param argc is the number of command line arguments passed to main(). +\param argv is an array of strings storing the above command line + arguments. +\param options is a string that specifies the option characters that + are valid for this program. An option character in this string + can be followed by a colon (`:') to indicate that it takes a + required argument. If an option character is followed by two + colons (`::'), its argument is optional; this is a GNU extension. + +\return +It returns the option character for the next command line option. When no +more option arguments are available, it returns -1. There may still be +more non-option arguments; you must compare the external variable +#gk_optind against the \c argc parameter to check this. + +\return +If the option has an argument, gk_getopt() returns the argument by storing +it in the variable #gk_optarg. You don't ordinarily need to copy the +#gk_optarg string, since it is a pointer into the original \c argv array, +not into a static area that might be overwritten. + +\return +If gk_getopt() finds an option character in \c argv that was not included +in options, or a missing option argument, it returns `?' and sets the +external variable #gk_optopt to the actual option character. 
+If the first character of options is a colon (`:'), then gk_getopt() +returns `:' instead of `?' to indicate a missing option argument. +In addition, if the external variable #gk_opterr is nonzero (which is +the default), gk_getopt() prints an error message. The variable #gk_optarg is +set by gk_getopt() to point at the value of the option argument, +for those options that accept arguments. + + +gk_getopt() has three ways to deal with options that follow non-options +\c argv elements. The special argument <tt>`--'</tt> forces in all cases +the end of option scanning. + - The default is to permute the contents of \c argv while scanning it + so that eventually all the non-options are at the end. This allows + options to be given in any order, even with programs that were not + written to expect this. + - If the options argument string begins with a hyphen (`-'), this is + treated specially. It permits arguments that are not options to be + returned as if they were associated with option character `\\1'. + - POSIX demands the following behavior: The first non-option stops + option processing. This mode is selected by either setting the + environment variable POSIXLY_CORRECT or beginning the options + argument string with a plus sign (`+'). + +*/ +/*************************************************************************/ +int gk_getopt(int argc, char **argv, char *options) +{ + return gk_getopt_internal(argc, argv, options, NULL, NULL, 0); /* no long-option table, no index output, long_only off */ +} + + +/*************************************************************************/ +/*! \brief Parse command-line arguments with long options + +This function accepts GNU-style long options as well as single-character +options. + +\param argc is the number of command line arguments passed to main(). +\param argv is an array of strings storing the above command line + arguments. +\param options describes the short options to accept, just as it does + in gk_getopt(). +\param long_options describes the long options to accept. 
See the + definition of ::gk_option for more information. +\param opt_index this is a returned variable. For any long option, + gk_getopt_long() tells you the index in the array \c long_options + of the option's definition, by storing it into <tt>*opt_index</tt>. + You can get the name of the option with <tt>long_options[*opt_index].name</tt>. + So you can distinguish among long options either by the values + in their val fields or by their indices. You can also distinguish + in this way among long options that set flags. + + +\return +When gk_getopt_long() encounters a short option, it does the same thing +that gk_getopt() would do: it returns the character code for the option, +and stores the option's argument (if it has one) in #gk_optarg. + +\return +When gk_getopt_long() encounters a long option, it takes actions based +on the flag and val fields of the definition of that option. + +\return +If flag is a null pointer, then gk_getopt_long() returns the contents +of val to indicate which option it found. You should arrange distinct +values in the val field for options with different meanings, so you +can decode these values after gk_getopt_long() returns. If the long +option is equivalent to a short option, you can use the short option's +character code in val. + +\return +If flag is not a null pointer, that means this option should just set +a flag in the program. The flag is a variable of type int that you +define. Put the address of the flag in the flag field. Put in the +val field the value you would like this option to store in the flag. +In this case, gk_getopt_long() returns 0. + +\return +When a long option has an argument, gk_getopt_long() puts the argument +value in the variable #gk_optarg before returning. When the option has +no argument, the value in #gk_optarg is a null pointer. This is +how you can tell whether an optional argument was supplied. 
+ +\return +When gk_getopt_long() has no more options to handle, it returns -1, +and leaves in the variable #gk_optind the index in argv of the next +remaining argument. +*/ +/*************************************************************************/ +int gk_getopt_long( int argc, char **argv, char *options, + struct gk_option *long_options, int *opt_index) +{ + return gk_getopt_internal (argc, argv, options, long_options, opt_index, 0); /* long_only off: only "--" introduces a long option */ +} + + + +/*************************************************************************/ +/*! \brief Parse command-line arguments with only long options + +Like gk_getopt_long(), but '-' as well as '--' can indicate a long option. +If an option that starts with '-' (not '--') doesn't match a long option, +but does match a short option, it is parsed as a short option instead. +*/ +/*************************************************************************/ +int gk_getopt_long_only(int argc, char **argv, char *options, + struct gk_option *long_options, int *opt_index) +{ + return gk_getopt_internal(argc, argv, options, long_options, opt_index, 1); /* long_only on */ +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_arch.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_arch.h new file mode 100644 index 000000000..8c8ac50ed --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_arch.h @@ -0,0 +1,68 @@ +/*! 
+\file gk_arch.h +\brief This file contains various architecture-specific declarations + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_arch.h 21637 2018-01-03 22:37:24Z karypis $ \endverbatim +*/ + +#ifndef _GK_ARCH_H_ +#define _GK_ARCH_H_ + +/************************************************************************* +* Architecture-specific differences in header files +**************************************************************************/ +#ifdef LINUX +#if !defined(__USE_XOPEN) +#define __USE_XOPEN +#endif +#if !defined(_XOPEN_SOURCE) +#define _XOPEN_SOURCE 600 +#endif +#if !defined(__USE_XOPEN2K) +#define __USE_XOPEN2K +#endif +#endif /* LINUX */ + + +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> +#endif + + +#ifdef __MSC__ + #include "ms_stdint.h" + #include "ms_inttypes.h" + #include "ms_stat.h" + #include "win32/adapt.h" +#else +#ifndef SUNOS + #include <stdint.h> +#endif + #include <inttypes.h> + #include <sys/types.h> + #include <sys/resource.h> + #include <sys/time.h> + #include <unistd.h> +#endif /* __MSC__ */ + + +/************************************************************************* +* Architecture-specific modifications. NOTE(review): ptrdiff_t is not declared by any header included directly above; presumably an earlier include supplies it - confirm. +**************************************************************************/ +#ifdef WIN32 +typedef ptrdiff_t ssize_t; +#endif + + +#ifdef SUNOS +#define PTRDIFF_MAX INT64_MAX +#endif + +/* MSC does not have INFINITY defined. NOTE(review): the FLT_MAX fallback needs <float.h> wherever INFINITY is expanded; this header does not include it itself. */ +#ifndef INFINITY +#define INFINITY FLT_MAX +#endif + +#endif /* _GK_ARCH_H_ */ diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_defs.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_defs.h new file mode 100644 index 000000000..68cb9a4cc --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_defs.h @@ -0,0 +1,87 @@ +/*! 
+\file gk_defs.h +\brief This file contains various constants definitions + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_defs.h 22039 2018-05-26 16:34:48Z karypis $ \endverbatim +*/ + +#ifndef _GK_DEFS_H_ +#define _GK_DEFS_H_ + + +#define LTERM (void **) 0 /* List terminator for GKfree() */ + +/* mopt_t types */ +#define GK_MOPT_MARK 1 +#define GK_MOPT_CORE 2 +#define GK_MOPT_HEAP 3 + +#define HTABLE_EMPTY -1 +#define HTABLE_DELETED -2 +#define HTABLE_FIRST 1 +#define HTABLE_NEXT 2 + +/* pdb corruption bit switches */ +#define CRP_ALTLOCS 1 +#define CRP_MISSINGCA 2 +#define CRP_MISSINGBB 4 +#define CRP_MULTICHAIN 8 +#define CRP_MULTICA 16 +#define CRP_MULTIBB 32 + +#define MAXLINELEN 300000 + +/* GKlib signals to standard signal mapping */ +#define SIGMEM SIGABRT +#define SIGERR SIGTERM + + +/* CSR-related defines */ +#define GK_CSR_ROW 1 +#define GK_CSR_COL 2 +#define GK_CSR_ROWCOL 3 + +#define GK_CSR_MAXTF 1 +#define GK_CSR_SQRT 2 +#define GK_CSR_POW25 3 +#define GK_CSR_POW65 4 +#define GK_CSR_POW75 5 +#define GK_CSR_POW85 6 +#define GK_CSR_LOG 7 +#define GK_CSR_IDF 8 +#define GK_CSR_IDF2 9 +#define GK_CSR_MAXTF2 10 + +#define GK_CSR_DOTP 1 +#define GK_CSR_COS 2 +#define GK_CSR_JAC 3 +#define GK_CSR_MIN 4 +#define GK_CSR_AMIN 5 + +#define GK_CSR_FMT_AUTO 2 /* NOTE(review): same value as GK_CSR_FMT_CSR below, so the two cannot be told apart - confirm this is intended */ +#define GK_CSR_FMT_CLUTO 1 +#define GK_CSR_FMT_CSR 2 +#define GK_CSR_FMT_METIS 3 +#define GK_CSR_FMT_BINROW 4 +#define GK_CSR_FMT_BINCOL 5 +#define GK_CSR_FMT_IJV 6 +#define GK_CSR_FMT_BIJV 7 + +#define GK_CSR_SYM_SUM 1 +#define GK_CSR_SYM_MIN 2 +#define GK_CSR_SYM_MAX 3 +#define GK_CSR_SYM_AVG 4 + + +#define GK_GRAPH_FMT_METIS 1 +#define GK_GRAPH_FMT_IJV 2 +#define GK_GRAPH_FMT_HIJV 3 + +#define GK_GRAPH_SYM_SUM 1 +#define GK_GRAPH_SYM_MIN 2 +#define GK_GRAPH_SYM_MAX 3 +#define GK_GRAPH_SYM_AVG 4 + +#endif /* _GK_DEFS_H_ */ diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_externs.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_externs.h new file mode 100644 index 000000000..2c0fdd968 --- /dev/null +++ 
b/3rdParty/metis/metis-5.1.1/GKlib/gk_externs.h @@ -0,0 +1,25 @@ +/*! +\file gk_externs.h +\brief This file contains declarations of external variables created by GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_EXTERNS_H_ +#define _GK_EXTERNS_H_ + + +/************************************************************************* +* Extern variable declarations (skipped when _GK_ERROR_C_ is defined). Hopefully, the __thread makes them thread-safe. +**************************************************************************/ +#ifndef _GK_ERROR_C_ +/* declared in error.c */ +extern __thread int gk_cur_jbufs; +extern __thread jmp_buf gk_jbufs[]; +extern __thread jmp_buf gk_jbuf; + +#endif + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_getopt.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_getopt.h new file mode 100644 index 000000000..4bb86115f --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_getopt.h @@ -0,0 +1,64 @@ +/*! +\file gk_getopt.h +\brief This file contains GNU's externs/structs/prototypes + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_GETOPT_H_ +#define _GK_GETOPT_H_ + + +/* Externals from getopt.c */ +extern char *gk_optarg; +extern int gk_optind; +extern int gk_opterr; +extern int gk_optopt; + + +/*! \brief The structure that stores the information about the command-line options + +This structure describes a single long option name for the sake of +gk_getopt_long(). The argument <tt>long_options</tt> must be an array +of these structures, one for each long option. Terminate the array with +an element containing all zeros. +*/ +struct gk_option { + char *name; /*!< This field is the name of the option. */ + int has_arg; /*!< This field says whether the option takes an argument.
+ It is an integer, and there are three legitimate values: + no_argument, required_argument and optional_argument. + */ + int *flag; /*!< See the discussion on ::gk_option#val */ + int val; /*!< These fields control how to report or act on the option + when it occurs. + + If flag is a null pointer, then the val is a value which + identifies this option. Often these values are chosen + to uniquely identify particular long options. + + If flag is not a null pointer, it should be the address + of an int variable which is the flag for this option. + The value in val is the value to store in the flag to + indicate that the option was seen. */ +}; + +/* Names for the values of the `has_arg' field of `struct gk_option'. */ +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + + +/* Function prototypes */ +extern int gk_getopt(int __argc, char **__argv, char *__shortopts); +extern int gk_getopt_long(int __argc, char **__argv, char *__shortopts, + struct gk_option *__longopts, int *__longind); +extern int gk_getopt_long_only (int __argc, char **__argv, + char *__shortopts, struct gk_option *__longopts, int *__longind); + + + +#endif + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_macros.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_macros.h new file mode 100644 index 000000000..c3f1b4530 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_macros.h @@ -0,0 +1,169 @@ +/*! +\file gk_macros.h +\brief This file contains various macros + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_macros.h 15048 2013-08-31 19:38:14Z karypis $ \endverbatim +*/ + +#ifndef _GK_MACROS_H_ +#define _GK_MACROS_H_ + +/*------------------------------------------------------------- + * Useful macros (function-like: arguments may be evaluated more than once) + *-------------------------------------------------------------*/ +#define gk_max(a, b) ((a) >= (b) ? (a) : (b)) +#define gk_min(a, b) ((a) >= (b) ? (b) : (a)) +#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? (a) : ((b) >= (a) && (b) >= (c) ? 
(b) : (c))) +#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) +#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0) +#define sign(a, b) (((a) >= 0 ? (b) : -(b))) /* b when a >= 0, otherwise -b; arguments are parenthesized so that expression arguments such as sign(x, y-z) expand correctly */ + +#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0)) +#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand())) +#define RandomInRange_r(s, u) ((int) (ONEOVERRANDMAX*(u)*rand_r(s))) + +#define gk_abs(x) ((x) >= 0 ? (x) : -(x)) + + +/*------------------------------------------------------------- + * Timing macros: start subtracts and stop adds the current clock reading, so tmr accumulates elapsed seconds across start/stop pairs + *-------------------------------------------------------------*/ +#define gk_clearcputimer(tmr) (tmr = 0.0) +#define gk_startcputimer(tmr) (tmr -= gk_CPUSeconds()) +#define gk_stopcputimer(tmr) (tmr += gk_CPUSeconds()) +#define gk_getcputimer(tmr) (tmr) + +#define gk_clearwctimer(tmr) (tmr = 0.0) +#define gk_startwctimer(tmr) (tmr -= gk_WClockSeconds()) +#define gk_stopwctimer(tmr) (tmr += gk_WClockSeconds()) +#define gk_getwctimer(tmr) (tmr) + +/*------------------------------------------------------------- + * dbglvl handling macros (IFSET expands to a bare if statement that already ends in a semicolon) + *-------------------------------------------------------------*/ +#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd); + + +/*------------------------------------------------------------- + * graceful library exit macro + *-------------------------------------------------------------*/ +#define GKSETJMP() (setjmp(gk_return_to_entry)) +#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs])) + + +/*------------------------------------------------------------- + * Debugging memory leaks + *-------------------------------------------------------------*/ +#ifdef DMALLOC +# define MALLOC_CHECK(ptr) \ + if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) { \ + printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \ + __LINE__, __FILE__); \ + abort(); \ + } +#else +# define MALLOC_CHECK(ptr) ; +#endif + + +/*------------------------------------------------------------- + * CSR conversion macros + 
*-------------------------------------------------------------*/ +#define MAKECSR(i, n, a) /* turns the counts in a[0..n-1] into offsets a[0..n] (a[0] = 0, a[k] = sum of the first k counts); writes a[n], so a needs n+1 entries */ \ + do { \ + for (i=1; i<n; i++) a[i] += a[i-1]; \ + for (i=n; i>0; i--) a[i] = a[i-1]; \ + a[0] = 0; \ + } while(0) + +#define SHIFTCSR(i, n, a) /* moves a[0..n-1] into a[1..n] and sets a[0] = 0 */ \ + do { \ + for (i=n; i>0; i--) a[i] = a[i-1]; \ + a[0] = 0; \ + } while(0) + + +/*------------------------------------------------------------- + * ASSERTS that cannot be turned off! GKASSERT and GKASSERTP abort; GKCUASSERT, GKWARN and their P variants only print. These expand to bare if statements, not do-while blocks. + *-------------------------------------------------------------*/ +#define GKASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + abort(); \ + } + +#define GKASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + abort(); \ + } + +#define GKCUASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + } + +#define GKWARN(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + } + +#define GKCUASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + } + +#define GKWARNP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ + printf("\n"); \ + } + + +/*------------------------------------------------------------- + * Program Assertions (compiled out when NDEBUG is defined; msg is a parenthesized printf argument list) + *-------------------------------------------------------------*/ +#ifndef NDEBUG +# define ASSERT(expr) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + assert(expr); \ + } + +# define ASSERTP(expr,msg) \ + if (!(expr)) { \ + printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \ + __LINE__, __FILE__); \ + printf msg ; \ 
+ printf("\n"); \ + assert(expr); \ + } +#else +# define ASSERT(expr) ; +# define ASSERTP(expr,msg) ; +#endif + +#ifndef NDEBUG2 +# define ASSERT2 ASSERT +# define ASSERTP2 ASSERTP +#else +# define ASSERT2(expr) ; +# define ASSERTP2(expr,msg) ; +#endif + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkblas.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkblas.h new file mode 100644 index 000000000..1231669db --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkblas.h @@ -0,0 +1,203 @@ +/*! +\file gk_mkblas.h +\brief Templates for BLAS-like routines + +\date Started 3/28/07 +\author George +\version\verbatim $Id: gk_mkblas.h 16304 2014-02-25 14:27:19Z karypis $ \endverbatim +*/ + +#ifndef _GK_MKBLAS_H_ +#define _GK_MKBLAS_H_ + + +#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \ +/*************************************************************************/\ +/*! The macro for gk_?incset()-class of routines: sets x[i] = baseval+i for i in [0, n) and returns x */\ +/*************************************************************************/\ +TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\ +{\ + size_t i;\ +\ + for (i=0; i<n; i++)\ + x[i] = baseval+i;\ +\ + return x;\ +}\ +\ +/*************************************************************************/\ +/*! The macro for gk_?max()-class of routines: largest of n elements read with stride incx; returns (TYPE)0 when n is 0 */\ +/*************************************************************************/\ +TYPE PRFX ## max(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i;\ + TYPE max;\ +\ + if (n <= 0) return (TYPE) 0;\ +\ + for (max=(*x), x+=incx, i=1; i<n; i++, x+=incx)\ + max = ((*x) > max ? (*x) : max);\ +\ + return max;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?min()-class of routines: smallest of n elements read with stride incx; returns (TYPE)0 when n is 0 */\ +/*************************************************************************/\ +TYPE PRFX ## min(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i;\ + TYPE min;\ +\ + if (n <= 0) return (TYPE) 0;\ +\ + for (min=(*x), x+=incx, i=1; i<n; i++, x+=incx)\ + min = ((*x) < min ? 
(*x) : min);\ +\ + return min;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?argmax()-class of routines: returns the element index (strided offset divided by incx) of the first maximum; incx must be non-zero */\ +/*************************************************************************/\ +size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i, j, max=0;\ +\ + for (i=1, j=incx; i<n; i++, j+=incx)\ + max = (x[j] > x[max] ? j : max);\ +\ + return (size_t)(max/incx);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?argmin()-class of routines: returns the element index (strided offset divided by incx) of the first minimum; incx must be non-zero */\ +/*************************************************************************/\ +size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i, j, min=0;\ +\ + for (i=1, j=incx; i<n; i++, j+=incx)\ + min = (x[j] < x[min] ? j : min);\ +\ + return (size_t)(min/incx);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?argmax_n()-class of routines: k is 1-based (the result is read from cand[k-1] after sorting), so callers must pass 1 <= k <= n */\ +/*************************************************************************/\ +size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k)\ +{\ + size_t i, j, max_n;\ + PRFX ## kv_t *cand;\ +\ + cand = PRFX ## kvmalloc(n, "GK_ARGMAX_N: cand");\ +\ + for (i=0, j=0; i<n; i++, j+=incx) {\ + cand[i].val = i;\ + cand[i].key = x[j];\ + }\ + PRFX ## kvsortd(n, cand);\ +\ + max_n = cand[k-1].val;\ +\ + gk_free((void *)&cand, LTERM);\ +\ + return max_n;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?sum()-class of routines: accumulates n elements read with stride incx into an OUTTYPE */\ +/**************************************************************************/\ +OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i;\ + OUTTYPE sum = 0;\ +\ + for (i=0; i<n; i++, x+=incx)\ + sum += (*x);\ +\ + return sum;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
The macro for gk_?scale()-class of routines */\ +/**************************************************************************/\ +TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx)\ +{\ + size_t i;\ +\ + for (i=0; i<n; i++) /* index with the stride instead of advancing x, so x still points at the first element */\ + x[i*incx] *= alpha;\ +\ + return x; /* base pointer of the scaled vector, consistent with incset and axpy (previously a pointer past the data was returned) */\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?norm2()-class of routines: square root of the sum of squares, or 0 when that sum is not positive */\ +/**************************************************************************/\ +OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx)\ +{\ + size_t i;\ + OUTTYPE partial = 0;\ +\ + for (i=0; i<n; i++, x+=incx)\ + partial += (*x) * (*x);\ +\ + return (partial > 0 ? (OUTTYPE)sqrt((double)partial) : (OUTTYPE)0);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?dot()-class of routines: sum of x[i*incx]*y[i*incy] over n elements */\ +/**************************************************************************/\ +OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\ +{\ + size_t i;\ + OUTTYPE partial = 0.0;\ + \ + for (i=0; i<n; i++, x+=incx, y+=incy)\ + partial += (*x) * (*y);\ +\ + return partial;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
The macro for gk_?axpy()-class of routines */\ +/**************************************************************************/\ +TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy)\ +{\ + size_t i;\ + TYPE *y_in = y; /* keep the base pointer: y itself is advanced by the loop below */\ +\ + for (i=0; i<n; i++, x+=incx, y+=incy)\ + *y += alpha*(*x);\ +\ + return y_in;\ +}\ + + + +#define GK_MKBLAS_PROTO(PRFX, TYPE, OUTTYPE) /* prototypes matching the definitions generated by GK_MKBLAS */ \ + TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x);\ + TYPE PRFX ## max(size_t n, TYPE *x, size_t incx);\ + TYPE PRFX ## min(size_t n, TYPE *x, size_t incx);\ + size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx);\ + size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx);\ + size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k);\ + OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx);\ + TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx);\ + OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx);\ + OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy);\ + TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy);\ + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkmemory.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkmemory.h new file mode 100644 index 000000000..78e216e0e --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkmemory.h @@ -0,0 +1,142 @@ +/*! +\file gk_mkmemory.h +\brief Templates for memory allocation routines + +\date Started 3/29/07 +\author George +\version\verbatim $Id: gk_mkmemory.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_MKMEMORY_H_ +#define _GK_MKMEMORY_H_ + + +#define GK_MKALLOC(PRFX, TYPE)\ +/*************************************************************************/\ +/*! 
The macro for gk_?malloc()-class of routines */\ +/**************************************************************************/\ +TYPE *PRFX ## malloc(size_t n, char *msg)\ +{\ + return (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?realloc()-class of routines: resizes ptr to hold n elements of TYPE */\ +/**************************************************************************/\ +TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg)\ +{\ + return (TYPE *)gk_realloc((void *)ptr, sizeof(TYPE)*n, msg);\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?smalloc()-class of routines: allocates n elements and sets each one to ival; returns NULL if the allocation returned NULL */\ +/**************************************************************************/\ +TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg)\ +{\ + TYPE *ptr;\ +\ + ptr = (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\ + if (ptr == NULL) \ + return NULL; \ +\ + return PRFX ## set(n, ival, ptr); \ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?set()-class of routines: sets x[0..n-1] to val and returns x */\ +/*************************************************************************/\ +TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x)\ +{\ + size_t i;\ +\ + for (i=0; i<n; i++)\ + x[i] = val;\ +\ + return x;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?copy()-class of routines: copies n elements from a to b with memmove (so the ranges may overlap) and returns b */\ +/*************************************************************************/\ +TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b)\ +{\ + return (TYPE *)memmove((void *)b, (void *)a, sizeof(TYPE)*n);\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
The macro for gk_?AllocMatrix()-class of routines */\ +/**************************************************************************/\ +TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg)\ +{\ + gk_idx_t i, j;\ + TYPE **matrix;\ +\ + matrix = (TYPE **)gk_malloc(ndim1*sizeof(TYPE *), errmsg);\ + if (matrix == NULL) \ + return NULL;\ +\ + for (i=0; i<ndim1; i++) { \ + matrix[i] = PRFX ## smalloc(ndim2, value, errmsg);\ + if (matrix[i] == NULL) { /* row allocation failed: release everything allocated so far */ \ + for (j=0; j<i; j++) \ + gk_free((void **)&matrix[j], LTERM); \ + gk_free((void **)&matrix, LTERM); /* also release the row-pointer array, which was previously leaked on this path */ return NULL; \ + } \ + }\ +\ + return matrix;\ +}\ +\ +\ +/*************************************************************************/\ +/*! The macro for gk_?FreeMatrix()-class of routines: frees the ndim1 rows and then the row-pointer array behind *r_matrix; does nothing when *r_matrix is NULL (ndim2 is unused) */\ +/**************************************************************************/\ +void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2)\ +{\ + gk_idx_t i;\ + TYPE **matrix;\ +\ + if (*r_matrix == NULL) \ + return; \ +\ + matrix = *r_matrix;\ +\ + for (i=0; i<ndim1; i++) \ + gk_free((void **)&(matrix[i]), LTERM);\ +\ + gk_free((void **)r_matrix, LTERM);\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
The macro for gk_?SetMatrix()-class of routines */\ +/**************************************************************************/\ +void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value)\ +{\ + gk_idx_t i, j;\ +\ + for (i=0; i<ndim1; i++) {\ + for (j=0; j<ndim2; j++)\ + matrix[i][j] = value;\ + }\ +}\ + + +#define GK_MKALLOC_PROTO(PRFX, TYPE) /* prototypes matching the definitions generated by GK_MKALLOC */\ + TYPE *PRFX ## malloc(size_t n, char *msg);\ + TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg);\ + TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg);\ + TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x);\ + TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b);\ + TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg);\ + void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2);\ + void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value);\ + + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue.h new file mode 100644 index 000000000..50a5385b4 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue.h @@ -0,0 +1,440 @@ +/*! +\file gk_mkpqueue.h +\brief Templates for priority queues + +\date Started 4/09/07 +\author George +\version\verbatim $Id: gk_mkpqueue.h 21742 2018-01-26 16:59:15Z karypis $ \endverbatim +*/ + + +#ifndef _GK_MKPQUEUE_H +#define _GK_MKPQUEUE_H + + +#define GK_MKPQUEUE(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT)\ +/*************************************************************************/\ +/*! This function creates and initializes a priority queue: it allocates the PQT and then calls Init on it. NOTE(review): the gk_malloc result is passed to Init without a NULL check - confirm gk_malloc cannot return NULL here */\ +/**************************************************************************/\ +PQT *FPRFX ## Create(size_t maxnodes)\ +{\ + PQT *queue; \ +\ + queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate: queue");\ + FPRFX ## Init(queue, maxnodes);\ +\ + return queue;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function initializes the data structures of the priority queue */\ +/**************************************************************************/\ +void FPRFX ## Init(PQT *queue, size_t maxnodes)\ +{\ + queue->nnodes = 0;\ + queue->maxnodes = maxnodes;\ +\ + queue->heap = KVMALLOC(maxnodes, "gk_PQInit: heap");\ + queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator"); /* every locator entry starts at -1, the value Reset also writes for items it removes */\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function resets the priority queue: it sets the locator entry of every queued item back to -1 and sets nnodes to 0 */\ +/**************************************************************************/\ +void FPRFX ## Reset(PQT *queue)\ +{\ + ssize_t i;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + for (i=queue->nnodes-1; i>=0; i--)\ + locator[heap[i].val] = -1;\ + queue->nnodes = 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function frees the internal datastructures (heap and locator) of the priority queue and sets maxnodes to 0; a NULL queue is ignored */\ +/**************************************************************************/\ +void FPRFX ## Free(PQT *queue)\ +{\ + if (queue == NULL) return;\ + gk_free((void **)&queue->heap, &queue->locator, LTERM);\ + queue->maxnodes = 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function frees the internal datastructures of the priority queue \ + and the queue itself. The queue pointer is passed by value, so the caller's copy is not modified */\ +/**************************************************************************/\ +void FPRFX ## Destroy(PQT *queue)\ +{\ + if (queue == NULL) return;\ + FPRFX ## Free(queue);\ + gk_free((void **)&queue, LTERM);\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the length of the queue (its current nnodes) */\ +/**************************************************************************/\ +size_t FPRFX ## Length(PQT *queue)\ +{\ + return queue->nnodes;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function adds an item in the priority queue */\ +/**************************************************************************/\ +int FPRFX ## Insert(PQT *queue, VT node, KT key)\ +{\ + ssize_t i, j;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + ASSERT(locator[node] == -1);\ +\ + i = queue->nnodes++; /* NOTE(review): no capacity check against maxnodes is visible here */\ + while (i > 0) {\ + j = (i-1)>>1; /* parent slot of i */\ + if (KEY_LT(key, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + ASSERT(i >= 0);\ + heap[i].key = key;\ + heap[i].val = node;\ + locator[node] = i;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function deletes an item from the priority queue; the last heap entry is moved into the vacated slot and sifted up or down. Always returns 0 */\ +/**************************************************************************/\ +int FPRFX ## Delete(PQT *queue, VT node)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + KT newkey, oldkey;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + ASSERT(locator[node] != -1);\ + ASSERT(heap[locator[node]].val == node);\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + i = locator[node];\ + locator[node] = -1;\ +\ + if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) {\ + node = heap[queue->nnodes].val; /* from here on node/newkey describe the former last entry being re-placed */\ + newkey = heap[queue->nnodes].key;\ + oldkey = heap[i].key;\ +\ + if (KEY_LT(newkey, oldkey)) { /* Filter-up */\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(newkey, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ + else { /* Filter down */\ + nnodes = queue->nnodes;\ + while ((j=(i<<1)+1) < nnodes) {\ + if (KEY_LT(heap[j].key, newkey)) {\ + if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] 
= i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ +\ + heap[i].key = newkey;\ + heap[i].val = node;\ + locator[node] = i;\ + }\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function updates the key value associated with a particular item and re-sifts it */ \ +/**************************************************************************/\ +void FPRFX ## Update(PQT *queue, VT node, KT newkey)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + KT oldkey;\ + ssize_t *locator=queue->locator;\ + KVT *heap=queue->heap;\ +\ + oldkey = heap[locator[node]].key; /* NOTE(review): locator[node] is dereferenced here, before the ASSERTs below validate it */\ + if (!KEY_LT(newkey, oldkey) && !KEY_LT(oldkey, newkey)) return; /* neither key orders before the other: nothing to do */\ +\ + ASSERT(locator[node] != -1);\ + ASSERT(heap[locator[node]].val == node);\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + i = locator[node];\ +\ + if (KEY_LT(newkey, oldkey)) { /* Filter-up */\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(newkey, heap[j].key)) {\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ + else { /* Filter down */\ + nnodes = queue->nnodes;\ + while ((j=(i<<1)+1) < nnodes) {\ + if (KEY_LT(heap[j].key, newkey)) {\ + if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\ + j++;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ + }\ +\ + heap[i].key = newkey;\ + heap[i].val = node;\ + locator[node] = i;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + return;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the item at the top of the queue and removes\ + it from the priority queue */\ +/**************************************************************************/\ +VT FPRFX ## GetTop(PQT *queue)\ +{\ + ssize_t i, j;\ + ssize_t *locator;\ + KVT *heap;\ + VT vtx, node;\ + KT key;\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ +\ + if (queue->nnodes == 0)\ + return -1; /* empty queue */\ +\ + queue->nnodes--;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ +\ + vtx = heap[0].val;\ + locator[vtx] = -1;\ +\ + if ((i = queue->nnodes) > 0) {\ + key = heap[i].key;\ + node = heap[i].val;\ + i = 0; /* re-place the former last entry, starting from the root and sifting down */\ + while ((j=2*i+1) < queue->nnodes) {\ + if (KEY_LT(heap[j].key, key)) {\ + if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, heap[j].key))\ + j = j+1;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, key)) {\ + j = j+1;\ + heap[i] = heap[j];\ + locator[heap[i].val] = i;\ + i = j;\ + }\ + else\ + break;\ + }\ +\ + heap[i].key = key;\ + heap[i].val = node;\ + locator[node] = i;\ + }\ +\ + ASSERT2(FPRFX ## CheckHeap(queue));\ + return vtx;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the item at the top of the queue. The item is not\ + deleted from the queue. Returns -1 when the queue is empty. */\ +/**************************************************************************/\ +VT FPRFX ## SeeTopVal(PQT *queue)\ +{\ + return (queue->nnodes == 0 ? -1 : queue->heap[0].val);\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the key of the top item. The item is not\ + deleted from the queue. Returns KMAX when the queue is empty. */\ +/**************************************************************************/\ +KT FPRFX ## SeeTopKey(PQT *queue)\ +{\ + return (queue->nnodes == 0 ? KMAX : queue->heap[0].key);\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the key of a specific item */\ +/**************************************************************************/\ +KT FPRFX ## SeeKey(PQT *queue, VT node)\ +{\ + ssize_t *locator;\ + KVT *heap;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ +\ + return heap[locator[node]].key; /* NOTE(review): no check that node is currently queued (locator[node] may be -1) */\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the first item in a breadth-first traversal of\ + the heap whose key is less than maxwgt. This function is here due to\ + hMETIS and is not general! The implementation below is commented out.*/\ +/**************************************************************************/\ +/*\ +VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\ +{\ + ssize_t i;\ +\ + if (queue->nnodes == 0)\ + return -1;\ +\ + if (maxwgt <= 1000)\ + return FPRFX ## SeeTopVal(queue);\ +\ + for (i=0; i<queue->nnodes; i++) {\ + if (queue->heap[i].key > 0) {\ + if (wgts[queue->heap[i].val] <= maxwgt)\ + return queue->heap[i].val;\ + }\ + else {\ + if (queue->heap[i/2].key <= 0)\ + break;\ + }\ + }\ +\ + return queue->heap[0].val;\ +\ +}\ +*/\ +\ +\ +/*************************************************************************/\ +/*! 
This functions checks the consistency of the heap */\ +/**************************************************************************/\ +int FPRFX ## CheckHeap(PQT *queue)\ +{\ + ssize_t i, j;\ + size_t nnodes;\ + ssize_t *locator;\ + KVT *heap;\ +\ + heap = queue->heap;\ + locator = queue->locator;\ + nnodes = queue->nnodes;\ +\ + if (nnodes == 0)\ + return 1;\ +\ + ASSERT(locator[heap[0].val] == 0);\ + for (i=1; i<nnodes; i++) {\ + ASSERT(locator[heap[i].val] == i); /* locator must map each queued item back to its heap slot */\ + ASSERT(!KEY_LT(heap[i].key, heap[(i-1)/2].key)); /* no entry may order before its parent */\ + }\ + for (i=1; i<nnodes; i++)\ + ASSERT(!KEY_LT(heap[i].key, heap[0].key));\ +\ + for (j=i=0; i<queue->maxnodes; i++) { /* count the locator entries that are not -1 */\ + if (locator[i] != -1)\ + j++;\ + }\ + ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\ +\ + return 1;\ +}\ + + +#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT)\ + PQT * FPRFX ## Create(size_t maxnodes);\ + void FPRFX ## Init(PQT *queue, size_t maxnodes);\ + void FPRFX ## Reset(PQT *queue);\ + void FPRFX ## Free(PQT *queue);\ + void FPRFX ## Destroy(PQT *queue);\ + size_t FPRFX ## Length(PQT *queue);\ + int FPRFX ## Insert(PQT *queue, VT node, KT key);\ + int FPRFX ## Delete(PQT *queue, VT node);\ + void FPRFX ## Update(PQT *queue, VT node, KT newkey);\ + VT FPRFX ## GetTop(PQT *queue);\ + VT FPRFX ## SeeTopVal(PQT *queue);\ + KT FPRFX ## SeeTopKey(PQT *queue);\ + KT FPRFX ## SeeKey(PQT *queue, VT node);\ + VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts); /* NOTE(review): declared here, but its definition in GK_MKPQUEUE is commented out */\ + int FPRFX ## CheckHeap(PQT *queue);\ + + +/* This is how these macros are used (NOTE: the GK_MKPQUEUE example below predates the KVT and KEY_LT parameters; the macro now takes FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT) +GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX) +GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t) +*/ + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue2.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue2.h new file mode 100644 index 000000000..10e8ee462 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkpqueue2.h @@ -0,0 +1,215 @@ +/*!
+\file gk_mkpqueue2.h +\brief Templates for priority queues that do not utilize locators and as such + they can use different types of values. + +\date Started 4/09/07 +\author George +\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim +*/ + + +#ifndef _GK_MKPQUEUE2_H +#define _GK_MKPQUEUE2_H + + +#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT)\ +/*************************************************************************/\ +/*! This function creates and initializes a priority queue */\ +/**************************************************************************/\ +PQT *FPRFX ## Create2(ssize_t maxnodes)\ +{\ + PQT *queue; \ +\ + if ((queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue")) != NULL) {\ + memset(queue, 0, sizeof(PQT));\ + queue->nnodes = 0;\ + queue->maxnodes = maxnodes;\ + queue->keys = KMALLOC(maxnodes, "gk_pqCreate2: keys");\ + queue->vals = VMALLOC(maxnodes, "gk_pqCreate2: vals");\ +\ + if (queue->keys == NULL || queue->vals == NULL)\ + gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\ + }\ +\ + return queue;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function resets the priority queue */\ +/**************************************************************************/\ +void FPRFX ## Reset2(PQT *queue)\ +{\ + queue->nnodes = 0;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function frees the internal datastructures of the priority queue */\ +/**************************************************************************/\ +void FPRFX ## Destroy2(PQT **r_queue)\ +{\ + PQT *queue = *r_queue; \ + if (queue == NULL) return;\ + gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\ + *r_queue = NULL;\ +}\ +\ +\ +/*************************************************************************/\ +/*! 
This function returns the number of items currently in the queue */\ +/**************************************************************************/\ +size_t FPRFX ## Length2(PQT *queue)\ +{\ + return queue->nnodes;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function adds an item to the priority queue. Returns 1 on success and 0 if the queue is full (nnodes == maxnodes). */\ +/**************************************************************************/\ +int FPRFX ## Insert2(PQT *queue, VT val, KT key)\ +{\ + ssize_t i, j;\ + KT *keys=queue->keys;\ + VT *vals=queue->vals;\ +\ + ASSERT2(FPRFX ## CheckHeap2(queue));\ +\ + if (queue->nnodes == queue->maxnodes) \ + return 0;\ +\ + ASSERT2(FPRFX ## CheckHeap2(queue));\ +\ + i = queue->nnodes++;\ + while (i > 0) {\ + j = (i-1)>>1;\ + if (KEY_LT(key, keys[j])) {\ + keys[i] = keys[j];\ + vals[i] = vals[j];\ + i = j;\ + }\ + else\ + break;\ + }\ + ASSERT(i >= 0);\ + keys[i] = key;\ + vals[i] = val;\ +\ + ASSERT2(FPRFX ## CheckHeap2(queue));\ +\ + return 1;\ +}\ +\ +\ +/*************************************************************************/\ +/*!
This function stores the item at the top of the queue in *r_val and removes\ + it from the priority queue. Returns 0 if the queue is empty, 1 otherwise */\ +/**************************************************************************/\ +int FPRFX ## GetTop2(PQT *queue, VT *r_val)\ +{\ + ssize_t i, j;\ + KT key, *keys=queue->keys;\ + VT val, *vals=queue->vals;\ +\ + ASSERT2(FPRFX ## CheckHeap2(queue));\ +\ + if (queue->nnodes == 0)\ + return 0;\ +\ + queue->nnodes--;\ +\ + *r_val = vals[0];\ +\ + if ((i = queue->nnodes) > 0) {\ + key = keys[i];\ + val = vals[i];\ + i = 0;\ + while ((j=2*i+1) < queue->nnodes) {\ + if (KEY_LT(keys[j], key)) {\ + if (j+1 < queue->nnodes && KEY_LT(keys[j+1], keys[j]))\ + j = j+1;\ + keys[i] = keys[j];\ + vals[i] = vals[j];\ + i = j;\ + }\ + else if (j+1 < queue->nnodes && KEY_LT(keys[j+1], key)) {\ + j = j+1;\ + keys[i] = keys[j];\ + vals[i] = vals[j];\ + i = j;\ + }\ + else\ + break;\ + }\ +\ + keys[i] = key;\ + vals[i] = val;\ + }\ +\ + ASSERT2(FPRFX ## CheckHeap2(queue));\ +\ + return 1;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function stores the item at the top of the queue in *r_val. The item is not\ + deleted from the queue. Returns 0 if the queue is empty, 1 otherwise. */\ +/**************************************************************************/\ +int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\ +{\ + if (queue->nnodes == 0) \ + return 0;\ +\ + *r_val = queue->vals[0];\ +\ + return 1;\ +}\ +\ +\ +/*************************************************************************/\ +/*! This function returns the key of the top item. The item is not\ + deleted from the queue. KMAX is returned if the queue is empty. */\ +/**************************************************************************/\ +KT FPRFX ## SeeTopKey2(PQT *queue)\ +{\ + return (queue->nnodes == 0 ? KMAX : queue->keys[0]);\ +}\ +\ +\ +/*************************************************************************/\ +/*!
This function checks the heap ordering of the keys with ASSERT and always returns 1 */\ +/**************************************************************************/\ +int FPRFX ## CheckHeap2(PQT *queue)\ +{\ + ssize_t i;\ + KT *keys=queue->keys;\ +\ + if (queue->nnodes == 0)\ + return 1;\ +\ + for (i=1; i<queue->nnodes; i++) {\ + ASSERT(!KEY_LT(keys[i], keys[(i-1)/2]));\ + }\ + for (i=1; i<queue->nnodes; i++)\ + ASSERT(!KEY_LT(keys[i], keys[0]));\ +\ + return 1;\ +}\ + +/* Declares the prototypes of the FPRFX-prefixed functions generated by GK_MKPQUEUE2; PQT is the queue type, KT the key type and VT the value type. */ +#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT)\ + PQT * FPRFX ## Create2(ssize_t maxnodes);\ + void FPRFX ## Reset2(PQT *queue);\ + void FPRFX ## Destroy2(PQT **r_queue);\ + size_t FPRFX ## Length2(PQT *queue);\ + int FPRFX ## Insert2(PQT *queue, VT node, KT key);\ + int FPRFX ## GetTop2(PQT *queue, VT *r_val);\ + int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\ + KT FPRFX ## SeeTopKey2(PQT *queue);\ + int FPRFX ## CheckHeap2(PQT *queue);\ + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkrandom.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkrandom.h new file mode 100644 index 000000000..68d54fa3f --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkrandom.h @@ -0,0 +1,123 @@ +/*! +\file +\brief Templates for portable random number generation + +\date Started 5/17/07 +\author George +\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#ifndef _GK_MKRANDOM_H +#define _GK_MKRANDOM_H + +/*************************************************************************/\ +/*! The generator for the rand() related routines. \ + \param RNGT the datatype that defines the range of values over which\ + random numbers will be generated\ + \param VALT the datatype that defines the contents of the array to \ + be permuted by randArrayPermute() \ + \param FPRFX the function prefix \ +*/\ +/**************************************************************************/\ +#define GK_MKRANDOM(FPRFX, RNGT, VALT)\ +/*************************************************************************/\ +/*!
Initializes the generator by passing seed to gk_randinit() */ \ +/**************************************************************************/\ +void FPRFX ## srand(RNGT seed) \ +{\ + gk_randinit((uint64_t) seed);\ +}\ +\ +\ +/*************************************************************************/\ +/*! Returns a random number taken from gk_randint32() or gk_randint64(), depending on sizeof(RNGT) */ \ +/**************************************************************************/\ +RNGT FPRFX ## rand() \ +{\ + if (sizeof(RNGT) <= sizeof(int32_t)) \ + return (RNGT)gk_randint32(); \ + else \ + return (RNGT)gk_randint64(); \ +}\ +\ +\ +/*************************************************************************/\ +/*! Returns a random number between [0, max), computed as rand() % max; max must be non-zero */ \ +/**************************************************************************/\ +RNGT FPRFX ## randInRange(RNGT max) \ +{\ + return (RNGT)((FPRFX ## rand())%max); \ +}\ +\ +\ +/*************************************************************************/\ +/*! Randomly permutes the elements of an array p[] of length n. \ + flag == 1, p[i] = i prior to permutation, \ + flag == 0, p[] is not initialized. For n < 10, n random pair swaps are done and nshuffles is ignored; otherwise nshuffles rounds of four swaps are done. */\ +/**************************************************************************/\ +void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\ +{\ + RNGT i, u, v;\ + VALT tmp;\ +\ + if (flag == 1) {\ + for (i=0; i<n; i++)\ + p[i] = (VALT)i;\ + }\ +\ + if (n < 10) {\ + for (i=0; i<n; i++) {\ + v = FPRFX ## randInRange(n);\ + u = FPRFX ## randInRange(n);\ + gk_SWAP(p[v], p[u], tmp);\ + }\ + }\ + else {\ + for (i=0; i<nshuffles; i++) {\ + v = FPRFX ## randInRange(n-3);\ + u = FPRFX ## randInRange(n-3);\ + /*gk_SWAP(p[v+0], p[u+0], tmp);*/\ + /*gk_SWAP(p[v+1], p[u+1], tmp);*/\ + /*gk_SWAP(p[v+2], p[u+2], tmp);*/\ + /*gk_SWAP(p[v+3], p[u+3], tmp);*/\ + gk_SWAP(p[v+0], p[u+2], tmp);\ + gk_SWAP(p[v+1], p[u+3], tmp);\ + gk_SWAP(p[v+2], p[u+0], tmp);\ + gk_SWAP(p[v+3], p[u+1], tmp);\ + }\ + }\ +}\ +\ +\ +/*************************************************************************/\ +/*!
Randomly permutes the elements of an array p[] of length n. \ + flag == 1, p[i] = i prior to permutation, \ + flag == 0, p[] is not initialized. Each position i is swapped once with a randomly chosen position. */\ +/**************************************************************************/\ +void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag)\ +{\ + RNGT i, v;\ + VALT tmp;\ +\ + if (flag == 1) {\ + for (i=0; i<n; i++)\ + p[i] = (VALT)i;\ + }\ +\ + for (i=0; i<n; i++) {\ + v = FPRFX ## randInRange(n);\ + gk_SWAP(p[i], p[v], tmp);\ + }\ +}\ + +/* Declares the prototypes of the FPRFX-prefixed functions generated by GK_MKRANDOM; RNGT is the range/count type and VALT the array element type. */ +#define GK_MKRANDOM_PROTO(FPRFX, RNGT, VALT)\ + void FPRFX ## srand(RNGT seed); \ + RNGT FPRFX ## rand(); \ + RNGT FPRFX ## randInRange(RNGT max); \ + void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag);\ + void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag);\ + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mksort.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mksort.h new file mode 100644 index 000000000..48674db67 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mksort.h @@ -0,0 +1,271 @@ +/*! +\file gk_mksort.h +\brief Templates for the qsort routine + +\date Started 3/28/07 +\author George +\version\verbatim $Id: gk_mksort.h 21051 2017-05-25 04:36:14Z karypis $ \endverbatim +*/ + + +#ifndef _GK_MKSORT_H_ +#define _GK_MKSORT_H_ + +/* Adopted from GNU glibc by Mjt. + * See stdlib/qsort.c in glibc */ + +/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* in-line qsort implementation. Differs from traditional qsort() routine + * in that it is a macro, not a function, and instead of passing an address + * of a comparison routine to the function, it is possible to inline + * comparison routine, thus speeding up sorting a lot. + * + * Usage: + * #include "gk_mksort.h" + * #define islt(a,b) (strcmp((*a),(*b))<0) + * char *arr[]; + * int n; + * GK_MKQSORT(char*, arr, n, islt); + * + * The "prototype" and 4 arguments are: + * GK_MKQSORT(TYPE,BASE,NELT,ISLT) + * 1) type of each element, TYPE, + * 2) address of the beginning of the array, of type TYPE*, + * 3) number of elements in the array, and + * 4) comparison routine. + * Array pointer and number of elements are referenced only once. + * This is similar to a call + * qsort(BASE,NELT,sizeof(TYPE),ISLT) + * with the difference in the last parameter. + * Note the islt macro/routine (it receives pointers to two elements): + * the only condition of interest is whether one element is less than + * another, no other conditions (greater than, equal to etc) are tested. + * So, for example, to define integer sort, use: + * #define islt(a,b) ((*a)<(*b)) + * GK_MKQSORT(int, arr, n, islt) + * + * The macro could be used to implement a sorting function (see examples + * below), or to implement the sorting algorithm inline. That is, either + * create a sorting function and use it whenever you want to sort something, + * or use GK_MKQSORT() macro directly instead of a call to such routine. Note that + * the macro expands to quite some code (compiled size of int qsort on x86 + * is about 700..800 bytes).
+ * + * Using this macro directly it isn't possible to implement traditional + * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE), + * while qsort() allows element size to be different. + * + * Several ready-to-use examples: + * + * Sorting array of integers: + * void int_qsort(int *arr, unsigned n) { + * #define int_lt(a,b) ((*a)<(*b)) + * GK_MKQSORT(int, arr, n, int_lt); + * } + * + * Sorting array of string pointers: + * void str_qsort(char *arr[], unsigned n) { + * #define str_lt(a,b) (strcmp((*a),(*b)) < 0) + * GK_MKQSORT(char*, arr, n, str_lt); + * } + * + * Sorting array of structures: + * + * struct elt { + * int key; + * ... + * }; + * void elt_qsort(struct elt *arr, unsigned n) { + * #define elt_lt(a,b) ((a)->key < (b)->key) + * GK_MKQSORT(struct elt, arr, n, elt_lt); + * } + * + * And so on. + */ + +/* Swap two items pointed to by A and B using temporary buffer t. */ +#define _GKQSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t))) + +/* Discontinue quicksort algorithm when partition gets below this size. */ +#define _GKQSORT_MAX_THRESH 8 + +/* The next 4 #defines implement a very fast in-line stack abstraction; the stack holds 8 * sizeof(size_t) {lo, hi} ranges. */ +#define _GKQSORT_STACK_SIZE (8 * sizeof(size_t)) +#define _GKQSORT_PUSH(top, low, high) (((top->_lo = (low)), (top->_hi = (high)), ++top)) +#define _GKQSORT_POP(low, high, top) ((--top, (low = top->_lo), (high = top->_hi))) +#define _GKQSORT_STACK_NOT_EMPTY (_stack < _top) + + +/* The main code starts here... Note that the expansion executes a bare `return;' when there are no elements, so it has to be placed in a function returning void. */ +#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT) \ +{ \ + GKQSORT_TYPE *const _base = (GKQSORT_BASE); \ + const size_t _elems = (GKQSORT_NELT); \ + GKQSORT_TYPE _hold; \ + \ + if (_elems < 1) \ + return; \ + \ + /* Don't declare two variables of type GKQSORT_TYPE in a single \ + * statement: eg `TYPE a, b;', in case if TYPE is a pointer, \ + * expands to `type* a, b;' which isn't what we want.
\ + */ \ + \ + if (_elems > _GKQSORT_MAX_THRESH) { \ + GKQSORT_TYPE *_lo = _base; \ + GKQSORT_TYPE *_hi = _lo + _elems - 1; \ + struct { \ + GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \ + } _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1; \ + /* _top starts one slot above _stack, so the loop below ends once the final pop brings _top back down to _stack. */ \ + while (_GKQSORT_STACK_NOT_EMPTY) { \ + GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr; \ + \ + /* Select median value from among LO, MID, and HI. Rearrange \ + LO and HI so the three values are sorted. This lowers the \ + probability of picking a pathological pivot value and \ + skips a comparison for both the LEFT_PTR and RIGHT_PTR in \ + the while loops. */ \ + \ + GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \ + \ + if (GKQSORT_LT (_mid, _lo)) \ + _GKQSORT_SWAP (_mid, _lo, _hold); \ + if (GKQSORT_LT (_hi, _mid)) \ + _GKQSORT_SWAP (_mid, _hi, _hold); \ + else \ + goto _jump_over; \ + if (GKQSORT_LT (_mid, _lo)) \ + _GKQSORT_SWAP (_mid, _lo, _hold); \ + _jump_over:; \ + \ + _left_ptr = _lo + 1; \ + _right_ptr = _hi - 1; \ + \ + /* Here's the famous ``collapse the walls'' section of quicksort. \ + Gotta like those tight inner loops! They are the main reason \ + that this algorithm runs much faster than others. */ \ + do { \ + while (GKQSORT_LT (_left_ptr, _mid)) \ + ++_left_ptr; \ + \ + while (GKQSORT_LT (_mid, _right_ptr)) \ + --_right_ptr; \ + \ + if (_left_ptr < _right_ptr) { \ + _GKQSORT_SWAP (_left_ptr, _right_ptr, _hold); \ + if (_mid == _left_ptr) \ + _mid = _right_ptr; \ + else if (_mid == _right_ptr) \ + _mid = _left_ptr; \ + ++_left_ptr; \ + --_right_ptr; \ + } \ + else if (_left_ptr == _right_ptr) { \ + ++_left_ptr; \ + --_right_ptr; \ + break; \ + } \ + } while (_left_ptr <= _right_ptr); \ + \ + /* Set up pointers for next iteration. First determine whether \ + left and right partitions are below the threshold size. If so, \ + ignore one or both. Otherwise, push the larger partition's \ + bounds on the stack and continue sorting the smaller one.
*/ \ + \ + if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) { \ + if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \ + /* Ignore both small partitions and continue with the range popped from the stack. */ \ + _GKQSORT_POP (_lo, _hi, _top); \ + else \ + /* Ignore small left partition. */ \ + _lo = _left_ptr; \ + } \ + else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \ + /* Ignore small right partition. */ \ + _hi = _right_ptr; \ + else if (_right_ptr - _lo > _hi - _left_ptr) { \ + /* Push larger left partition indices. */ \ + _GKQSORT_PUSH (_top, _lo, _right_ptr); \ + _lo = _left_ptr; \ + } \ + else { \ + /* Push larger right partition indices. */ \ + _GKQSORT_PUSH (_top, _left_ptr, _hi); \ + _hi = _right_ptr; \ + } \ + } \ + } \ + \ + /* Once the BASE array is partially sorted by quicksort the rest \ + is completely sorted using insertion sort, since this is efficient \ + for partitions below MAX_THRESH size. BASE points to the \ + beginning of the array to sort, and END_PTR points at the very \ + last element in the array (*not* one beyond it!). */ \ + \ + { \ + GKQSORT_TYPE *const _end_ptr = _base + _elems - 1; \ + GKQSORT_TYPE *_tmp_ptr = _base; \ + register GKQSORT_TYPE *_run_ptr; \ + GKQSORT_TYPE *_thresh; \ + \ + _thresh = _base + _GKQSORT_MAX_THRESH; \ + if (_thresh > _end_ptr) \ + _thresh = _end_ptr; \ + \ + /* Find smallest element in first threshold and place it at the \ + array's beginning. This is the smallest array element, \ + and the operation speeds up insertion sort's inner loop. */ \ + \ + for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \ + if (GKQSORT_LT (_run_ptr, _tmp_ptr)) \ + _tmp_ptr = _run_ptr; \ + \ + if (_tmp_ptr != _base) \ + _GKQSORT_SWAP (_tmp_ptr, _base, _hold); \ + \ + /* Insertion sort, running from left-hand-side \ + * up to right-hand-side.
*/ \ + \ + _run_ptr = _base + 1; \ + while (++_run_ptr <= _end_ptr) { \ + _tmp_ptr = _run_ptr - 1; \ + while (GKQSORT_LT (_run_ptr, _tmp_ptr)) \ + --_tmp_ptr; \ + \ + ++_tmp_ptr; \ + if (_tmp_ptr != _run_ptr) { \ + GKQSORT_TYPE *_trav = _run_ptr + 1; \ + while (--_trav >= _run_ptr) { \ + GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \ + _hold = *_trav; \ + \ + for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) \ + *_hi = *_lo; \ + *_hi = _hold; \ + } \ + } \ + } \ + } \ + \ +} + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_mkutils.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkutils.h new file mode 100644 index 000000000..a092f2227 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_mkutils.h @@ -0,0 +1,40 @@ +/*! +\file +\brief Templates for various utility routines + +\date Started 5/28/07 +\author George +\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_MKUTILS_H_ +#define _GK_MKUTILS_H_ + + +#define GK_MKARRAY2CSR(PRFX, TYPE)\ +/*************************************************************************/\ +/*! The macro for gk_?array2csr() routine: counts the array[i] values in ptr[0..range] and stores the indices i in ind[] grouped by array[i], using MAKECSR/SHIFTCSR */\ +/**************************************************************************/\ +void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\ +{\ + TYPE i;\ +\ + for (i=0; i<=range; i++)\ + ptr[i] = 0;\ +\ + for (i=0; i<n; i++)\ + ptr[array[i]]++;\ +\ + /* Compute the ptr, ind structure */\ + MAKECSR(i, range, ptr);\ + for (i=0; i<n; i++)\ + ind[ptr[array[i]]++] = i;\ + SHIFTCSR(i, range, ptr);\ +} + +/* Declares the prototype of the PRFX-prefixed array2csr() routine generated by GK_MKARRAY2CSR. */ +#define GK_MKARRAY2CSR_PROTO(PRFX, TYPE)\ + void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind);\ + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_proto.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_proto.h new file mode 100644 index 000000000..d99bd5ee9 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_proto.h @@ -0,0 +1,423 @@ +/*!
+\file gk_proto.h +\brief This file contains function prototypes + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_proto.h 22010 2018-05-14 20:20:26Z karypis $ \endverbatim +*/ + +#ifndef _GK_PROTO_H_ +#define _GK_PROTO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/*------------------------------------------------------------- + * blas.c + *-------------------------------------------------------------*/ +GK_MKBLAS_PROTO(gk_c, char, int) +GK_MKBLAS_PROTO(gk_i, int, int) +GK_MKBLAS_PROTO(gk_i8, int8_t, int8_t) +GK_MKBLAS_PROTO(gk_i16, int16_t, int16_t) +GK_MKBLAS_PROTO(gk_i32, int32_t, int32_t) +GK_MKBLAS_PROTO(gk_i64, int64_t, int64_t) +GK_MKBLAS_PROTO(gk_z, ssize_t, ssize_t) +GK_MKBLAS_PROTO(gk_zu, size_t, size_t) +GK_MKBLAS_PROTO(gk_f, float, float) +GK_MKBLAS_PROTO(gk_d, double, double) +GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t) + + + + +/*------------------------------------------------------------- + * io.c + *-------------------------------------------------------------*/ +FILE *gk_fopen(char *, char *, const char *); +void gk_fclose(FILE *); +ssize_t gk_read(int fd, void *vbuf, size_t count); +ssize_t gk_write(int fd, void *vbuf, size_t count); +gk_idx_t gk_getline(char **lineptr, size_t *n, FILE *stream); +char **gk_readfile(char *fname, size_t *r_nlines); +int32_t *gk_i32readfile(char *fname, size_t *r_nlines); +int64_t *gk_i64readfile(char *fname, size_t *r_nlines); +ssize_t *gk_zreadfile(char *fname, size_t *r_nlines); +int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts); +size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a); +int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts); +size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a); +ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts); +size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a); +float *gk_freadfilebin(char *fname, size_t *r_nelmnts); +size_t gk_fwritefilebin(char *fname, size_t n, float *a); +double *gk_dreadfilebin(char 
*fname, size_t *r_nelmnts); +size_t gk_dwritefilebin(char *fname, size_t n, double *a); + + + + +/*------------------------------------------------------------- + * fs.c + *-------------------------------------------------------------*/ +int gk_fexists(char *); +int gk_dexists(char *); +ssize_t gk_getfsize(char *); +void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, + size_t *r_max_nlntokens, size_t *r_nbytes); +char *gk_getbasename(char *path); +char *gk_getextname(char *path); +char *gk_getfilename(char *path); +char *gk_getpathname(char *path); +int gk_mkpath(char *); +int gk_rmpath(char *); + + + +/*------------------------------------------------------------- + * memory.c + *-------------------------------------------------------------*/ +GK_MKALLOC_PROTO(gk_c, char) +GK_MKALLOC_PROTO(gk_i, int) +GK_MKALLOC_PROTO(gk_i8, int8_t) +GK_MKALLOC_PROTO(gk_i16, int16_t) +GK_MKALLOC_PROTO(gk_i32, int32_t) +GK_MKALLOC_PROTO(gk_i64, int64_t) +GK_MKALLOC_PROTO(gk_ui8, uint8_t) +GK_MKALLOC_PROTO(gk_ui16, uint16_t) +GK_MKALLOC_PROTO(gk_ui32, uint32_t) +GK_MKALLOC_PROTO(gk_ui64, uint64_t) +GK_MKALLOC_PROTO(gk_z, ssize_t) +GK_MKALLOC_PROTO(gk_zu, size_t) +GK_MKALLOC_PROTO(gk_f, float) +GK_MKALLOC_PROTO(gk_d, double) +GK_MKALLOC_PROTO(gk_idx, gk_idx_t) + +GK_MKALLOC_PROTO(gk_ckv, gk_ckv_t) +GK_MKALLOC_PROTO(gk_ikv, gk_ikv_t) +GK_MKALLOC_PROTO(gk_i8kv, gk_i8kv_t) +GK_MKALLOC_PROTO(gk_i16kv, gk_i16kv_t) +GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t) +GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t) +GK_MKALLOC_PROTO(gk_zkv, gk_zkv_t) +GK_MKALLOC_PROTO(gk_zukv, gk_zukv_t) +GK_MKALLOC_PROTO(gk_fkv, gk_fkv_t) +GK_MKALLOC_PROTO(gk_dkv, gk_dkv_t) +GK_MKALLOC_PROTO(gk_skv, gk_skv_t) +GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t) + +void gk_AllocMatrix(void ***, size_t, size_t , size_t); +void gk_FreeMatrix(void ***, size_t, size_t); +int gk_malloc_init(); +void gk_malloc_cleanup(int showstats); +void *gk_malloc(size_t nbytes, char *msg); +void *gk_realloc(void *oldptr, size_t nbytes, char 
*msg); +void gk_free(void **ptr1,...); +size_t gk_GetCurMemoryUsed(); +size_t gk_GetMaxMemoryUsed(); +void gk_GetVMInfo(size_t *vmsize, size_t *vmrss); + + + +/*------------------------------------------------------------- + * seq.c + *-------------------------------------------------------------*/ +gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name); +gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet); +void gk_seq_init(gk_seq_t *seq); + + + +/*------------------------------------------------------------- + * error.c + *-------------------------------------------------------------*/ +void gk_set_exit_on_error(int value); +void errexit(char *,...); +void gk_errexit(int signum, char *,...); +int gk_sigtrap(); +int gk_siguntrap(); +void gk_sigthrow(int signum); +void gk_SetSignalHandlers(); +void gk_UnsetSignalHandlers(); +void gk_NonLocalExit_Handler(int signum); +char *gk_strerror(int errnum); +void PrintBackTrace(); + + +/*------------------------------------------------------------- + * util.c + *-------------------------------------------------------------*/ +void gk_RandomPermute(size_t, int *, int); +void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind); +int gk_log2(int); +int gk_ispow2(int); +float gk_flog2(float); + + +/*------------------------------------------------------------- + * time.c + *-------------------------------------------------------------*/ +gk_wclock_t gk_WClockSeconds(void); +double gk_CPUSeconds(void); + +/*------------------------------------------------------------- + * string.c + *-------------------------------------------------------------*/ +char *gk_strchr_replace(char *str, char *fromlist, char *tolist); +int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str); +char *gk_strtprune(char *, char *); +char *gk_strhprune(char *, char *); +char *gk_strtoupper(char *); +char *gk_strtolower(char *); +char *gk_strdup(char *orgstr); +int gk_strcasecmp(char *s1, char *s2); +int 
gk_strrcmp(char *s1, char *s2); +char *gk_time2str(time_t time); +time_t gk_str2time(char *str); +int gk_GetStringID(gk_StringMap_t *strmap, char *key); + + + +/*------------------------------------------------------------- + * sort.c + *-------------------------------------------------------------*/ +void gk_csorti(size_t, char *); +void gk_csortd(size_t, char *); +void gk_isorti(size_t, int *); +void gk_isortd(size_t, int *); +void gk_i32sorti(size_t, int32_t *); +void gk_i32sortd(size_t, int32_t *); +void gk_i64sorti(size_t, int64_t *); +void gk_i64sortd(size_t, int64_t *); +void gk_ui32sorti(size_t, uint32_t *); +void gk_ui32sortd(size_t, uint32_t *); +void gk_ui64sorti(size_t, uint64_t *); +void gk_ui64sortd(size_t, uint64_t *); +void gk_fsorti(size_t, float *); +void gk_fsortd(size_t, float *); +void gk_dsorti(size_t, double *); +void gk_dsortd(size_t, double *); +void gk_idxsorti(size_t, gk_idx_t *); +void gk_idxsortd(size_t, gk_idx_t *); +void gk_ckvsorti(size_t, gk_ckv_t *); +void gk_ckvsortd(size_t, gk_ckv_t *); +void gk_ikvsorti(size_t, gk_ikv_t *); +void gk_ikvsortd(size_t, gk_ikv_t *); +void gk_i32kvsorti(size_t, gk_i32kv_t *); +void gk_i32kvsortd(size_t, gk_i32kv_t *); +void gk_i64kvsorti(size_t, gk_i64kv_t *); +void gk_i64kvsortd(size_t, gk_i64kv_t *); +void gk_zkvsorti(size_t, gk_zkv_t *); +void gk_zkvsortd(size_t, gk_zkv_t *); +void gk_zukvsorti(size_t, gk_zukv_t *); +void gk_zukvsortd(size_t, gk_zukv_t *); +void gk_fkvsorti(size_t, gk_fkv_t *); +void gk_fkvsortd(size_t, gk_fkv_t *); +void gk_dkvsorti(size_t, gk_dkv_t *); +void gk_dkvsortd(size_t, gk_dkv_t *); +void gk_skvsorti(size_t, gk_skv_t *); +void gk_skvsortd(size_t, gk_skv_t *); +void gk_idxkvsorti(size_t, gk_idxkv_t *); +void gk_idxkvsortd(size_t, gk_idxkv_t *); + + +/*------------------------------------------------------------- + * Selection routines + *-------------------------------------------------------------*/ +int gk_dfkvkselect(size_t, int, gk_fkv_t *); +int 
gk_ifkvkselect(size_t, int, gk_fkv_t *); + + +/*------------------------------------------------------------- + * Priority queue + *-------------------------------------------------------------*/ +GK_MKPQUEUE_PROTO(gk_ipq, gk_ipq_t, int, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_fpq, gk_fpq_t, float, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_dpq, gk_dpq_t, double, gk_idx_t) +GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t) + + +/*------------------------------------------------------------- + * HTable routines + *-------------------------------------------------------------*/ +gk_HTable_t *HTable_Create(int nelements); +void HTable_Reset(gk_HTable_t *htable); +void HTable_Resize(gk_HTable_t *htable, int nelements); +void HTable_Insert(gk_HTable_t *htable, int key, int val); +void HTable_Delete(gk_HTable_t *htable, int key); +int HTable_Search(gk_HTable_t *htable, int key); +int HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type); +int HTable_SearchAndDelete(gk_HTable_t *htable, int key); +void HTable_Destroy(gk_HTable_t *htable); +int HTable_HFunction(int nelements, int key); + + +/*------------------------------------------------------------- + * Tokenizer routines + *-------------------------------------------------------------*/ +void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens); +void gk_freetokenslist(gk_Tokens_t *tokens); + +/*------------------------------------------------------------- + * Encoder/Decoder + *-------------------------------------------------------------*/ +void encodeblock(unsigned char *in, unsigned char *out); +void decodeblock(unsigned char *in, unsigned char *out); +void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer); +void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer); + + +/*------------------------------------------------------------- 
+ * random.c + *-------------------------------------------------------------*/ +GK_MKRANDOM_PROTO(gk_c, size_t, char) +GK_MKRANDOM_PROTO(gk_i, size_t, int) +GK_MKRANDOM_PROTO(gk_i32, size_t, int32_t) +GK_MKRANDOM_PROTO(gk_f, size_t, float) +GK_MKRANDOM_PROTO(gk_d, size_t, double) +GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t) +GK_MKRANDOM_PROTO(gk_z, size_t, ssize_t) +GK_MKRANDOM_PROTO(gk_zu, size_t, size_t) +void gk_randinit(uint64_t); +uint64_t gk_randint64(void); +uint32_t gk_randint32(void); + + +/*------------------------------------------------------------- + * OpenMP fake functions + *-------------------------------------------------------------*/ +#if !defined(__OPENMP__) +void omp_set_num_threads(int num_threads); +int omp_get_num_threads(void); +int omp_get_max_threads(void); +int omp_get_thread_num(void); +int omp_get_num_procs(void); +int omp_in_parallel(void); +void omp_set_dynamic(int num_threads); +int omp_get_dynamic(void); +void omp_set_nested(int nested); +int omp_get_nested(void); +#endif /* __OPENMP__ */ + + +/*------------------------------------------------------------- + * CSR-related functions + *-------------------------------------------------------------*/ +gk_csr_t *gk_csr_Create(); +void gk_csr_Init(gk_csr_t *mat); +void gk_csr_Free(gk_csr_t **mat); +void gk_csr_FreeContents(gk_csr_t *mat); +gk_csr_t *gk_csr_Dup(gk_csr_t *mat); +gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows); +gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind); +gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid); +gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color); +int gk_csr_DetermineFormat(char *filename, int format); +gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering); +void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering); +gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf); +gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, 
int norm, float fraction); +gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval); +gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore); +void gk_csr_CompactColumns(gk_csr_t *mat); +void gk_csr_SortIndices(gk_csr_t *mat, int what); +void gk_csr_CreateIndex(gk_csr_t *mat, int what); +void gk_csr_Normalize(gk_csr_t *mat, int what, int norm); +void gk_csr_Scale(gk_csr_t *mat, int type); +void gk_csr_ComputeSums(gk_csr_t *mat, int what); +void gk_csr_ComputeNorms(gk_csr_t *mat, int what); +void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what); +gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int summetric); +gk_csr_t *gk_csr_Transpose(gk_csr_t *mat); +float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype); +float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, int i1, int i2, int what, int simtype); +int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval, + int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker, + gk_fkv_t *i_cand); +int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, + int32_t *cids); +gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op); +gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm); +void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, + int32_t **r_perm, int32_t **r_iperm); +void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type, + int32_t **r_perm, int32_t **r_iperm); + + +/* itemsets.c */ +void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, + int minfreq, int maxfreq, int minlen, int maxlen, + void (*process_itemset)(void *stateptr, int nitems, int *itemind, + int ntrans, int *tranind), + void *stateptr); + + +/* evaluate.c */ +float ComputeAccuracy(int n, gk_fkv_t *list); +float ComputeROCn(int n, int maxN, gk_fkv_t *list); +float ComputeMedianRFP(int n, gk_fkv_t *list); +float ComputeMean 
(int n, float *values); +float ComputeStdDev(int n, float *values); + + +/* mcore.c */ +gk_mcore_t *gk_mcoreCreate(size_t coresize); +gk_mcore_t *gk_gkmcoreCreate(void); +void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats); +void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats); +void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes); +void gk_mcorePush(gk_mcore_t *mcore); +void gk_gkmcorePush(gk_mcore_t *mcore); +void gk_mcorePop(gk_mcore_t *mcore); +void gk_gkmcorePop(gk_mcore_t *mcore); +void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr); +void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr); +void gk_mcoreDel(gk_mcore_t *mcore, void *ptr); +void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr); + +/* rw.c */ +int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr); + + +/* graph.c */ +gk_graph_t *gk_graph_Create(void); +void gk_graph_Init(gk_graph_t *graph); +void gk_graph_Free(gk_graph_t **graph); +void gk_graph_FreeContents(gk_graph_t *graph); +gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, + int numbering, int isfewgts, int isfvwgts, int isfvsizes); +void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering); +gk_graph_t *gk_graph_Dup(gk_graph_t *graph); +gk_graph_t *gk_graph_Transpose(gk_graph_t *graph); +gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs); +gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm); +int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind); +void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm, + int32_t **r_iperm); +void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t **r_iperm); +void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t **r_iperm); +void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps); +void 
gk_graph_SortAdjacencies(gk_graph_t *graph); +gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op); + + +/* cache.c */ +gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits); +void gk_cacheReset(gk_cache_t *cache); +void gk_cacheDestroy(gk_cache_t **r_cache); +int gk_cacheLoad(gk_cache_t *cache, size_t addr); +double gk_cacheGetHitRate(gk_cache_t *cache); + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_struct.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_struct.h new file mode 100644 index 000000000..2925e9829 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_struct.h @@ -0,0 +1,296 @@ +/*! +\file gk_struct.h +\brief This file contains various datastructures used/provided by GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_struct.h 21988 2018-04-16 00:11:19Z karypis $ \endverbatim +*/ + +#ifndef _GK_STRUCT_H_ +#define _GK_STRUCT_H_ + + +/********************************************************************/ +/*! Generator for gk_??KeyVal_t data structure */ +/********************************************************************/ +#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \ +typedef struct {\ + KEYTYPE key;\ + VALTYPE val;\ +} NAME;\ + +/* The actual KeyVal data structures */ +GK_MKKEYVALUE_T(gk_ckv_t, char, ssize_t) +GK_MKKEYVALUE_T(gk_ikv_t, int, ssize_t) +GK_MKKEYVALUE_T(gk_i8kv_t, int8_t, ssize_t) +GK_MKKEYVALUE_T(gk_i16kv_t, int16_t, ssize_t) +GK_MKKEYVALUE_T(gk_i32kv_t, int32_t, ssize_t) +GK_MKKEYVALUE_T(gk_i64kv_t, int64_t, ssize_t) +GK_MKKEYVALUE_T(gk_zkv_t, ssize_t, ssize_t) +GK_MKKEYVALUE_T(gk_zukv_t, size_t, ssize_t) +GK_MKKEYVALUE_T(gk_fkv_t, float, ssize_t) +GK_MKKEYVALUE_T(gk_dkv_t, double, ssize_t) +GK_MKKEYVALUE_T(gk_skv_t, char *, ssize_t) +GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t) + + + +/********************************************************************/ +/*! 
Generator for gk_?pq_t data structure */ +/********************************************************************/ +#define GK_MKPQUEUE_T(NAME, KVTYPE)\ +typedef struct {\ + size_t nnodes;\ + size_t maxnodes;\ +\ + /* Heap version of the data structure */ \ + KVTYPE *heap;\ + ssize_t *locator;\ +} NAME;\ + +GK_MKPQUEUE_T(gk_ipq_t, gk_ikv_t) +GK_MKPQUEUE_T(gk_i32pq_t, gk_i32kv_t) +GK_MKPQUEUE_T(gk_i64pq_t, gk_i64kv_t) +GK_MKPQUEUE_T(gk_fpq_t, gk_fkv_t) +GK_MKPQUEUE_T(gk_dpq_t, gk_dkv_t) +GK_MKPQUEUE_T(gk_idxpq_t, gk_idxkv_t) + + +#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\ +typedef struct {\ + ssize_t nnodes;\ + ssize_t maxnodes;\ +\ + /* Heap version of the data structure */ \ + KTYPE *keys;\ + VTYPE *vals;\ +} NAME;\ + + + +/*------------------------------------------------------------- + * The following data structure stores a sparse CSR format + *-------------------------------------------------------------*/ +typedef struct gk_csr_t { + int32_t nrows, ncols; + ssize_t *rowptr, *colptr; + int32_t *rowind, *colind; + int32_t *rowids, *colids; + int32_t *rlabels, *clabels; + int32_t *rmap, *cmap; + float *rowval, *colval; + float *rnorms, *cnorms; + float *rsums, *csums; + float *rsizes, *csizes; + float *rvols, *cvols; + float *rwgts, *cwgts; +} gk_csr_t; + + +/*------------------------------------------------------------- + * The following data structure stores a sparse graph + *-------------------------------------------------------------*/ +typedef struct gk_graph_t { + int32_t nvtxs; /*!< The number of vertices in the graph */ + ssize_t *xadj; /*!< The ptr-structure of the adjncy list */ + int32_t *adjncy; /*!< The adjacency list of the graph */ + int32_t *iadjwgt; /*!< The integer edge weights */ + float *fadjwgt; /*!< The floating point edge weights */ + int32_t *ivwgts; /*!< The integer vertex weights */ + float *fvwgts; /*!< The floating point vertex weights */ + int32_t *ivsizes; /*!< The integer vertex sizes */ + float *fvsizes; /*!< The floating point 
vertex sizes */ + int32_t *vlabels; /*!< The labels of the vertices */ +} gk_graph_t; + + +/*------------------------------------------------------------- + * The following data structure stores a string as a + * pair of a length (len) and the character buffer itself (buf). + *-------------------------------------------------------------*/ +typedef struct gk_str_t { + size_t len; + char *buf; +} gk_str_t; + + + + +/*------------------------------------------------------------- +* The following data structure implements a string-2-int mapping +* table used for parsing command-line options +*-------------------------------------------------------------*/ +typedef struct gk_StringMap_t { + char *name; + int id; +} gk_StringMap_t; + + +/*------------------------------------------------------------ + * This structure implements a simple hash table + *------------------------------------------------------------*/ +typedef struct gk_HTable_t { + int nelements; /* The overall size of the hash-table */ + int htsize; /* The current size of the hash-table */ + gk_ikv_t *harray; /* The actual hash-table */ +} gk_HTable_t; + + +/*------------------------------------------------------------ + * This structure implements a gk_Tokens_t list returned by the + * string tokenizer + *------------------------------------------------------------*/ +typedef struct gk_Tokens_t { + int ntoks; /* The number of tokens in the input string */ + char *strbuf; /* The memory that stores all the entries */ + char **list; /* Pointers to the strbuf for each element */ +} gk_Tokens_t; + + +/*------------------------------------------------------------ + * This structure implements storage for an atom in a pdb file + *------------------------------------------------------------*/ +typedef struct atom { + int serial; + char *name; + char altLoc; + char *resname; + char chainid; + int rserial; + char icode; + char element; + double x; + double y; + double z; + double opcy; + double tmpt; +} atom; + + 
+/*------------------------------------------------------------ + * This structure implements storage for a center of mass for + * a single residue. + *------------------------------------------------------------*/ +typedef struct center_of_mass { + char name; + double x; + double y; + double z; +} center_of_mass; + + +/*------------------------------------------------------------ + * This structure implements storage for a pdb protein + *------------------------------------------------------------*/ +typedef struct pdbf { + int natoms; /* Number of atoms */ + int nresidues; /* Number of residues based on coordinates */ + int ncas; + int nbbs; + int corruption; + char *resSeq; /* Residue sequence based on coordinates */ + char **threeresSeq; /* three-letter residue sequence */ + atom *atoms; + atom **bbs; + atom **cas; + center_of_mass *cm; +} pdbf; + + + +/************************************************************* +* Localization Structures for converting characters to integers +**************************************************************/ +typedef struct gk_i2cc2i_t { + int n; + char *i2c; + int *c2i; +} gk_i2cc2i_t; + + +/******************************************************************* + *This structure implements storage of a protein sequence + * *****************************************************************/ +typedef struct gk_seq_t { + + int len; /*Number of Residues */ + int *sequence; /* Stores the sequence*/ + + + int **pssm; /* Stores the pssm matrix */ + int **psfm; /* Stores the psfm matrix */ + char *name; /* Stores the name of the sequence */ + + int nsymbols; + + +} gk_seq_t; + + + + +/*************************************************************************/ +/*! The following data structure stores information about a memory + allocation operation that can either be served from gk_mcore_t or by + a gk_malloc if not sufficient workspace memory is available. 
*/ +/*************************************************************************/ +typedef struct gk_mop_t { + int type; + ssize_t nbytes; + void *ptr; +} gk_mop_t; + + +/*************************************************************************/ +/*! The following structure defines the mcore for GKlib's customized + memory allocations. */ +/*************************************************************************/ +typedef struct gk_mcore_t { + /* Workspace information */ + size_t coresize; /*!< The amount of core memory that has been allocated */ + size_t corecpos; /*!< Index of the first free location in core */ + void *core; /*!< Pointer to the core itself */ + + /* These are for implementing a stack-based allocation scheme using both + core and also dynamically allocated memory */ + size_t nmops; /*!< The number of gk_mop_t entries that have been allocated */ + size_t cmop; /*!< Index of the first free location in mops */ + gk_mop_t *mops; /*!< The array recording the gk_mop_t operations */ + + /* These are for keeping various statistics for wspacemalloc */ + size_t num_callocs; /*!< The number of core mallocs */ + size_t num_hallocs; /*!< The number of heap mallocs */ + size_t size_callocs; /*!< The total # of bytes in core mallocs */ + size_t size_hallocs; /*!< The total # of bytes in heap mallocs */ + size_t cur_callocs; /*!< The current # of bytes in core mallocs */ + size_t cur_hallocs; /*!< The current # of bytes in heap mallocs */ + size_t max_callocs; /*!< The maximum # of bytes in core mallocs at any given time */ + size_t max_hallocs; /*!< The maximum # of bytes in heap mallocs at any given time */ + +} gk_mcore_t; + + +/*************************************************************************/ +/*! The following structure is used for cache simulation for performance + modeling and analysis. */ +/*************************************************************************/ +typedef struct gk_cache_t { + /*! 
The total cache is nway*(2^(cnbits+lnbits)) bytes */ + uint32_t nway; /*!< the associativity of the cache */ + uint32_t lnbits; /*!< the number of address bits indexing the cache line */ + uint32_t cnbits; /*!< the number of address bits indexing the cache */ + size_t csize; /*!< 2^cnbits */ + size_t cmask; /*!< csize-1 */ + + uint64_t clock; /*!< a clock in terms of accesses */ + + uint64_t *latimes; /*!< a cacheline-level last access time */ + size_t *clines; /*!< the cache in terms of cachelines */ + + uint64_t nhits; /*!< counts the number of hits */ + uint64_t nmisses; /*!< counts the number of misses */ +} gk_cache_t; + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_types.h b/3rdParty/metis/metis-5.1.1/GKlib/gk_types.h new file mode 100644 index 000000000..57c119101 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_types.h @@ -0,0 +1,38 @@ +/*! +\file gk_types.h +\brief This file contains basic scalar datatype used in GKlib + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#ifndef _GK_TYPES_H_ +#define _GK_TYPES_H_ + +/************************************************************************* +* Basic data type definitions. These definitions allow GKlib to separate +* the following elemental types: +* - loop iterator variables, which are set to size_t +* - signed and unsigned int variables that can be set to any # of bits +* - signed and unsigned long variables that can be set to any # of bits +* - real variables, which can be set to single or double precision. 
+**************************************************************************/ +/*typedef ptrdiff_t gk_idx_t; */ /* index variable */ +typedef ssize_t gk_idx_t; /* index variable */ +typedef int32_t gk_int_t; /* integer values */ +typedef uint32_t gk_uint_t; /* unsigned integer values */ +typedef int64_t gk_long_t; /* long integer values */ +typedef uint64_t gk_ulong_t; /* unsigned long integer values */ +typedef float gk_real_t; /* real type */ +typedef double gk_dreal_t; /* double precision real type */ +typedef double gk_wclock_t; /* wall-clock time */ + +/*#define GK_IDX_MAX PTRDIFF_MAX*/ +#define GK_IDX_MAX ((SIZE_MAX>>1)-2) + +#define PRIGKIDX "zd" +#define SCNGKIDX "zd" + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gk_util.c b/3rdParty/metis/metis-5.1.1/GKlib/gk_util.c new file mode 100644 index 000000000..e1e68db0e --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gk_util.c @@ -0,0 +1,107 @@ +/*! +\file gk_util.c +\brief Various utility routines + +\date Started 4/12/2007 +\author George +\version\verbatim $Id: gk_util.c 16223 2014-02-15 21:34:09Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + + +/************************************************************************* +* This function randomly permutes the contents of an array (n/2 random swaps). +* flag == 0, don't initialize perm +* flag == 1, set p[i] = i +**************************************************************************/ +void gk_RandomPermute(size_t n, int *p, int flag) +{ + size_t i, u, v; + int tmp; + + if (flag == 1) { + for (i=0; i<n; i++) + p[i] = i; + } + + for (i=0; i<n/2; i++) { + v = RandomInRange(n); + u = RandomInRange(n); + gk_SWAP(p[v], p[u], tmp); + } +} + + +/************************************************************************/ +/*! +\brief Converts an element-based set membership into a CSR-format set-based + membership. 
+ +For example, it takes an array such as part[] that stores where each +element belongs to and returns a pair of arrays (ptr[], ind[]) that +store in CSR format the list of elements belonging in each partition. + +\param n + the number of elements in the array (e.g., # of vertices) +\param range + the cardinality of the set (e.g., # of partitions) +\param array + the array that stores the per-element set membership +\param ptr + the array that will store the starting indices in ind for + the elements of each set. This is filled by the routine and + its size should be at least range+1. +\param ind + the array that stores consecutively which elements belong to + each set. The size of this array should be n. +*/ +/************************************************************************/ +void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind) +{ + size_t i; + + gk_iset(range+1, 0, ptr); + + for (i=0; i<n; i++) + ptr[array[i]]++; + + /* Compute the ptr, ind structure */ + MAKECSR(i, range, ptr); + for (i=0; i<n; i++) + ind[ptr[array[i]]++] = i; + SHIFTCSR(i, range, ptr); +} + + +/************************************************************************* +* This function returns floor(log2(a)) for a >= 1, and 0 for a <= 1 +**************************************************************************/ +int gk_log2(int a) +{ + size_t i; + + for (i=1; a > 1; i++, a = a>>1); + return i-1; +} + + +/************************************************************************* +* This function checks if the argument is a power of 2 +**************************************************************************/ +int gk_ispow2(int a) +{ + return (a == (1<<gk_log2(a))); +} + + +/************************************************************************* +* This function returns log2(a), computed as log(a)/log(2.0) +**************************************************************************/ +float gk_flog2(float a) +{ + return log(a)/log(2.0); +} + + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gkregex.c 
b/3rdParty/metis/metis-5.1.1/GKlib/gkregex.c new file mode 100644 index 000000000..8a09caab7 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gkregex.c @@ -0,0 +1,10704 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* this is for removing a compiler warning */ +void gkfooo() { return; } + +#ifdef USE_GKREGEX + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. 
*/ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +#include "GKlib.h" + + +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* GKINCLUDE #include "regex_internal.h" */ +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined(__MINGW32_VERSION) || defined(_MSC_VER) +#define strcasecmp stricmp +#endif + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include <langinfo.h> +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include <locale.h> +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include <wchar.h> +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include <wctype.h> +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include <stdbool.h> +#else +typedef enum { false, true } bool; +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if defined HAVE_STDINT_H || defined _LIBC +# include <stdint.h> +#endif /* HAVE_STDINT_H || _LIBC */ +#if defined _LIBC +# include <bits/libc-lock.h> +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +/* In case that the system doesn't have isblank(). 
*/ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include <locale/localeinfo.h> +# include <locale/elem-hash.h> +# include <locale/coll-lookup.h> +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. 
*/ +#ifndef _LIBC +# define __wctype wctype +# define __iswctype iswctype +# define __btowc btowc +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) + +#define PREV_WORD_CONSTRAINT 0x0001 +#define PREV_NOTWORD_CONSTRAINT 0x0002 +#define NEXT_WORD_CONSTRAINT 0x0004 +#define NEXT_NOTWORD_CONSTRAINT 0x0008 +#define PREV_NEWLINE_CONSTRAINT 0x0010 +#define NEXT_NEWLINE_CONSTRAINT 0x0020 +#define PREV_BEGBUF_CONSTRAINT 0x0040 +#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = 
PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef struct +{ + int alloc; + int nelem; + int *elems; +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token type, these are used only by token. */ + OP_DUP_PLUS = 18, + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. */ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. 
*/ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. */ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; + +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. */ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. 
*/ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. */ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. 
*/ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; + +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +#ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static int build_wcs_upper_buffer (re_string_t *pstr) internal_function; +#endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + offset]) +#define re_string_fetch_byte(pstr) \ + ((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) + +#ifdef __GNUC__ +# define alloca(size) __builtin_alloca (size) +# define HAVE_ALLOCA 1 +#elif defined(_MSC_VER) 
+# include <malloc.h> +# define alloca _alloca +# define HAVE_ALLOCA 1 +#else +# error No alloca() +#endif + +#ifndef _LIBC +# if HAVE_ALLOCA +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicate the type of this node. 
*/ + int node_idx; +}; +typedef struct bin_tree_t bin_tree_t; + +#define BIN_TREE_STORAGE_SIZE \ + ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) + +struct bin_tree_storage_t +{ + struct bin_tree_storage_t *next; + bin_tree_t data[BIN_TREE_STORAGE_SIZE]; +}; +typedef struct bin_tree_storage_t bin_tree_storage_t; + +#define CONTEXT_WORD 1 +#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) +#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) +#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) + +#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) +#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) +#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) +#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) +#define IS_ORDINARY_CONTEXT(c) ((c) == 0) + +#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') +#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) +#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) + +#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ + ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ + || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) + +#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ + ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ + || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) + +struct re_dfastate_t +{ + unsigned int hash; + re_node_set nodes; + re_node_set non_eps_nodes; + re_node_set inveclosure; + re_node_set *entrance_nodes; + struct re_dfastate_t **trtable, **word_trtable; + unsigned int context : 4; + unsigned int halt : 1; + /* If this state can accept 
`multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; + +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. 
*/ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; + +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; + +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* number of subexpressions `re_nsub' is in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreference in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. 
*/ + unsigned int has_mb_node : 1; + unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; + unsigned int word_ops_used : 1; + int mb_cur_max; + bitset_t word_char; + reg_syntax_t syntax; + int *subexp_map; +#ifdef DEBUG + char* re_str; +#endif + __libc_lock_define (, lock) +}; + +#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) +#define re_node_set_empty(p) ((p)->nelem = 0) +#define re_node_set_free(set) re_free ((set)->elems) + + +typedef enum +{ + SB_CHAR, + MB_CHAR, + EQUIV_CLASS, + COLL_SYM, + CHAR_CLASS +} bracket_elem_type; + +typedef struct +{ + bracket_elem_type type; + union + { + unsigned char ch; + unsigned char *name; + wchar_t wch; + } opr; +} bracket_elem_t; + + +/* Inline functions for bitset operation. */ +static inline void +bitset_not (bitset_t set) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; +} + +static inline void +bitset_merge (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; +} + +static inline void +bitset_mask (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + +#ifdef RE_ENABLE_I18N +/* Inline functions for re_string. 
*/ +static inline int +internal_function __attribute ((pure)) +re_string_char_size_at (const re_string_t *pstr, int idx) +{ + int byte_idx; + if (pstr->mb_cur_max == 1) + return 1; + for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) + if (pstr->wcs[idx + byte_idx] != WEOF) + break; + return byte_idx; +} + +static inline wint_t +internal_function __attribute ((pure)) +re_string_wchar_at (const re_string_t *pstr, int idx) +{ + if (pstr->mb_cur_max == 1) + return (wint_t) pstr->mbs[idx]; + return (wint_t) pstr->wcs[idx]; +} + +static int +internal_function __attribute ((pure)) +re_string_elem_size_at (const re_string_t *pstr, int idx) +{ +# ifdef _LIBC + const unsigned char *p, *extra; + const int32_t *table, *indirect; + int32_t tmp; +# include <locale/weight.h> + uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + + if (nrules != 0) + { + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + p = pstr->mbs + idx; + tmp = findidx (&p); + return p - pstr->mbs - idx; + } + else +# endif /* _LIBC */ + return 1; +} +#endif /* RE_ENABLE_I18N */ + +#endif /* _REGEX_INTERNAL_H */ + +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* GKINCLUDE #include "regex_internal.c" */ +/******************************************************************************/ +/******************************************************************************/ +/******************************************************************************/ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +/* Functions for string operation. */ + +/* This function allocate the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? 
len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocate the buffers, and initialize them. */ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct. 
*/ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string are: + <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> + Then wide character buffer will be: + <wc1> , WEOF , <wc2> , WEOF , <wc3> + We use WEOF for padding, they indicate that the position isn't + a first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN. 
*/ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE. 
*/ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. 
*/ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ? 
i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Skip characters until the index becomes greater than NEW_RAW_IDX. + Return the index. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wchar_t wc = WEOF; + + /* Skip the characters which are not necessary to check. 
*/ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + int remain_len; + remain_len = pstr->len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + /* Then proceed the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function re-construct the buffers. 
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *q, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? 
CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF
+	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int wc_idx = idx;
+      while(input->wcs[wc_idx] == WEOF)
+	{
+#ifdef DEBUG
+	  /* It must not happen.  */
+	  assert (wc_idx >= 0);
+#endif
+	  --wc_idx;
+	  if (wc_idx < 0)
+	    return input->tip_context;
+	}
+      wc = input->wcs[wc_idx];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+	return CONTEXT_WORD;
+      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
+	      ? CONTEXT_NEWLINE : 0);
+    }
+  else
+#endif
+    {
+      c = re_string_byte_at (input, idx);
+      if (bitset_contain (input->word_char, c))
+	return CONTEXT_WORD;
+      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+    }
+}
+
+/* Functions for set operation.  */
+
+/* Initialize SET as an empty set with room for SIZE elements.
+   Returns REG_NOERROR on success and REG_ESPACE if the storage cannot
+   be obtained; on failure SET is left empty with alloc == 0.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  set->alloc = size;
+  set->nelem = 0;
+  set->elems = re_malloc (int, size);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Never advertise capacity that is not backed by storage.  */
+      set->alloc = 0;
+      /* A zero-sized request may legitimately yield NULL; that is not
+	 an out-of-memory condition.  */
+      if (size != 0)
+	return REG_ESPACE;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize SET to hold exactly ELEM.  On allocation failure SET is
+   left empty and REG_ESPACE is returned.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  set->alloc = 1;
+  set->nelem = 1;
+  set->elems = re_malloc (int, 1);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  set->elems[0] = elem;
+  return REG_NOERROR;
+}
+
+/* Initialize SET to hold ELEM1 and ELEM2, stored in ascending order
+   and collapsed to a single element when they are equal.  On
+   allocation failure SET is left empty (as re_node_set_init_1 does)
+   and REG_ESPACE is returned.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Previously nelem stayed uninitialized and alloc stayed 2.  */
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+	{
+	  set->elems[0] = elem1;
+	  set->elems[1] = elem2;
+	}
+      else
+	{
+	  set->elems[0] = elem2;
+	  set->elems[1] = elem1;
+	}
+    }
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  dest->nelem = src->nelem;
+  if (src->nelem > 0)
+    {
+      dest->alloc = 
dest->nelem;
+      dest->elems = re_malloc (int, dest->alloc);
+      if (BE (dest->elems == NULL, 0))
+	{
+	  dest->alloc = dest->nelem = 0;
+	  return REG_ESPACE;
+	}
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+    }
+  else
+    re_node_set_init_empty (dest);
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2 and merge it
+   into DEST.  Returns REG_NOERROR on success, or REG_ESPACE if DEST
+   could not be enlarged (DEST is then left unchanged).
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.
+   NOTE(review): the comparisons below only work if all three element
+   arrays are sorted in ascending order -- confirm against callers.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+			   const re_node_set *src2)
+{
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection; this is a
+     conservative estimate.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+    {
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
+    }
+
+  /* Find the items in the intersection of SRC1 and SRC2, and copy
+     into the top of DEST those that are not already in DEST itself.
+     SBASE is the lowest index of that staging area; it grows downwards
+     from dest->nelem + src1->nelem + src2->nelem.  All three cursors
+     walk from the last element towards the first.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
+    {
+      if (src1->elems[i1] == src2->elems[i2])
+	{
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+
+	  if (id < 0 || dest->elems[id] != src1->elems[i1])
+	    dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
+	}
+
+      /* Lower the highest of the two items.  */
+      else if (src1->elems[i1] < src2->elems[i2])
+	{
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
+	}
+    }
+
+  id = dest->nelem - 1;	/* Last original DEST element.  */
+  is = dest->nelem + src1->nelem + src2->nelem - 1;	/* Top of staging.  */
+  delta = is - sbase + 1;	/* Number of staged (new) elements.  */
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place; this is more or
+     less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+	if (dest->elems[is] > dest->elems[id])
+	  {
+	    /* Copy from the top.  */
+	    dest->elems[id + delta--] = dest->elems[is--];
+	    if (delta == 0)
+	      break;
+	  }
+	else
+	  {
+	    /* Slide from the bottom.  */
+	    dest->elems[id + delta] = dest->elems[id];
+	    if (--id < 0)
+	      break;
+	  }
+      }
+
+  /* Copy remaining SRC elements.  DELTA is zero here unless the
+     original DEST elements ran out first (or DEST was empty), in which
+     case the DELTA staged items still at SBASE go to the front.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+  return REG_NOERROR;
+}
+
+/* Calculate the union of the sets SRC1 and SRC2 and store it in
+   DEST.  Returns REG_NOERROR on success, or an error code otherwise.  
*/
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+			const re_node_set *src2)
+{
+  int pos1 = 0, pos2 = 0, out = 0;
+  const int have1 = (src1 != NULL && src1->nelem > 0);
+  const int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* With at most one non-empty operand the union is a plain copy
+     (or the empty set).  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+	return re_node_set_init_copy (dest, src1);
+      if (have2)
+	return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  /* Both operands hold elements: reserve room for the worst case in
+     which they share nothing.  */
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Two-way merge; an element present in both inputs is emitted once.  */
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      const int left = src1->elems[pos1];
+      const int right = src2->elems[pos2];
+      if (left > right)
+	{
+	  dest->elems[out++] = right;
+	  ++pos2;
+	}
+      else
+	{
+	  if (left == right)
+	    ++pos2;
+	  dest->elems[out++] = left;
+	  ++pos1;
+	}
+    }
+
+  /* At most one input still has a tail; append it wholesale.  */
+  if (pos1 < src1->nelem)
+    {
+      const int rest = src1->nelem - pos1;
+      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
+      out += rest;
+    }
+  else if (pos2 < src2->nelem)
+    {
+      const int rest = src2->nelem - pos2;
+      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
+      out += rest;
+    }
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC. And store it to
+   DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?
+     SBASE is the lowest index of that staging area; both cursors walk
+     from the last element towards the first.  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+	is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+	dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+	--id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  id = dest->nelem - 1;	/* Last original DEST element.  */
+  is = dest->nelem + 2 * src->nelem - 1;	/* Top of staging.  */
+  delta = is - sbase + 1;	/* Number of staged (new) elements.  */
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+	{
+	  /* Copy from the top.  */
+	  dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+	{
+	  /* Slide from the bottom.  */
+	  dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy remaining SRC elements.  
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + int type = token.type; + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows. 
*/ + if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+		  const re_node_set *nodes)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  /* The empty node set is represented by the NULL state.  */
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* Reuse a state from the bucket when both its hash and its node
+     set match.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == key
+	  && re_node_set_compare (&candidate->nodes, nodes))
+	return candidate;
+    }
+
+  /* Nothing suitable is registered yet, so build a fresh state.  */
+  created = create_ci_newstate (dfa, nodes, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.  
*/
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+			  const re_node_set *nodes, unsigned int context)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  /* The empty node set is represented by the NULL state.  */
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* A registered state is reusable only if hash, context and entrance
+     nodes all agree.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash != key)
+	continue;
+      if (candidate->context != context)
+	continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+	return candidate;
+    }
+
+  /* Nothing suitable is registered in `dfa' yet, so build a fresh
+     state.  */
+  created = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.  
*/
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+		unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+
+  /* Collect the non-epsilon nodes of the state.  The insertion result
+     is checked so that a failed append is reported rather than
+     silently producing an incomplete set.  */
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
+	if (BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+		0))
+	  return REG_ESPACE;
+    }
+
+  /* Append the state to its hash bucket, growing the bucket first when
+     it is full.  */
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+					      new_alloc);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE together with the node sets and transition tables it
+   references.  entrance_nodes is released separately only when it does
+   not alias state->nodes.  */
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  if (state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independ of contexts.
+   Return the new state if succeeded, otherwise return NULL.  
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+		    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  /* Until a constrained node is met, the state is entered through its
+     own node set.  */
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      unsigned int constraint = 0;
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      if (node->constraint)
+	constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+	continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+	newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+	newstate->has_backref = 1;
+      else if (type == ANCHOR)
+	constraint = node->opr.ctx_type;
+
+      if (constraint)
+	{
+	  if (newstate->entrance_nodes == &newstate->nodes)
+	    {
+	      /* First constrained node: keep a private copy of NODES as
+		 the entrance set.  The copy is attached to NEWSTATE only
+		 once it is complete, so that free_state never sees a
+		 NULL or half-built entrance_nodes.  */
+	      re_node_set *entrance = re_malloc (re_node_set, 1);
+	      if (BE (entrance == NULL, 0))
+		{
+		  free_state (newstate);
+		  return NULL;
+		}
+	      err = re_node_set_init_copy (entrance, nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		{
+		  re_free (entrance);
+		  free_state (newstate);
+		  return NULL;
+		}
+	      newstate->entrance_nodes = entrance;
+	      nctx_nodes = 0;
+	      newstate->has_constraint = 1;
+	    }
+
+	  /* Drop nodes whose constraint CONTEXT does not satisfy.
+	     NCTX_NODES counts earlier removals, which shifted the
+	     remaining positions in newstate->nodes down.  */
+	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+	    {
+	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+	      ++nctx_nodes;
+	    }
+	}
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regcomp.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.  
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); 
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const unsigned char *class_name, + reg_syntax_t syntax); +#else 
/* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? 
*/ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular 
expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = alloca (dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + int i; + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes + || cset->nranges || cset->nchar_classes) + { +# ifdef _LIBC + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) + { + /* In this case we want to catch the bytes which are + the first byte of any collation elements. + e.g. In da_DK, we want to catch 'a' since "aa" + is a valid collation element, and don't catch + 'b' since 'b' is the only collation element + which starts from 'b'. 
*/ + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# else + if (dfa->mb_cur_max > 1) + for (i = 0; i < SBC_MAX; ++i) + if (__btowc (i) == WEOF) + re_set_fastmap (fastmap, icase, i); +# endif /* not _LIBC */ + } + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, 0, *(unsigned char *) buf); + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. 
+ Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *__restrict preg; + const char *__restrict pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. 
*/ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +/* regerror ( int errcode, preg, errbuf, errbuf_size) */ +size_t +regerror ( + int errcode, + const regex_t *__restrict preg, + char *__restrict errbuf, + size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. */ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; +#endif + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +static const bitset_t utf8_sb_map = +{ + /* Set the first 128 bits. */ + [0 ... 
0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#endif + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. */ + +void +regfree (preg) + regex_t *preg; +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. 
*/ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. 
*/ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. */ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. 
*/ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. */ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. 
*/ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. 
*/ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. 
*/ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.idx) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. 
*/ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = 
calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. 
*/ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. 
*/ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. 
*/ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert 
(!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. */ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + if (dfa->nodes[org_node].type == ANCHOR) + { + /* In case of the node has another constraint, append it. 
*/ + if (org_node == root_node && clone_node != org_node) + { + /* ...but if the node is root_node itself, it means the + epsilon closure have a loop, then tie it to the + destination of the root_node. */ + ret = re_node_set_insert (dfa->edests + clone_node, + org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + constraint |= dfa->nodes[org_node].opr.ctx_type; + } + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There are no such a duplicated node, create a new one. */ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There are a duplicated node which satisfy the constraint, + use it to avoid infinite loop. 
*/ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + if (dfa->nodes[org_idx].type == ANCHOR) + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + unsigned int constraint; + int i, incomplete; + re_node_set eclosure; + incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + constraint = ((dfa->nodes[node].type == ANCHOR) + ? dfa->nodes[node].opr.ctx_type : 0); + /* If the current node has constraints, duplicate all nodes. + Since they must inherit the constraints. */ + if (constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + re_node_set_merge (&eclosure, &eclosure_elem); + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* Epsilon closures include itself. */ + re_node_set_insert (&eclosure, node); + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = 
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & 
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. 
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. 
+ This function build the following tree, from regular expression <reg_exp>: + CAT + / \ + / \ + <reg_exp> EOR + + CAT means concatenation. + EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + <branch1>|<branch2>: + ALT + / \ + / \ + <branch1> <branch2> + + ALT means alternative, which represents the operator `|'. 
*/ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + <exp1><exp2>: + CAT + / \ + / \ + <exp1> <exp2> + + CAT means concatenation. */ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. 
*/ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); 
+ return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. */ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", + it must not be "<ANCHOR(^)><REPEAT(*)>". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "alnum", + (const unsigned char *) "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "space", + (const unsigned char *) "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. 
*/ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (<reg_exp>): + SUBEXP + | + <reg_exp> +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); + re_token_t start_token = *token; + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* <re>{} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE (end != -1 && start > end, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+                        int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  /* Without locale tables only one-byte collating symbols can be
+     represented; anything else is rejected.  */
+  size_t name_len = strlen ((const char *) name);
+  if (BE (name_len != 1, 0))
+    return REG_ECOLLATE;
+  else
+    {
+      bitset_set (sbcset, name[0]);
+      return REG_NOERROR;
+    }
+}
+#endif /* not _LIBC */
+
+/* Parse a bracket expression such as "[abc]", "[a-c]", "[[.a-a.]]" etc.
+   REGEXP is the pattern being read, TOKEN the look-ahead token (updated
+   as parsing proceeds), SYNTAX the active syntax bits.
+   On success return the tree node(s) describing the bracket: a
+   SIMPLE_BRACKET node, a COMPLEX_BRACKET node, or both joined by OP_ALT.
+   On failure return NULL after storing the reason in *ERR and releasing
+   the character sets allocated here.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+                   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+         const unsigned char *name;
+         size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+        {
+          int32_t second = hash % (table_size - 2) + 1;
+
+          do
+            {
+              /* First compare the hashing value.  */
+              if (symb_table[2 * elem] == hash
+                  /* Compare the length of the name.  */
+                  && name_len == extra[symb_table[2 * elem + 1]]
+                  /* Compare the name.  */
+                  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+                             name_len) == 0)
+                {
+                  /* Yep, this is the entry.  */
+                  break;
+                }
+
+              /* Next entry.  */
+              elem += second;
+            }
+          while (symb_table[2 * elem] != 0);
+        }
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+         bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+        {
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            return collseqmb[br_elem->opr.ch];
+          else
+            {
+              wint_t wc = __btowc (br_elem->opr.ch);
+              return __collseq_table_lookup (collseqwc, wc);
+            }
+        }
+      else if (br_elem->type == MB_CHAR)
+        {
+          return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+        }
+      else if (br_elem->type == COLL_SYM)
+        {
+          size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+          if (nrules != 0)
+            {
+              int32_t elem, idx;
+              elem = seek_collating_symbol_entry (br_elem->opr.name,
+                                                  sym_name_len);
+              if (symb_table[2 * elem] != 0)
+                {
+                  /* We found the entry.  */
+                  idx = symb_table[2 * elem + 1];
+                  /* Skip the name of collating element name.  */
+                  idx += 1 + extra[idx];
+                  /* Skip the byte sequence of the collating element.  */
+                  idx += 1 + extra[idx];
+                  /* Adjust for the alignment.  */
+                  idx = (idx + 3) & ~3;
+                  /* Skip the multibyte collation sequence value.  */
+                  idx += sizeof (unsigned int);
+                  /* Skip the wide char sequence of the collating element.  */
+                  idx += sizeof (unsigned int) *
+                         (1 + *(unsigned int *) (extra + idx));
+                  /* Return the collation sequence value.  */
+                  return *(unsigned int *) (extra + idx);
+                }
+              else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+                {
+                  /* No valid character.  Match it as a single byte
+                     character.  */
+                  return collseqmb[br_elem->opr.name[0]];
+                }
+            }
+          else if (sym_name_len == 1)
+            return collseqmb[br_elem->opr.name[0]];
+        }
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends, is a pointer argument since we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+         re_charset_t *mbcset;
+         int *range_alloc;
+         bitset_t sbcset;
+         bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+         start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+              || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+              0))
+        return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+        return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+        return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+         However, if we have no collation elements, and the character set
+         is single byte, the single byte character set that we
+         build below suffices.  */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+        {
+          /* Check the space of the arrays.  */
+          if (BE (*range_alloc == mbcset->nranges, 0))
+            {
+              /* There is not enough space, need realloc.  */
+              uint32_t *new_array_start;
+              uint32_t *new_array_end;
+              int new_nranges;
+
+              /* +1 in case of mbcset->nranges is 0.  */
+              new_nranges = 2 * mbcset->nranges + 1;
+              /* Publish each array as soon as its realloc succeeds.  If
+                 only one of the two calls fails, MBCSET must not keep a
+                 pointer to a block that realloc has already released,
+                 because free_charset will free whatever is stored here.  */
+              new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                            new_nranges);
+              if (new_array_start != NULL)
+                mbcset->range_starts = new_array_start;
+              new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                          new_nranges);
+              if (new_array_end != NULL)
+                mbcset->range_ends = new_array_end;
+
+              if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+                return REG_ESPACE;
+
+              *range_alloc = new_nranges;
+            }
+
+          mbcset->range_starts[mbcset->nranges] = start_collseq;
+          mbcset->range_ends[mbcset->nranges++] = end_collseq;
+        }
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+        {
+          uint32_t ch_collseq;
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            ch_collseq = collseqmb[ch];
+          else
+            ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+          if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+            bitset_set (sbcset, ch);
+        }
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+     pointer argument since we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+         re_charset_t *mbcset;
+         int *coll_sym_alloc;
+         bitset_t sbcset;
+         const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+        {
+          elem = seek_collating_symbol_entry (name, name_len);
+          if (symb_table[2 * elem] != 0)
+            {
+              /* We found the entry.  */
+              idx = symb_table[2 * elem + 1];
+              /* Skip the name of collating element name.  */
+              idx += 1 + extra[idx];
+            }
+          else if (symb_table[2 * elem] == 0 && name_len == 1)
+            {
+              /* No valid character, treat it as a normal
+                 character.  */
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+          else
+            return REG_ECOLLATE;
+
+          /* Got valid collation sequence, add it as a new entry.  */
+          /* Check the space of the arrays.  */
+          if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+            {
+              /* Not enough, realloc it.  */
+              /* +1 in case of mbcset->ncoll_syms is 0.  */
+              int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+              /* Use realloc since mbcset->coll_syms is NULL
+                 if *alloc == 0.  */
+              int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                                   new_coll_sym_alloc);
+              if (BE (new_coll_syms == NULL, 0))
+                return REG_ESPACE;
+              mbcset->coll_syms = new_coll_syms;
+              *coll_sym_alloc = new_coll_sym_alloc;
+            }
+          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+          return REG_NOERROR;
+        }
+      else
+        {
+          if (BE (name_len != 1, 0))
+            return REG_ECOLLATE;
+          else
+            {
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+        }
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      /* Only one of the two allocations may have failed; release the
+         other one so it does not leak.  re_free is already applied to
+         possibly-NULL pointers in free_charset.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+      non_match = 1;
+      /* The set is inverted by bitset_not below, so marking newline
+         here is what keeps "[^...]" from matching it.  */
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+        bitset_set (sbcset, '\n');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_BADPAT;
+          goto parse_bracket_exp_free_return;
+        }
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+                                   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+        {
+          *err = ret;
+          goto parse_bracket_exp_free_return;
+        }
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+        {
+          if (BE (token->type == END_OF_RE, 0))
+            {
+              *err = REG_EBRACK;
+              goto parse_bracket_exp_free_return;
+            }
+          if (token->type == OP_CHARSET_RANGE)
+            {
+              re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+              token_len2 = peek_token_bracket (&token2, regexp, syntax);
+              if (BE (token2.type == END_OF_RE, 0))
+                {
+                  *err = REG_EBRACK;
+                  goto parse_bracket_exp_free_return;
+                }
+              if (token2.type == OP_CLOSE_BRACKET)
+                {
+                  /* We treat the last '-' as a normal character.  */
+                  re_string_skip_bytes (regexp, -token_len);
+                  token->type = CHARACTER;
+                }
+              else
+                is_range_exp = 1;
+            }
+        }
+
+      if (is_range_exp == 1)
+        {
+          end_elem.opr.name = end_name_buf;
+          ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+                                       dfa, syntax, 1);
+          if (BE (ret != REG_NOERROR, 0))
+            {
+              *err = ret;
+              goto parse_bracket_exp_free_return;
+            }
+
+          token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+          *err = build_range_exp (sbcset, mbcset, &range_alloc,
+                                  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+          *err = build_range_exp (sbcset,
+                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
+                                  &range_alloc, &start_elem, &end_elem);
+# else
+          *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* RE_ENABLE_I18N */
+          if (BE (*err != REG_NOERROR, 0))
+            goto parse_bracket_exp_free_return;
+        }
+      else
+        {
+          switch (start_elem.type)
+            {
+            case SB_CHAR:
+              bitset_set (sbcset, start_elem.opr.ch);
+              break;
+#ifdef RE_ENABLE_I18N
+            case MB_CHAR:
+              /* Check whether the array has enough space.  */
+              if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+                {
+                  wchar_t *new_mbchars;
+                  /* Not enough, realloc it.  */
+                  /* +1 in case of mbcset->nmbchars is 0.  */
+                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                  /* Use realloc since array is NULL if *alloc == 0.  */
+                  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                            mbchar_alloc);
+                  if (BE (new_mbchars == NULL, 0))
+                    goto parse_bracket_exp_espace;
+                  mbcset->mbchars = new_mbchars;
+                }
+              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+              break;
+#endif /* RE_ENABLE_I18N */
+            case EQUIV_CLASS:
+              *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+                                        mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                        start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case COLL_SYM:
+              *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+                                             mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+                                             start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case CHAR_CLASS:
+              *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                                      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                      start_elem.opr.name, syntax);
+              if (BE (*err != REG_NOERROR, 0))
+               goto parse_bracket_exp_free_return;
+              break;
+            default:
+              assert (0);
+              break;
+            }
+        }
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_EBRACK;
+          goto parse_bracket_exp_free_return;
+        }
+      if (token->type == OP_CLOSE_BRACKET)
+        break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+                                                     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+        if (sbcset[sbc_idx])
+          break;
+      /* If there are no bits set in sbcset, there is no point
+         of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+        {
+          /* Build a tree for simple bracket.  */
+          br_token.type = SIMPLE_BRACKET;
+          br_token.opr.sbcset = sbcset;
+          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+
+          /* Then join them by ALT node.  */
+          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+        }
+      else
+        {
+          re_free (sbcset);
+          work_tree = mbc_tree;
+        }
+    }
+  else
+#endif /* not RE_ENABLE_I18N */
+    {
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse an element in the bracket expression.  
*/
+
+/* Consume one element of a bracket expression from REGEXP and describe
+   it in ELEM.  TOKEN (TOKEN_LEN bytes long) is the token already peeked
+   at the current position.  ACCEPT_HYPHEN is nonzero where a literal
+   '-' is acceptable.  Returns REG_NOERROR, REG_ERANGE for a misplaced
+   '-', or whatever parse_bracket_symbol reports.  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+                       re_token_t *token, int token_len, re_dfa_t *dfa,
+                       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  /* A multibyte character is taken whole, whatever the token says.  */
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      /* "[.", "[:" and "[=" introduce a named symbol.  */
+      return parse_bracket_symbol (elem, regexp, token);
+
+    case OP_CHARSET_RANGE:
+      if (!accept_hyphen)
+        {
+          /* A '-' must only appear as anything but a range indicator
+             before the closing bracket.  Everything else is an error.  */
+          re_token_t following;
+          (void) peek_token_bracket (&following, regexp, syntax);
+          if (following.type != OP_CLOSE_BRACKET)
+            /* The actual error value is not standardized since this
+               whole case is undefined.  But ERANGE makes good sense.  */
+            return REG_ERANGE;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  /* Anything else is an ordinary single-byte character.  */
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
+   such as [:<character_class>:], [.<collating_element>.], and
+   [=<equivalent_class>=].  
*/
+
+/* TOKEN is the opening token ("[.", "[:" or "[="); its opr.c is used as
+   the closing delimiter.  The bytes up to the delimiter followed by ']'
+   are copied into ELEM->opr.name and NUL-terminated, and ELEM->type is
+   set according to TOKEN->type.  Returns REG_EBRACK when the input ends
+   first or the name would reach BRACKET_NAME_BUF_SIZE bytes.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+                      re_token_t *token)
+{
+  unsigned char ch, delim = token->opr.c;
+  int i = 0;
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+  for (;; ++i)
+    {
+      /* Keep one byte free for the terminating NUL stored below.  */
+      if (i >= BRACKET_NAME_BUF_SIZE)
+        return REG_EBRACK;
+      if (token->type == OP_OPEN_CHAR_CLASS)
+        ch = re_string_fetch_byte_case (regexp);
+      else
+        ch = re_string_fetch_byte (regexp);
+      if (re_string_eoi(regexp))
+        return REG_EBRACK;
+      /* The symbol ends at DELIM immediately followed by ']'.  */
+      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+        break;
+      elem->opr.name[i] = ch;
+    }
+  re_string_skip_bytes (regexp, 1);
+  elem->opr.name[i] = '\0';
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+      elem->type = COLL_SYM;
+      break;
+    case OP_OPEN_EQUIV_CLASS:
+      elem->type = EQUIV_CLASS;
+      break;
+    case OP_OPEN_CHAR_CLASS:
+      elem->type = CHAR_CLASS;
+      break;
+    default:
+      break;
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the equivalence class which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
+     is a pointer argument since we may update it.
+     Returns REG_NOERROR, REG_ECOLLATE when NAME is not usable as a
+     class name, or REG_ESPACE when the array cannot be grown.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+                   int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                               _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                _NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+        /* This isn't a valid character.  */
+        return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+        {
+          char_buf[0] = ch;
+          cp = char_buf;
+          idx2 = findidx (&cp);
+/*
+          idx2 = table[ch];
+*/
+          if (idx2 == 0)
+            /* This isn't a valid character.  */
+            continue;
+          /* CH joins the class when its weight sequence has the same
+             length as NAME's and every compared entry is equal.  */
+          if (len == weights[idx2])
+            {
+              int cnt = 0;
+              while (cnt <= len &&
+                     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+                ++cnt;
+
+              if (cnt > len)
+                bitset_set (sbcset, ch);
+            }
+        }
+      /* Check whether the array has enough space.  */
+      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+        {
+          /* Not enough, realloc it.  */
+          /* +1 in case of mbcset->nequiv_classes is 0.  */
+          int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+          /* Use realloc since the array is NULL if *alloc == 0.  */
+          int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+                                                   int32_t,
+                                                   new_equiv_class_alloc);
+          if (BE (new_equiv_classes == NULL, 0))
+            return REG_ESPACE;
+          mbcset->equiv_classes = new_equiv_classes;
+          *equiv_class_alloc = new_equiv_class_alloc;
+        }
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC */
+    {
+      /* No collation rules in use: only a one-byte name is accepted and
+         it stands for itself.  */
+      if (BE (strlen ((const char *) name) != 1, 0))
+        return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the character class which is represented by NAME.
+     The results are written to MBCSET and SBCSET. 
+     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
+     is a pointer argument since we may update it.
+     TRANS, when non-NULL, maps each matching byte before it is added
+     to SBCSET.  With RE_ICASE set in SYNTAX, "upper" and "lower" are
+     treated as "alpha".  Returns REG_NOERROR, REG_ECTYPE for an unknown
+     class name, or REG_ESPACE when the array cannot be grown.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+                 re_charset_t *mbcset, int *char_class_alloc,
+                 const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+                 const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* In case of REG_ICASE "upper" and "lower" match the both of
+     upper and lower cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Check the space of the arrays.  */
+  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+                                               new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+        return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET every byte for which CTYPE_FUNC is true, translated
+   through TRANS when a translation table is in use.  */
+#define BUILD_CHARCLASS_LOOP(ctype_func)        \
+  do {                                          \
+    if (BE (trans != NULL, 0))                  \
+      {                                         \
+        for (i = 0; i < SBC_MAX; ++i)           \
+          if (ctype_func (i))                   \
+            bitset_set (sbcset, trans[i]);      \
+      }                                         \
+    else                                        \
+      {                                         \
+        for (i = 0; i < SBC_MAX; ++i)           \
+          if (ctype_func (i))                   \
+            bitset_set (sbcset, i);             \
+      }                                         \
+  } while (0)
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the tree for a class given by name: the members of CLASS_NAME
+   plus every byte listed in the NUL-terminated string EXTRA, complemented
+   when NON_MATCH is nonzero.  TRANS is handed to build_charclass.
+   Returns the tree, or NULL with the reason stored in *ERR.  */
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+                    const unsigned char *class_name,
+                    const unsigned char *extra, int non_match,
+                    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+      /* Only one of the two allocations may have failed; release the
+         other one so it does not leak.  re_free is already applied to
+         possibly-NULL pointers in free_charset.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+    }
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                         mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                         class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* Test the node just created: MBC_TREE is already known to be
+         non-NULL here, so checking it would let a failed create_tree
+         return NULL without setting *ERR.  */
+      if (BE (tree != NULL, 1))
+        return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+   Fetch a number from `input', and return the number.
+   Return -1, if the number field is empty like "{,1}".
+   Return -2, if an error has occurred: the input ended, a token that is
+   not a decimal digit was seen, or the value exceeded RE_DUP_MAX.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int num = -1;
+  unsigned char c;
+  while (1)
+    {
+      fetch_token (token, input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+        break;
+      /* Once NUM is -2 it stays -2, but tokens keep being consumed
+         until the terminator so the caller resumes after the field.  */
+      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
+             ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
+      num = (num > RE_DUP_MAX) ? -2 : num;
+    }
+  return num;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Release CSET together with every array it owns.  */
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->mbchars);
+# ifdef _LIBC
+  re_free (cset->coll_syms);
+  re_free (cset->equiv_classes);
+# endif
+  /* The range arrays are also filled by the non-_LIBC build_range_exp,
+     so they must be released in every RE_ENABLE_I18N build.  */
+  re_free (cset->range_starts);
+  re_free (cset->range_ends);
+  re_free (cset->char_classes);
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Create a tree node.  
*/
+
+/* Make a node whose token carries only TYPE and whose children are
+   LEFT and RIGHT.  Returns NULL when no node could be obtained.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+             re_token_type_t type)
+{
+  re_token_t tok;
+
+  tok.type = type;
+  return create_token_tree (dfa, left, right, &tok);
+}
+
+/* Take the next free slot of DFA's tree storage (starting a fresh
+   storage block when the current one is used up), copy TOKEN into it and
+   attach LEFT and RIGHT as its children.  Returns NULL if a new storage
+   block cannot be allocated.  */
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+                   const re_token_t *token)
+{
+  bin_tree_t *node;
+
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      /* Current block exhausted: push a new one on the list.  */
+      bin_tree_storage_t *block = re_malloc (bin_tree_storage_t, 1);
+
+      if (block == NULL)
+        return NULL;
+      block->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = block;
+      dfa->str_tree_storage_idx = 0;
+    }
+  node = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+
+  /* Token first, then clear the two per-node flags in the copy.  */
+  node->token = *token;
+  node->token.duplicated = 0;
+  node->token.opt_subexp = 0;
+
+  node->parent = NULL;
+  node->first = NULL;
+  node->next = NULL;
+  node->node_idx = -1;
+
+  node->left = left;
+  if (left != NULL)
+    left->parent = node;
+
+  node->right = right;
+  if (right != NULL)
+    right->parent = node;
+
+  return node;
+}
+
+/* Tree-walk callback: flag NODE as an optional subexpression when it is
+   the SUBEXP whose index is passed, cast to a pointer, in EXTRA.
+   To be called from preorder or postorder.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  int wanted = (int) (long) extra;
+
+  if (node->token.type == SUBEXP)
+    {
+      if (node->token.opr.idx == wanted)
+        node->token.opt_subexp = 1;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Release the character set owned by NODE, if it owns one.  Tokens
+   marked as duplicated do not own their set and are left alone.  */
+
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Tree-walk callback that releases what NODE's token owns.  EXTRA is
+   not used.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  (void) extra;
+  free_token (&node->token);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the node SRC, and return new node.  
This is a preorder
+   visit similar to the one implemented by the generic visitor, but
+   we need more infrastructure to maintain two parallel trees --- so,
+   it's easier to duplicate.
+   ROOT is the tree to copy; nodes for the copy come from
+   create_token_tree on DFA.  Every copied token has its `duplicated'
+   flag set.  Returns the root of the copy, or NULL as soon as a node
+   cannot be created.  */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+  const bin_tree_t *node;
+  bin_tree_t *dup_root;
+  /* P_NEW is the link the next copied node gets stored through;
+     DUP_NODE is the node that becomes its parent.  The copy of ROOT is
+     given ROOT's own parent.  */
+  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+  for (node = root; ; )
+    {
+      /* Create a new tree and link it back to the current parent.  */
+      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+      if (*p_new == NULL)
+        return NULL;
+      (*p_new)->parent = dup_node;
+      (*p_new)->token.duplicated = 1;
+      dup_node = *p_new;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+        {
+          node = node->left;
+          p_new = &dup_node->left;
+        }
+      else
+        {
+          /* Climb both trees in step until a node with a right child
+             that has not been visited yet is found.  The walk ends when
+             the source parent chain runs out.  */
+          const bin_tree_t *prev = NULL;
+          while (node->right == prev || node->right == NULL)
+            {
+              prev = node;
+              node = node->parent;
+              dup_node = dup_node->parent;
+              if (!node)
+                return dup_root;
+            }
+          node = node->right;
+          p_new = &dup_node->right;
+        }
+    }
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regexec.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+                                     int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+                                          int str_idx, int from, int to)
+     internal_function;
+static int 
search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) internal_function; +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop, int ret_len) internal_function; +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len) internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated) internal_function; +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) + internal_function; +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const 
re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int 
num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int 
target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. */ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. 
*/
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *__restrict preg;
+    const char *__restrict string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  reg_errcode_t err;
+  int start, length;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))  /* Unknown flag bit.  */
+    return REG_BADPAT;
+
+  if (eflags & REG_STARTEND)
+    {  /* The region to search is taken from pmatch[0].  */
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {  /* Otherwise search the whole NUL-terminated STRING.  */
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  if (preg->no_sub)  /* No sub-match reporting: hand over no registers.  */
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  return err != REG_NOERROR;  /* 0 on a match, 1 for every other outcome.  */
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+                  eflags & (REG_NOTBOL | REG_NOTEOL));  /* REG_STARTEND is masked off.  */
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on the concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match
+   extending beyond the first STOP characters of the concatenation of the
+   strings is to be considered.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and of all groups are stored in REGS.  (For the "_2" variants, the offsets
+   are computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+int
+re_match (bufp, string, length, start, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start;
+    struct re_registers *regs;
+{
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (bufp, string, length, start, range, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range;
+    struct re_registers *regs;
+{
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, stop;
+    struct re_registers *regs;
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, 0, regs, stop, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop;
+    struct re_registers *regs;
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2,
re_search_2) +#endif + +static int +re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, + stop, ret_len) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, range, stop, ret_len; + struct re_registers *regs; +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. */ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; +#ifdef _LIBC + memcpy (__mempcpy (s, string1, length1), string2, length2); +#else + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); +#endif + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, + ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) + struct re_pattern_buffer *bufp; + const char *string; + int length, start, range, stop, ret_len; + struct re_registers *regs; +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. 
*/ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. */ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (regs, pmatch, nregs, regs_allocated) + struct re_registers *regs; + regmatch_t *pmatch; + int nregs, regs_allocated; +{ + int rval = REGS_REALLOCATE; + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. 
*/ + regs->start = re_malloc (regoff_t, need_regs); + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) + return REGS_UNALLOCATED; + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) + return REGS_UNALLOCATED; + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. 
*/ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. 
+   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+                    eflags)
+    const regex_t *preg;
+    const char *string;
+    int length, start, range, stop, eflags;
+    size_t nmatch;
+    regmatch_t pmatch[];
+{
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int extra_nmatch;
+  int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  re_match_context_t mctx = { .dfa = dfa };
+#else
+  re_match_context_t mctx;
+#endif
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+                   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+  mctx.dfa = dfa;
+#endif
+
+  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;  /* PMATCH slots beyond the pattern's groups; set to -1 at the end.  */
+  nmatch -= extra_nmatch;
+
+  /* Check whether the DFA has not been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+          || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+          || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  /* If initial states with non-begbuf contexts have no elements,
+     the regex must be anchored.  If preg->newline_anchor is set,
+     we'll never use init_state_nl, so do not check it.  */
+  if (dfa->init_state->nodes.nelem == 0
+      && dfa->init_state_word->nodes.nelem == 0
+      && (dfa->init_state_nl->nodes.nelem == 0
+          || !preg->newline_anchor))
+    {
+      if (start != 0 && start + range != 0)
+        return REG_NOMATCH;
+      start = range = 0;  /* Only position 0 is tried.  */
+    }
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+                            preg->translate, preg->syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  mctx.input.stop = stop;
+  mctx.input.raw_stop = stop;
+  mctx.input.newline_anchor = preg->newline_anchor;
+
+  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa passes,
+     if nmatch > 1, or this dfa has a "multibyte node", which is a
+     back-reference or a node which can accept a multibyte character or
+     a multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+        {
+          err = REG_ESPACE;
+          goto free_return;
+        }
+    }
+  else
+    mctx.state_log = NULL;
+
+  match_first = start;
+  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                           : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+  /* Check incrementally whether or not the input string matches.  A
+     negative RANGE makes the candidate start positions run backwards.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = dfa->mb_cur_max == 1;
+  match_kind =
+    (fastmap
+     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)  /* 4: raw bytes of STRING can be tested.  */
+        | (range >= 0 ? 2 : 0)  /* 2: forward search.  */
+        | (t != NULL ? 1 : 0))  /* 1: a translation table is present.  */
+     : 8);  /* 8: no usable fastmap.  */
+
+  for (;; match_first += incr)
+    {
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+        goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+         find a plausible place to start matching.  This may be done
+         with varying efficiency, so there are various possibilities:
+         only the most common of them are specialized, in order to
+         save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+        {
+        case 8:
+          /* No fastmap.  */
+          break;
+
+        case 7:
+          /* Fastmap with single-byte translation, match forward.  */
+          while (BE (match_first < right_lim, 1)
+                 && !fastmap[t[(unsigned char) string[match_first]]])
+            ++match_first;
+          goto forward_match_found_start_or_reached_end;
+
+        case 6:
+          /* Fastmap without translation, match forward.  */
+          while (BE (match_first < right_lim, 1)
+                 && !fastmap[(unsigned char) string[match_first]])
+            ++match_first;
+
+        forward_match_found_start_or_reached_end:
+          if (BE (match_first == right_lim, 0))
+            {
+              ch = match_first >= length
+                       ? 0 : (unsigned char) string[match_first];
+              if (!fastmap[t ? t[ch] : ch])
+                goto free_return;
+            }
+          break;
+
+        case 4:
+        case 5:
+          /* Fastmap without multi-byte translation, match backwards.  */
+          while (match_first >= left_lim)
+            {
+              ch = match_first >= length
+                       ? 0 : (unsigned char) string[match_first];
+              if (fastmap[t ? t[ch] : ch])
+                break;
+              --match_first;
+            }
+          if (match_first < left_lim)
+            goto free_return;
+          break;
+
+        default:
+          /* In this case, we can't determine easily the current byte,
+             since it might be a component byte of a multibyte
+             character.  Then we use the constructed buffer instead.  */
+          for (;;)
+            {
+              /* If MATCH_FIRST is out of the valid range, reconstruct the
+                 buffers.  */
+              unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+              if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+                {
+                  err = re_string_reconstruct (&mctx.input, match_first,
+                                               eflags);
+                  if (BE (err != REG_NOERROR, 0))
+                    goto free_return;
+
+                  offset = match_first - mctx.input.raw_mbs_idx;
+                }
+              /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+                 Note that MATCH_FIRST must not be smaller than 0.  */
+              ch = (match_first >= length
+                    ? 0 : re_string_byte_at (&mctx.input, offset));
+              if (fastmap[ch])
+                break;
+              match_first += incr;
+              if (match_first < left_lim || match_first > right_lim)
+                {
+                  err = REG_NOMATCH;
+                  goto free_return;
+                }
+            }
+          break;
+        }
+
+      /* Reconstruct the buffers so that the matcher can assume that
+         the matching starts from the beginning of the buffer.  */
+      err = re_string_reconstruct (&mctx.input, match_first, eflags);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+
+#ifdef RE_ENABLE_I18N
+     /* Don't consider this char as a possible match start if it is part,
+        yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+        continue;
+#endif
+
+      /* This looks like a suitable start position, so run the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      match_last = check_matching (&mctx, fl_longest_match,
+                                   range >= 0 ? &match_first : NULL);
+      if (match_last != -1)
+        {
+          if (BE (match_last == -2, 0))
+            {
+              err = REG_ESPACE;
+              goto free_return;
+            }
+          else
+            {
+              mctx.match_last = match_last;
+              if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+                {
+                  re_dfastate_t *pstate = mctx.state_log[match_last];
+                  mctx.last_node = check_halt_state_context (&mctx, pstate,
+                                                             match_last);
+                }
+              if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+                  || dfa->nbackref)
+                {
+                  err = prune_impossible_nodes (&mctx);
+                  if (err == REG_NOERROR)
+                    break;
+                  if (BE (err != REG_NOMATCH, 0))
+                    goto free_return;
+                  match_last = -1;  /* Pruning rejected it; try the next start.  */
+                }
+              else
+                break; /* We found a match.  */
+            }
+        }
+
+      match_ctx_clean (&mctx);
+    }
+
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
+  /* Set pmatch[] if we need.  */
+  if (nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+        pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where the match starts and ends.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+        {
+          err = set_regs (preg, &mctx, nmatch, pmatch,
+                          dfa->has_plural_match && dfa->nbackref > 0);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+
+      /* Finally, add the offset to each register, since we slid
+         the buffers so that we could assume that the matching starts
+         from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+        if (pmatch[reg_idx].rm_so != -1)
+          {
+#ifdef RE_ENABLE_I18N
+            if (BE (mctx.input.offsets_needed != 0, 0))
+              {
+                pmatch[reg_idx].rm_so =
+                  (pmatch[reg_idx].rm_so == mctx.input.valid_len
+                   ? mctx.input.valid_raw_len
+                   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+                pmatch[reg_idx].rm_eo =
+                  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+                   ? mctx.input.valid_raw_len
+                   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+              }
+#else
+            assert (mctx.input.offsets_needed == 0);
+#endif
+            pmatch[reg_idx].rm_so += match_first;
+            pmatch[reg_idx].rm_eo += match_first;
+          }
+      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+        {
+          pmatch[nmatch + reg_idx].rm_so = -1;
+          pmatch[nmatch + reg_idx].rm_eo = -1;
+        }
+
+      if (dfa->subexp_map)
+        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+          if (dfa->subexp_map[reg_idx] != reg_idx)
+            {  /* This group takes its offsets from the group it is mapped to.  */
+              pmatch[reg_idx + 1].rm_so
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+              pmatch[reg_idx + 1].rm_eo
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+            }
+    }
+
+ free_return:
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&mctx.input);
+  return err;
+}
+/* Sift the state log of MCTX backwards from the recorded match end
+   (MCTX->match_last, halting in MCTX->last_node).  When the pattern has
+   back-references and neither sifted array keeps a state at index 0, retry
+   from the nearest earlier index whose logged state is a halt state.  On
+   success the state log is replaced by the sifted states and
+   MCTX->last_node / MCTX->match_last are updated.
+   Return REG_NOERROR on success, REG_NOMATCH if no earlier halt state is
+   left to try, REG_ESPACE if an array cannot be allocated, or the error
+   reported by a helper.  */
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+     re_match_context_t *mctx;
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+        {
+          ret = REG_ESPACE;
+          goto free_return;
+        }
+      while (1)
+        {
+          memset (lim_states, '\0',
+                  sizeof (re_dfastate_t *) * (match_last + 1));
+          sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+                         match_last);
+          ret = sift_states_backward (mctx, &sctx);
+          re_node_set_free (&sctx.limits);
+          if (BE (ret != REG_NOERROR, 0))
+              goto free_return;
+          if (sifted_states[0] != NULL || lim_states[0] != NULL)
+            break;
+          do
+            {  /* Step back to the previous logged halt state, if any.  */
+              --match_last;
+              if (match_last < 0)
+                {
+                  ret = REG_NOMATCH;
+                  goto free_return;
+                }
+            } while (mctx->state_log[match_last] == NULL
+                     || !mctx->state_log[match_last]->halt);
+          halt_node = check_halt_state_context (mctx,
+                                                mctx->state_log[match_last],
+                                                match_last);
+        }
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+                               match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;
+      if (BE (ret != REG_NOERROR, 0))
+        goto free_return;
+    }
+  else
+    {
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+      ret = sift_states_backward (mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+        goto free_return;
+    }
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;  /* Now owned by MCTX; must not be freed below.  */
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Acquire an initial state and return it.
+   We must select appropriate initial state depending on the context,
+   since initial states may have constraints like "\<", "^", etc..
*/ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? */ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). 
*/ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. */ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. 
*/ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. */ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. 
*/ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. 
*/
+              break;
+            }
+        }
+      return dest_node;
+    }
+  else
+    {
+      /* Number of input bytes NODE consumes; 0 means "not decided yet" and
+         falls through to the single-byte check further down.  */
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+        {
+          /* Register 0 is the whole match, hence the + 1.  A back reference
+             consumes as many bytes as the referenced group spans.  */
+          int subexp_idx = dfa->nodes[node].opr.idx + 1;
+          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+          if (fs != NULL)
+            {
+              /* Backtracking mode: the group must be fully set, and the
+                 input at *PIDX must repeat the group's bytes exactly.  */
+              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+                return -1;
+              else if (naccepted)
+                {
+                  char *buf = (char *) re_string_get_buffer (&mctx->input);
+                  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+                              naccepted) != 0)
+                    return -1;
+                }
+            }
+
+          if (naccepted == 0)
+            {
+              /* An empty back reference is treated like an epsilon
+                 transition: record NODE and follow its first epsilon
+                 destination if the state log allows it.  */
+              int dest_node;
+              err = re_node_set_insert (eps_via_nodes, node);
+              if (BE (err < 0, 0))
+                return -2;
+              dest_node = dfa->edests[node].elems[0];
+              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                        dest_node))
+                return dest_node;
+            }
+        }
+
+      if (naccepted != 0
+          || check_node_accept (mctx, dfa->nodes + node, *pidx))
+        {
+          int dest_node = dfa->nexts[node];
+          /* Advance by one byte for a plain node, otherwise by the
+             multi-byte / back-reference length computed above.  */
+          *pidx = (naccepted == 0) ?
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+          regmatch_t *pmatch, int fl_backtrack)
+{
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int idx, cur_node;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  /* Presumably { num = 0, alloc = 2, stack = NULL } -- confirm the field
+     order against struct re_fail_stack_t.  */
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+  int prev_idx_match_malloced = 0;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  /* A fail stack exists only when the caller asked for backtracking;
+     FS == NULL selects the non-backtracking paths below.  */
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+        return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  /* Scratch copy of PMATCH handed to update_regs.  It is taken from the
+     stack when __libc_use_alloca permits, otherwise from the heap, in
+     which case every exit path below must free it.  */
+  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+  else
+    {
+      prev_idx_match = re_malloc (regmatch_t, nmatch);
+      if (prev_idx_match == NULL)
+        {
+          free_fail_stack_return (fs);
+          return REG_ESPACE;
+        }
+      prev_idx_match_malloced = 1;
+    }
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+  /* Walk the match from its start to its end, one node at a time.  IDX is
+     advanced by proceed_next_node / pop_fail_stack, not by the loop
+     header.  */
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+        {
+          int reg_idx;
+          if (fs)
+            {
+              /* Accept only if no register was opened (rm_so set) without
+                 being closed (rm_eo still -1); otherwise try the next
+                 saved alternative.  */
+              for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+                if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+                  break;
+              if (reg_idx == nmatch)
+                {
+                  re_node_set_free (&eps_via_nodes);
+                  if (prev_idx_match_malloced)
+                    re_free (prev_idx_match);
+                  return free_fail_stack_return (fs);
+                }
+              /* NOTE(review): nothing here checks that the fail stack is
+                 non-empty before popping.  */
+              cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                         &eps_via_nodes);
+            }
+          else
+            {
+              re_node_set_free (&eps_via_nodes);
+              if (prev_idx_match_malloced)
+                re_free (prev_idx_match);
+              return REG_NOERROR;
+            }
+        }
+
+      /* Proceed to next node.
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/
+              /* Snapshot the accepted registers so a later empty match
+                 can be rolled back to this point.  */
+              memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+            }
+          else
+            {
+              if (dfa->nodes[cur_node].opt_subexp
+                  && prev_idx_match[reg_num].rm_so != -1)
+                /* We transited through an empty match for an optional
+                   subexpression, like (a?)*, and this is not the subexp's
+                   first match.  Copy back the old content of the registers
+                   so that matches of an inner subexpression are undone as
+                   well, like in ((a?))*.  */
+                memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+              else
+                /* We completed a subexpression, but it may be part of
+                   an optional one, so do not update PREV_IDX_MATCH.  */
+                pmatch[reg_num].rm_eo = cur_idx;
+            }
+        }
+    }
+}
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+   and sifts the nodes in each state according to the following rules.
+   The sifted states are written to SCTX->sifted_states.
+
+   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+        If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+        the LAST_NODE, we throw away the node `a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+        string `s' and transit to `b':
+        i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+           away the node `a'.
+        ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+            thrown away, we throw away the node `a'.
+     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+        i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+           node `a'.
+        ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+            we throw away the node `a'.
*/
+
+/* Non-zero when STATE is non-NULL and its node set contains NODE.
+   STATE is expanded twice, so pass an expression without side effects.  */
+#define STATE_NODE_CONTAINS(state,node) \
+  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+  reg_errcode_t err;
+  /* Count of consecutive string positions whose sifted state is NULL.  */
+  int null_cnt = 0;
+  int str_idx = sctx->last_str_idx;
+  re_node_set cur_dest;
+
+#ifdef DEBUG
+  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+
+  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
+     transit to the last_node and the last_node itself.  */
+  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* Then check each states in the state_log.  */
+  while (str_idx > 0)
+    {
+      /* Update counters.  */
+      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+      /* More empty positions in a row than mctx->max_mb_elem_len: give
+         up, clear the remaining sifted states below STR_IDX and report
+         success.  */
+      if (null_cnt > mctx->max_mb_elem_len)
+        {
+          memset (sctx->sifted_states, '\0',
+                  sizeof (re_dfastate_t *) * str_idx);
+          re_node_set_free (&cur_dest);
+          return REG_NOERROR;
+        }
+      re_node_set_empty (&cur_dest);
+      --str_idx;
+
+      if (mctx->state_log[str_idx])
+        {
+          err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+
+      /* Add all the nodes which satisfy the following conditions:
+         - It can epsilon transit to a node in CUR_DEST.
+         - It is in CUR_SRC.
+         And update state_log.
*/
+      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+    }
+  err = REG_NOERROR;
+ free_return:
+  re_node_set_free (&cur_dest);
+  return err;
+}
+
+/* Collect into CUR_DEST the non-epsilon nodes of state_log[STR_IDX] that
+   accept the input at STR_IDX and lead to a node that survived sifting at
+   the position they transit to.
+   Returns REG_NOERROR, or REG_ESPACE if inserting into CUR_DEST fails.
+   The caller must ensure mctx->state_log[STR_IDX] is non-NULL; it is
+   dereferenced unconditionally.  */
+
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                     int str_idx, re_node_set *cur_dest)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+  int i;
+
+  /* Then build the next sifted state.
+     We build the next sifted state on `cur_dest', and update
+     `sifted_states[str_idx]' with `cur_dest'.
+     Note:
+     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+     `cur_src' points the node_set of the old `state_log[str_idx]'
+     (with the epsilon nodes pre-filtered out).  */
+  for (i = 0; i < cur_src->nelem; i++)
+    {
+      int prev_node = cur_src->elems[i];
+      /* Bytes PREV_NODE consumes at STR_IDX; 0 means it is rejected.  */
+      int naccepted = 0;
+      int ret;
+
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[prev_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'.  */
+      if (dfa->nodes[prev_node].accept_mb)
+        naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+                                         str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+      /* We don't check backreferences here.
+         See update_cur_sifted_state().  */
+      if (!naccepted
+          && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+          && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+                                  dfa->nexts[prev_node]))
+        naccepted = 1;
+
+      if (naccepted == 0)
+        continue;
+
+      /* With active back-reference limits, drop the transition when
+         check_dst_limits reports a conflict (non-zero).  */
+      if (sctx->limits.nelem)
+        {
+          int to_idx = str_idx + naccepted;
+          if (check_dst_limits (mctx, &sctx->limits,
+                                dfa->nexts[prev_node], to_idx,
+                                prev_node, str_idx))
+            continue;
+        }
+      ret = re_node_set_insert (cur_dest, prev_node);
+      if (BE (ret == -1, 0))
+        return REG_ESPACE;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions.
*/
+
+/* Make the state log usable up to index NEXT_STATE_LOG_IDX: extend the
+   input buffers when that index lies beyond them, and NULL-fill the log
+   entries between the previous top and the new one.
+   Returns REG_NOERROR, or the error reported by extend_buffers.  */
+
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+{
+  const int old_top = mctx->state_log_top;
+  const int past_buffers = next_state_log_idx >= mctx->input.bufs_len;
+  const int past_valid = (next_state_log_idx >= mctx->input.valid_len
+                          && mctx->input.valid_len < mctx->input.len);
+
+  if (past_buffers || past_valid)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  if (next_state_log_idx > old_top)
+    {
+      /* Clear the slots in (old_top, next_state_log_idx].  */
+      memset (&mctx->state_log[old_top + 1], '\0',
+              sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+      mctx->state_log_top = next_state_log_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM entries of SRC into DST slot by slot.  An empty DST
+   slot takes the SRC entry as is; when both are set, DST receives the
+   state acquired for the union of the two node sets.
+   Returns REG_NOERROR, or the first error encountered.  */
+
+static reg_errcode_t
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+                   re_dfastate_t **src, int num)
+{
+  int slot;
+  for (slot = 0; slot < num; ++slot)
+    {
+      reg_errcode_t err;
+      re_node_set merged_set;
+
+      if (dst[slot] == NULL)
+        {
+          dst[slot] = src[slot];
+          continue;
+        }
+      if (src[slot] == NULL)
+        continue;
+
+      err = re_node_set_init_union (&merged_set, &dst[slot]->nodes,
+                                    &src[slot]->nodes);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+      dst[slot] = re_acquire_state (&err, dfa, &merged_set);
+      /* The temporary union is released before the error is examined.  */
+      re_node_set_free (&merged_set);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+                         re_sift_context_t *sctx, int str_idx,
+                         re_node_set *dest_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  const re_node_set *candidates;
+  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
+                : &mctx->state_log[str_idx]->nodes);
+
+  if (dest_nodes->nelem == 0)
+    sctx->sifted_states[str_idx] = NULL;
+  else
+    {
+      if (candidates)
+        {
+          /* At first, add the nodes which can epsilon transit to a node in
+             DEST_NODE.
*/
+          err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+
+          /* Then, check the limitations in the current sift_context.  */
+          if (sctx->limits.nelem)
+            {
+              err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+                                         mctx->bkref_ents, str_idx);
+              if (BE (err != REG_NOERROR, 0))
+                return err;
+            }
+        }
+
+      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  if (candidates && mctx->state_log[str_idx]->has_backref)
+    {
+      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Add to DEST_NODES every node of CANDIDATES that can reach a node of
+   DEST_NODES through epsilon transitions.  The union of the inverse
+   epsilon closures is cached in the DFA state acquired for DEST_NODES
+   (state->inveclosure) and built on first use.
+   Returns REG_NOERROR on success, or an error code such as REG_ESPACE.
+   DEST_NODES must be non-empty: the acquired state is dereferenced without
+   a NULL check, and the only caller in view guarantees this.  */
+
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+                       const re_node_set *candidates)
+{
+  reg_errcode_t err = REG_NOERROR;
+  int i;
+
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  if (!state->inveclosure.alloc)
+    {
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+      if (BE (err != REG_NOERROR, 0))
+        return REG_ESPACE;
+      for (i = 0; i < dest_nodes->nelem; i++)
+        {
+          err = re_node_set_merge (&state->inveclosure,
+                                   dfa->inveclosures + dest_nodes->elems[i]);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              /* Throw the partial cache away so a later call rebuilds it
+                 instead of trusting an incomplete closure.
+                 NOTE(review): relies on re_node_set_init_empty zeroing
+                 the set, including `alloc' -- confirm.  */
+              re_node_set_free (&state->inveclosure);
+              re_node_set_init_empty (&state->inveclosure);
+              return REG_ESPACE;
+            }
+        }
+    }
+  return re_node_set_add_intersect (dest_nodes, candidates,
+                                    &state->inveclosure);
+}
+
+static reg_errcode_t
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+                       const re_node_set *candidates)
+{
+  int ecl_idx;
+  reg_errcode_t err;
+  re_node_set *inv_eclosure = dfa->inveclosures + node;
+  re_node_set except_nodes;
+  re_node_set_init_empty (&except_nodes);
+  for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+    {
+      int cur_node = inv_eclosure->elems[ecl_idx];
+      if (cur_node == node)
+        continue;
+      if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+        {
+          int edst1 = dfa->edests[cur_node].elems[0];
+          int edst2 =
((dfa->edests[cur_node].nelem > 1)
+                       ? dfa->edests[cur_node].elems[1] : -1);
+          /* CUR_NODE must be kept if one of its epsilon destinations lies
+             outside NODE's inverse closure yet is still in DEST_NODES: it
+             then reaches the destination set without going through NODE.  */
+          if ((!re_node_set_contains (inv_eclosure, edst1)
+               && re_node_set_contains (dest_nodes, edst1))
+              || (edst2 > 0
+                  && !re_node_set_contains (inv_eclosure, edst2)
+                  && re_node_set_contains (dest_nodes, edst2)))
+            {
+              err = re_node_set_add_intersect (&except_nodes, candidates,
+                                               dfa->inveclosures + cur_node);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  re_node_set_free (&except_nodes);
+                  return err;
+                }
+            }
+        }
+    }
+  /* Remove from DEST_NODES every node of the inverse closure that was not
+     exempted above.  */
+  for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+    {
+      int cur_node = inv_eclosure->elems[ecl_idx];
+      if (!re_node_set_contains (&except_nodes, cur_node))
+        {
+          /* Presumably re_node_set_contains yields position + 1 (0 when
+             absent), making IDX a 0-based position or -1 -- confirm that
+             re_node_set_remove_at tolerates -1.  */
+          int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+          re_node_set_remove_at (dest_nodes, idx);
+        }
+    }
+  re_node_set_free (&except_nodes);
+  return REG_NOERROR;
+}
+
+/* Return 1 if the transition from (SRC_NODE, SRC_IDX) to
+   (DST_NODE, DST_IDX) conflicts with any back-reference limit in LIMITS,
+   that is, if source and destination fall on different sides of some
+   limited subexpression; return 0 otherwise.  */
+
+static int
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+                  int dst_node, int dst_idx, int src_node, int src_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int lim_idx, src_pos, dst_pos;
+
+  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int subexp_idx;
+      struct re_backref_cache_entry *ent;
+      ent = mctx->bkref_ents + limits->elems[lim_idx];
+      subexp_idx = dfa->nodes[ent->node].opr.idx;
+
+      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
+                                           subexp_idx, dst_node, dst_idx,
+                                           dst_bkref_idx);
+      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
+                                           subexp_idx, src_node, src_idx,
+                                           src_bkref_idx);
+
+      /* In case of:
+         <src> <dst> ( <subexp> )
+         ( <subexp> ) <src> <dst>
+         ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
+      if (src_pos == dst_pos)
+        continue; /* This is unrelated limitation.
*/
+      else
+        return 1;
+    }
+  return 0;
+}
+
+/* Boundary case of check_dst_limits_calc_pos: decide on which side of
+   subexpression SUBEXP_IDX the node FROM_NODE lies by inspecting its
+   epsilon closure.  BOUNDARIES has bit 0 set when the string index equals
+   the subexpression start and bit 1 set when it equals its end.
+   Returns -1 (before), 0 (inside) or 1 (after).
+   Side effect: clears SUBEXP_IDX's bit in eps_reachable_subexps_map of
+   back-reference cache entries that were explored without a decision.  */
+
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+                             int subexp_idx, int from_node, int bkref_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *eclosures = dfa->eclosures + from_node;
+  int node_idx;
+
+  /* Else, we are on the boundary: examine the nodes on the epsilon
+     closure.  */
+  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+    {
+      int node = eclosures->elems[node_idx];
+      switch (dfa->nodes[node].type)
+        {
+        case OP_BACK_REF:
+          if (bkref_idx != -1)
+            {
+              struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+              /* Walk the run of cache entries chained by `more'.  Each
+                 `continue' jumps to the loop condition, which still
+                 advances ENT.  */
+              do
+                {
+                  int dst, cpos;
+
+                  if (ent->node != node)
+                    continue;
+
+                  if (subexp_idx < BITSET_WORD_BITS
+                      && !(ent->eps_reachable_subexps_map
+                           & ((bitset_word_t) 1 << subexp_idx)))
+                    continue;
+
+                  /* Recurse trying to reach the OP_OPEN_SUBEXP and
+                     OP_CLOSE_SUBEXP cases below.  But, if the
+                     destination node is the same node as the source
+                     node, don't recurse because it would cause an
+                     infinite loop: a regex that exhibits this behavior
+                     is ()\1*\1*  */
+                  dst = dfa->edests[node].elems[0];
+                  if (dst == from_node)
+                    {
+                      if (boundaries & 1)
+                        return -1;
+                      else /* if (boundaries & 2) */
+                        return 0;
+                    }
+
+                  cpos =
+                    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+                                                 dst, bkref_idx);
+                  if (cpos == -1 /* && (boundaries & 1) */)
+                    return -1;
+                  if (cpos == 0 && (boundaries & 2))
+                    return 0;
+
+                  /* No decision through this entry: remember that so the
+                     bitmap test above skips it next time.  */
+                  if (subexp_idx < BITSET_WORD_BITS)
+                    ent->eps_reachable_subexps_map
+                      &= ~((bitset_word_t) 1 << subexp_idx);
+                }
+              while (ent++->more);
+            }
+          break;
+
+        case OP_OPEN_SUBEXP:
+          if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+            return -1;
+          break;
+
+        case OP_CLOSE_SUBEXP:
+          if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+            return 0;
+          break;
+
+        default:
+            break;
+        }
+    }
+
+  /* Nothing in the closure decided it: on the end boundary the node lies
+     after the subexpression, otherwise inside it.  */
+  return (boundaries & 2) ?
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/
+          /* Note that here ent->subexp_to == str_idx and
+             str_idx != ent->subexp_from.  */
+          if (ops_node >= 0)
+            {
+              err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+                                           candidates);
+              if (BE (err != REG_NOERROR, 0))
+                return err;
+            }
+
+          /* Check the limitation of the close subexpression.  */
+          if (cls_node >= 0)
+            for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+              {
+                int node = dest_nodes->elems[node_idx];
+                if (!re_node_set_contains (dfa->inveclosures + node,
+                                           cls_node)
+                    && !re_node_set_contains (dfa->eclosures + node,
+                                              cls_node))
+                  {
+                    /* It is against this limitation.
+                       Remove it from the current sifted state.  */
+                    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+                                                 candidates);
+                    if (BE (err != REG_NOERROR, 0))
+                      return err;
+                    /* DEST_NODES may have shrunk; step back so the loop
+                       increment revisits this position.  */
+                    --node_idx;
+                  }
+              }
+        }
+      else /* (ent->subexp_to != str_idx)  */
+        {
+          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+            {
+              int node = dest_nodes->elems[node_idx];
+              re_token_type_t type = dfa->nodes[node].type;
+              if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+                {
+                  if (subexp_idx != dfa->nodes[node].opr.idx)
+                    continue;
+                  /* It is against this limitation.
+                     Remove it from the current sifted state.  */
+                  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+                                               candidates);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+                }
+            }
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* For every OP_BACK_REF node of CANDIDATES that has cache entries at
+   STR_IDX, re-run the backward sift from STR_IDX with that entry added to
+   the limits, and merge the result into SCTX->limited_states when that
+   array is present.
+   Returns REG_NOERROR, or the first error encountered.  */
+
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                   int str_idx, const re_node_set *candidates)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int node_idx, node;
+  re_sift_context_t local_sctx;
+  int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+  if (first_idx == -1)
+    return REG_NOERROR;
+
+  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.
*/
+
+  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+    {
+      int enabled_idx;
+      re_token_type_t type;
+      struct re_backref_cache_entry *entry;
+      node = candidates->elems[node_idx];
+      type = dfa->nodes[node].type;
+      /* Avoid infinite loop for the REs like "()\1+".  */
+      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+        continue;
+      if (type != OP_BACK_REF)
+        continue;
+
+      entry = mctx->bkref_ents + first_idx;
+      enabled_idx = first_idx;
+      /* Walk the cache entries chained by `more', starting at FIRST_IDX.
+         ENABLED_IDX tracks the index of ENTRY; each `continue' jumps to
+         the loop condition, which advances both.  */
+      do
+        {
+          int subexp_len;
+          int to_idx;
+          int dst_node;
+          int ret;
+          re_dfastate_t *cur_state;
+
+          if (entry->node != node)
+            continue;
+          subexp_len = entry->subexp_to - entry->subexp_from;
+          to_idx = str_idx + subexp_len;
+          /* A non-empty back reference moves on to the next node; an empty
+             one follows its first epsilon destination instead.  */
+          dst_node = (subexp_len ? dfa->nexts[node]
+                      : dfa->edests[node].elems[0]);
+
+          if (to_idx > sctx->last_str_idx
+              || sctx->sifted_states[to_idx] == NULL
+              || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+              || check_dst_limits (mctx, &sctx->limits, node,
+                                   str_idx, dst_node, to_idx))
+            continue;
+
+          /* Set up the local sift context on first use: a shallow copy of
+             SCTX with its own copy of the limits set.  */
+          if (local_sctx.sifted_states == NULL)
+            {
+              local_sctx = *sctx;
+              err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          local_sctx.last_node = node;
+          local_sctx.last_str_idx = str_idx;
+          ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+          if (BE (ret < 0, 0))
+            {
+              err = REG_ESPACE;
+              goto free_return;
+            }
+          /* The nested sift goes through the same sifted_states array
+             (copied by pointer above), so the entry at STR_IDX is saved
+             here and put back afterwards.  */
+          cur_state = local_sctx.sifted_states[str_idx];
+          err = sift_states_backward (mctx, &local_sctx);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+          if (sctx->limited_states != NULL)
+            {
+              err = merge_state_array (dfa, sctx->limited_states,
+                                       local_sctx.sifted_states,
+                                       str_idx + 1);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          local_sctx.sifted_states[str_idx] = cur_state;
+          re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+          /* mctx->bkref_ents may have changed, reload the pointer.
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/
+#if 0
+  if (0)
+    /* don't use transition table  */
+    return transit_state_sb (err, mctx, state);
+#endif
+
+  /* Use transition table  */
+  ch = re_string_fetch_byte (&mctx->input);
+  /* At most two passes: if STATE has no table yet, build_trtable creates
+     one and the lookup is retried.  */
+  for (;;)
+    {
+      trtable = state->trtable;
+      if (BE (trtable != NULL, 1))
+        return trtable[ch];
+
+      /* The word-aware table is indexed with an SBC_MAX offset when the
+         preceding context is a word context.  */
+      trtable = state->word_trtable;
+      if (BE (trtable != NULL, 1))
+        {
+          unsigned int context;
+          context
+            = re_string_context_at (&mctx->input,
+                                    re_string_cur_idx (&mctx->input) - 1,
+                                    mctx->eflags);
+          if (IS_WORD_CONTEXT (context))
+            return trtable[ch + SBC_MAX];
+          else
+            return trtable[ch];
+        }
+
+      if (!build_trtable (mctx->dfa, state))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+
+      /* Retry, we now have a transition table.  */
+    }
+}
+
+/* Record NEXT_STATE in the state log at the current input index.  If the
+   log already holds a state there, NEXT_STATE is merged with it; when the
+   pattern has back references, sub-expression tops and back references of
+   the resulting state are processed as well.
+   Returns the state now current at that index; on error returns NULL with
+   *ERR set.  NULL with *ERR == REG_NOERROR is also possible when there is
+   no state.  */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+                      re_dfastate_t *next_state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int cur_idx = re_string_cur_idx (&mctx->input);
+
+  if (cur_idx > mctx->state_log_top)
+    {
+      mctx->state_log[cur_idx] = next_state;
+      mctx->state_log_top = cur_idx;
+    }
+  else if (mctx->state_log[cur_idx] == 0)
+    {
+      mctx->state_log[cur_idx] = next_state;
+    }
+  else
+    {
+      re_dfastate_t *pstate;
+      unsigned int context;
+      /* TABLE_NODES stays NULL when NEXT_NODES merely aliases LOG_NODES;
+         in that case NEXT_NODES must not be freed.  */
+      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+         the destination of a multibyte char/collating element/
+         back reference.  Then the next state is the union set of
+         these destinations and the results of the transition table.  */
+      pstate = mctx->state_log[cur_idx];
+      log_nodes = pstate->entrance_nodes;
+      if (next_state != NULL)
+        {
+          table_nodes = next_state->entrance_nodes;
+          *err = re_node_set_init_union (&next_nodes, table_nodes,
+                                             log_nodes);
+          if (BE (*err != REG_NOERROR, 0))
+            return NULL;
+        }
+      else
+        next_nodes = *log_nodes;
+      /* Note: We already add the nodes of the initial state,
+         then we don't need to add them here.
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/
+/* Only OP_OPEN_SUBEXP nodes whose sub expression index is below
+   BITSET_WORD_BITS and whose bit is set in dfa->used_bkref_map are
+   registered, each through match_ctx_add_subtop () together with STR_IDX.
+   Returns REG_NOERROR, or the first error match_ctx_add_subtop () gives. */
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                           int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int node_idx;
+  reg_errcode_t err;
+
+  /* TODO: This isn't efficient.
+           Because there might be more than one nodes whose types are
+           OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+           nodes.
+           E.g. RE: (a){2} */
+  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+    {
+      int node = cur_nodes->elems[node_idx];
+      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+          && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+          && (dfa->used_bkref_map
+              & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
+        {
+          err = match_ctx_add_subtop (mctx, node, str_idx);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte.
+   This definition sits inside `#if 0' and is therefore not compiled; the
+   only reference to it visible nearby is itself inside `#if 0'. */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+                  re_dfastate_t *state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  re_node_set next_nodes;
+  re_dfastate_t *next_state;
+  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+  unsigned int context;
+
+  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+    {
+      int cur_node = state->nodes.elems[node_cnt];
+      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+        {
+          *err = re_node_set_merge (&next_nodes,
+                                    dfa->eclosures + dfa->nexts[cur_node]);
+          if (BE (*err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return NULL;
+            }
+        }
+    }
+  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+  /* We don't need to check errors here, since the return value of
+     this function is next_state and ERR is already set. */
+
+  re_node_set_free (&next_nodes);
+  re_string_skip_bytes (&mctx->input, 1);
+  return next_state;
+}
+#endif
+/* transit_state_mb: for every node of PSTATE flagged accept_mb whose
+   constraint (if any) is satisfied at the current input index, ask
+   check_node_accept_bytes () how many bytes the node accepts there.  When
+   that count is non-zero, the epsilon closure of the node's successor is
+   merged into mctx->state_log at the index that many bytes ahead, and
+   mctx->max_mb_elem_len is raised to the count if it was smaller.
+   Returns REG_NOERROR, or the first error met.  Compiled only when
+   RE_ENABLE_I18N is defined. */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+
+  for (i = 0; i < pstate->nodes.nelem; ++i)
+    {
+      re_node_set dest_nodes, *new_nodes;
+      int cur_node_idx = pstate->nodes.elems[i];
+      int naccepted, dest_idx;
+      unsigned int context;
+      re_dfastate_t *dest_state;
+
+      if (!dfa->nodes[cur_node_idx].accept_mb)
+        continue;
+
+      if (dfa->nodes[cur_node_idx].constraint)
+        {
+          context = re_string_context_at (&mctx->input,
+                                          re_string_cur_idx (&mctx->input),
+                                          mctx->eflags);
+          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+                                           context))
+            continue;
+        }
+
+      /* How many bytes the node can accept? */
+      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+                                           re_string_cur_idx (&mctx->input));
+      if (naccepted == 0)
+        continue;
+
+      /* The node can accept `naccepted' bytes. */
+      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+                               : mctx->max_mb_elem_len);
+      err = clean_state_log_if_needed (mctx, dest_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+#ifdef DEBUG
+      assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+      dest_state = mctx->state_log[dest_idx];
+      if (dest_state == NULL)
+        dest_nodes = *new_nodes;
+      else
+        {
+          err = re_node_set_init_union (&dest_nodes,
+                                        dest_state->entrance_nodes, new_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      context = re_string_context_at (&mctx->input, dest_idx - 1,
+                                      mctx->eflags);
+      mctx->state_log[dest_idx]
+        = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+      if (dest_state != NULL)
+        re_node_set_free (&dest_nodes);
+      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+/* transit_state_bkref: for every OP_BACK_REF node in NODES whose
+   constraint (if any) is satisfied at the current input index, call
+   get_subexp () and then walk the bkref_ents entries from the previous
+   value of mctx->nbkref_ents onward.  For each entry that belongs to this
+   node and index, the destination node set is merged into
+   mctx->state_log at the current index plus the length of the matched
+   sub expression.  An empty match that enlarged the state at the current
+   index triggers check_subexp_matching_top () and a recursive call on
+   the new destination nodes.  Returns REG_NOERROR or the first error. */
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      const re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not. */
+      if (node->type != OP_BACK_REF)
+        continue;
+
+      if (node->constraint)
+        {
+          context = re_string_context_at (&mctx->input, cur_str_idx,
+                                          mctx->eflags);
+          if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+            continue;
+        }
+
+      /* `node' is a backreference.
+         Check the substrings which the sub expression matched.
+         BKC_IDX remembers where the cache ended before the call, so the
+         loop below starts at that position. */
+      bkc_idx = mctx->nbkref_ents;
+      err = get_subexp (mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+         the backreference to appropriate state_log. */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+        {
+          int subexp_len;
+          re_dfastate_t *dest_state;
+          struct re_backref_cache_entry *bkref_ent;
+          bkref_ent = mctx->bkref_ents + bkc_idx;
+          if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+            continue;
+          subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+          new_dest_nodes = (subexp_len == 0
+                            ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+                            : dfa->eclosures + dfa->nexts[node_idx]);
+          dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+                          - bkref_ent->subexp_from);
+          context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+                                          mctx->eflags);
+          dest_state = mctx->state_log[dest_str_idx];
+          prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+                        : mctx->state_log[cur_str_idx]->nodes.nelem);
+          /* Add `new_dest_node' to state_log. */
+          if (dest_state == NULL)
+            {
+              mctx->state_log[dest_str_idx]
+                = re_acquire_state_context (&err, dfa, new_dest_nodes,
+                                            context);
+              if (BE (mctx->state_log[dest_str_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          else
+            {
+              re_node_set dest_nodes;
+              err = re_node_set_init_union (&dest_nodes,
+                                            dest_state->entrance_nodes,
+                                            new_dest_nodes);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  re_node_set_free (&dest_nodes);
+                  goto free_return;
+                }
+              mctx->state_log[dest_str_idx]
+                = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+              re_node_set_free (&dest_nodes);
+              if (BE (mctx->state_log[dest_str_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          /* We need to check recursively if the backreference can epsilon
+             transit. 
*/
+          if (subexp_len == 0
+              && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+            {
+              err = check_subexp_matching_top (mctx, new_dest_nodes,
+                                               cur_str_idx);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+              err = transit_state_bkref (mctx, new_dest_nodes);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+            }
+        }
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, then we
+   delay these checking for prune_impossible_nodes().
+
+   MCTX is the match context whose sub_tops, state_log and input buffer
+   are consulted.  BKREF_NODE is the back reference node and
+   BKREF_STR_IDX the input index at which it is evaluated.
+
+   Returns REG_NOERROR when the enumeration finished (including the case
+   where this node/index pair is already cached), REG_ESPACE when an
+   allocation fails, or the error reported by a helper.  REG_NOMATCH from
+   check_arrival () or get_subexp_sub () only rejects one candidate and
+   is never returned.
+
+   BUF caches the input buffer pointer.  Every call that may extend the
+   input (clean_state_log_if_needed (), extend_buffers (),
+   get_subexp_sub ()) is followed by a reload of BUF before BUF is read
+   again. */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+        = mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+          return REG_NOERROR; /* We already checked it. */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+        continue; /* It isn't related. */
+
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+         evaluated. */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+        {
+          int sl_str_diff;
+          sub_last = sub_top->lasts[sub_last_idx];
+          sl_str_diff = sub_last->str_idx - sl_str;
+          /* The matched string by the sub expression match with the substring
+             at the back reference? */
+          if (sl_str_diff > 0)
+            {
+              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+                {
+                  /* Not enough chars for a successful match. */
+                  if (bkref_str_off + sl_str_diff > mctx->input.len)
+                    break;
+
+                  err = clean_state_log_if_needed (mctx,
+                                                   bkref_str_off
+                                                   + sl_str_diff);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+                /* We don't need to search this sub expression any more. */
+                break;
+            }
+          bkref_str_off += sl_str_diff;
+          sl_str += sl_str_diff;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf, since the preceding call might have reallocated
+             the buffer. */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+
+      if (sub_last_idx < sub_top->nlasts)
+        continue;
+      if (sub_last_idx > 0)
+        ++sl_str;
+      /* Then, search for the other last nodes of the sub expression. */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+        {
+          int cls_node, sl_str_off;
+          const re_node_set *nodes;
+          sl_str_off = sl_str - sub_top->str_idx;
+          /* The matched string by the sub expression match with the substring
+             at the back reference? */
+          if (sl_str_off > 0)
+            {
+              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+                {
+                  /* If we are at the end of the input, we cannot match. */
+                  if (bkref_str_off >= mctx->input.len)
+                    break;
+
+                  err = extend_buffers (mctx);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (buf [bkref_str_off++] != buf[sl_str - 1])
+                break; /* We don't need to search this sub expression
+                          any more. */
+            }
+          if (mctx->state_log[sl_str] == NULL)
+            continue;
+          /* Does this state have a ')' of the sub expression? */
+          nodes = &mctx->state_log[sl_str]->nodes;
+          cls_node = find_subexp_node (dfa, nodes, subexp_num,
+                                       OP_CLOSE_SUBEXP);
+          if (cls_node == -1)
+            continue; /* No. */
+          if (sub_top->path == NULL)
+            {
+              sub_top->path = calloc (sizeof (state_array_t),
+                                      sl_str - sub_top->str_idx + 1);
+              if (sub_top->path == NULL)
+                return REG_ESPACE;
+            }
+          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+             in the current context? */
+          err = check_arrival (mctx, sub_top->path, sub_top->node,
+                               sub_top->str_idx, cls_node, sl_str,
+                               OP_CLOSE_SUBEXP);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+          if (BE (sub_last == NULL, 0))
+            return REG_ESPACE;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf here as well: get_subexp_sub () ends in
+             clean_state_log_if_needed (), so the buffer may have been
+             reallocated, and the next iteration reads through BUF. */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          /* Any other failure (e.g. REG_ESPACE) must reach the caller
+             instead of being dropped. */
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp(). */
+
+/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
+   If it can arrive, register the sub expression expressed with SUB_TOP
+   and SUB_LAST. */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  reg_errcode_t err;
+  int to_idx;
+  /* Can the subexpression arrive the back reference? 
*/
+  err = check_arrival (mctx, &sub_last->path, sub_last->node,
+                       sub_last->str_idx, bkref_node, bkref_str,
+                       OP_OPEN_SUBEXP);
+  if (err != REG_NOERROR)
+    return err;
+  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
+                             sub_last->str_idx);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+  return clean_state_log_if_needed (mctx, to_idx);
+}
+
+/* Find the first node in NODES whose type is TYPE and whose index is
+   SUBEXP_IDX, and return its node number; return -1 if NODES holds no
+   such node.  TYPE is the token type to look for, e.g. OP_CLOSE_SUBEXP
+   when searching for the ')' of a sub expression.
+   TODO: This function isn't efficient...
+         Because there might be more than one nodes whose types are
+         OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+         nodes.
+         E.g. RE: (a){2} */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                  int subexp_idx, int type)
+{
+  int cls_idx;
+  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
+    {
+      int cls_node = nodes->elems[cls_idx];
+      const re_token_t *node = dfa->nodes + cls_node;
+      if (node->type == type
+          && node->opr.idx == subexp_idx)
+        return cls_node;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused.
+   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.
+   REG_ESPACE is returned when PATH cannot be enlarged, and errors from
+   the helpers called below are returned unchanged.
+   TYPE and the sub expression index of TOP_NODE are passed on to
+   check_arrival_expand_ecl () and expand_bkref_cache ().
+   The walk resumes at PATH->next_idx when that is non-zero, otherwise it
+   starts at TOP_STR; PATH->next_idx is updated on the normal exit. */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+               int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  The newly added tail of the array is
+     zero-filled; on allocation failure PATH->alloc is put back. */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          path->alloc = old_alloc;
+          return REG_ESPACE;
+        }
+      path->array = new_array;
+      memset (new_array + old_alloc, '\0',
+              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  str_idx = path->next_idx ? path->next_idx : top_str;
+
+  /* Temporarily modify MCTX: the helpers below read and write
+     mctx->state_log, so PATH->array is installed in its place.
+     NOTE(review): the error returns further down leave without restoring
+     mctx->state_log and mctx->input.cur_idx; only the normal exit at
+     "Fix MCTX" puts them back.  Confirm that callers abandon the match
+     context after an error. */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set. */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          return err;
+        }
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+        {
+          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      else
+        re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+        {
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return err;
+            }
+        }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          return err;
+        }
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      if (mctx->state_log[str_idx + 1])
+        {
+          err = re_node_set_merge (&next_nodes,
+                                   &mctx->state_log[str_idx + 1]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return err;
+            }
+        }
+      if (cur_state)
+        {
+          err = check_arrival_add_next_nodes (mctx, str_idx,
+                                              &cur_state->non_eps_nodes,
+                                              &next_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return err;
+            }
+        }
+      ++str_idx;
+      if (next_nodes.nelem)
+        {
+          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return err;
+            }
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return err;
+            }
+        }
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&next_nodes);
+          return err;
+        }
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+               : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX. */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  /* Then check the current node set has the node LAST_NODE. */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    return REG_NOERROR;
+
+  return REG_NOMATCH;
+}
+
+/* Helper functions for check_arrival. */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+   to NEXT_NODES.
+   TODO: This function is similar to the functions transit_state*(),
+         however this function has many additional works.
+         Can't we unify them? 
*/
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+                              re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+  /* Every path leaves through `done', so the scratch set used for the
+     multi-byte case is released in exactly one place.  */
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  re_node_set scratch;
+  int i;
+
+  re_node_set_init_empty (&scratch);
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      int nbytes = 0;
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* A node flagged accept_mb reports how many bytes it accepts at
+         STR_IDX.  */
+      if (dfa->nodes[node].accept_mb)
+        {
+          nbytes = check_node_accept_bytes (dfa, node, &mctx->input,
+                                            str_idx);
+          if (nbytes > 1)
+            {
+              /* More than one byte: the successor is logged directly at
+                 the index behind the accepted bytes, merged with whatever
+                 state is already logged there.  */
+              const int target_idx = str_idx + nbytes;
+              re_dfastate_t *logged = mctx->state_log[target_idx];
+
+              re_node_set_empty (&scratch);
+              if (logged != NULL)
+                {
+                  err = re_node_set_merge (&scratch, &logged->nodes);
+                  if (BE (err != REG_NOERROR, 0))
+                    goto done;
+                }
+              if (BE (re_node_set_insert (&scratch, dfa->nexts[node]) < 0, 0))
+                {
+                  err = REG_ESPACE;
+                  goto done;
+                }
+              mctx->state_log[target_idx]
+                = re_acquire_state (&err, dfa, &scratch);
+              if (BE (mctx->state_log[target_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                goto done;
+            }
+        }
+#endif /* RE_ENABLE_I18N */
+      /* Nothing accepted at STR_IDX: this node contributes no successor.  */
+      if (!nbytes
+          && !check_node_accept (mctx, dfa->nodes + node, str_idx))
+        continue;
+      if (BE (re_node_set_insert (next_nodes, dfa->nexts[node]) < 0, 0))
+        {
+          err = REG_ESPACE;
+          goto done;
+        }
+    }
+  /* Falling out of the loop is success, whatever ERR last held.  */
+  err = REG_NOERROR;
+ done:
+  re_node_set_free (&scratch);
+  return err;
+}
+
+/* For all the nodes in CUR_NODES, add the epsilon closures of them to
+   CUR_NODES, however exclude the nodes which are:
+    - inside the sub expression whose 
number is EX_SUBEXP, if FL_OPEN.
+    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+                          int ex_subexp, int type)
+{
+  /* The comment above still speaks of FL_OPEN; what the code receives is
+     TYPE, the token type that marks the boundary of sub expression
+     EX_SUBEXP.  On success CUR_NODES is freed and replaced by the expanded
+     set; on failure CUR_NODES is left as it was and the error returned.  */
+  re_node_set expanded;
+  reg_errcode_t err;
+  int i;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  err = re_node_set_alloc (&expanded, cur_nodes->nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* Collect into EXPANDED the epsilon closure of each node of CUR_NODES.  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      const re_node_set *closure = dfa->eclosures + node;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, type) == -1)
+        /* No boundary node in this closure: take it as a whole.  */
+        err = re_node_set_merge (&expanded, closure);
+      else
+        /* A boundary node is present: rebuild the closure step by step.  */
+        err = check_arrival_expand_ecl_sub (dfa, &expanded, node,
+                                            ex_subexp, type);
+      if (BE (err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&expanded);
+          return err;
+        }
+    }
+  re_node_set_free (cur_nodes);
+  *cur_nodes = expanded;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Check incrementally the epsilon closure of TARGET, and if it isn't
+   problematic append it to DST_NODES. 
*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+                              int target, int ex_subexp, int type)
+{
+  int node = target;
+
+  /* Follow the epsilon edges from TARGET until a node that DST_NODES
+     already holds is reached.  */
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      const re_token_t *tok = dfa->nodes + node;
+      const int at_boundary = (tok->type == type
+                               && tok->opr.idx == ex_subexp);
+
+      /* The boundary node itself is recorded only when TYPE is
+         OP_CLOSE_SUBEXP; any other node is always recorded.  */
+      if (!at_boundary || type == OP_CLOSE_SUBEXP)
+        {
+          if (BE (re_node_set_insert (dst_nodes, node) == -1, 0))
+            return REG_ESPACE;
+        }
+      /* Stop at the boundary and at nodes without epsilon edges.  */
+      if (at_boundary || dfa->edests[node].nelem == 0)
+        break;
+      if (dfa->edests[node].nelem == 2)
+        {
+          /* Two epsilon edges: handle the second one recursively, then
+             carry on along the first.  */
+          reg_errcode_t err
+            = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+                                            dfa->edests[node].elems[1],
+                                            ex_subexp, type);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      node = dfa->edests[node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+   destination of the back references by the appropriate entry
+   in MCTX->BKREF_ENTS. */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+                    int cur_str, int subexp_num, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  struct re_backref_cache_entry *ent;
+
+  if (cache_idx_start == -1)
+    return REG_NOERROR;
+
+ restart:
+  ent = mctx->bkref_ents + cache_idx_start;
+  do
+    {
+      int to_idx, next_node;
+
+      /* Is this entry ENT is appropriate? */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+        continue; /* No. */
+
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+         to MCTX->STATE_LOG. */
+      if (to_idx == cur_str)
+        {
+          /* The backreference did epsilon transit, we must re-check all the
+             node in the current state. 
*/
+          re_node_set new_dests;
+          reg_errcode_t err2, err3;
+          next_node = dfa->edests[ent->node].elems[0];
+          if (re_node_set_contains (cur_nodes, next_node))
+            continue;
+          err = re_node_set_init_1 (&new_dests, next_node);
+          err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+          err3 = re_node_set_merge (cur_nodes, &new_dests);
+          re_node_set_free (&new_dests);
+          if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+                  || err3 != REG_NOERROR, 0))
+            {
+              err = (err != REG_NOERROR ? err
+                     : (err2 != REG_NOERROR ? err2 : err3));
+              return err;
+            }
+          /* TODO: It is still inefficient... */
+          goto restart;
+        }
+      else
+        {
+          re_node_set union_set;
+          next_node = dfa->nexts[ent->node];
+          if (mctx->state_log[to_idx])
+            {
+              int ret;
+              if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+                                        next_node))
+                continue;
+              err = re_node_set_init_copy (&union_set,
+                                           &mctx->state_log[to_idx]->nodes);
+              ret = re_node_set_insert (&union_set, next_node);
+              if (BE (err != REG_NOERROR || ret < 0, 0))
+                {
+                  re_node_set_free (&union_set);
+                  err = err != REG_NOERROR ? err : REG_ESPACE;
+                  return err;
+                }
+            }
+          else
+            {
+              err = re_node_set_init_1 (&union_set, next_node);
+              if (BE (err != REG_NOERROR, 0))
+                return err;
+            }
+          mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+          re_node_set_free (&union_set);
+          if (BE (mctx->state_log[to_idx] == NULL
+                  && err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  while (ent++->more);
+  return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+   Return 1 if succeeded, otherwise return 0.
+
+   On success exactly one of STATE->trtable (SBC_MAX entries) and
+   STATE->word_trtable (2 * SBC_MAX entries) is non-NULL.  The caller
+   retries its table lookup after a non-zero return, so 1 must never be
+   returned while both tables are still NULL.
+   On failure (an allocation failed, or group_nodes_into_DFAstates ()
+   reported an error) every temporary obtained here is released.  */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+  reg_errcode_t err;
+  int i, j, ch, need_word_trtable = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
+  int ndests; /* Number of the destination states from `state'. */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
+
+  /* We build DFA states which corresponds to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts. */
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+  else
+    {
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
+        return 0;
+      dests_node_malloced = true;
+    }
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
+
+  /* Initialize transition table. */
+  state->word_trtable = state->trtable = NULL;
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations. */
+  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+        free (dests_alloc);
+      /* Return 0 in case of an error, 1 otherwise. */
+      if (ndests == 0)
+        {
+          /* No destination at all: install an all-NULL table.  A failed
+             allocation is an error like any other; reporting success
+             with a NULL table would make the caller retry forever. */
+          state->trtable = (re_dfastate_t **)
+            calloc (SBC_MAX, sizeof (re_dfastate_t *));
+          if (BE (state->trtable == NULL, 0))
+            return 0;
+          return 1;
+        }
+      return 0;
+    }
+
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+                         + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+    {
+      dest_states = (re_dfastate_t **)
+        malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+        {
+          /* Shared failure exit: the `goto out_free' statements above and
+             below jump into this block. */
+out_free:
+          if (dest_states_malloced)
+            free (dest_states);
+          re_node_set_free (&follows);
+          for (i = 0; i < ndests; ++i)
+            re_node_set_free (dests_node + i);
+          if (dests_node_malloced)
+            free (dests_alloc);
+          return 0;
+        }
+      dest_states_malloced = true;
+    }
+  /* One allocation holds three consecutive arrays of NDESTS entries. */
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations. */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states. */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+        {
+          next_node = dfa->nexts[dests_node[i].elems[j]];
+          if (next_node != -1)
+            {
+              err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+              if (BE (err != REG_NOERROR, 0))
+                goto out_free;
+            }
+        }
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+        goto out_free;
+      /* If the new state has context constraint,
+         build appropriate states for these contexts. */
+      if (dest_states[i]->has_constraint)
+        {
+          dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                          CONTEXT_WORD);
+          if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+            goto out_free;
+
+          if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+            need_word_trtable = 1;
+
+          dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                        CONTEXT_NEWLINE);
+          if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+            goto out_free;
+        }
+      else
+        {
+          dest_states_word[i] = dest_states[i];
+          dest_states_nl[i] = dest_states[i];
+        }
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  if (!BE (need_word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+         character, or we are in a single-byte character set so we can
+         discern by looking at the character code: allocate a
+         256-entry transition table. */
+      trtable = state->trtable =
+        (re_dfastate_t **) calloc (SBC_MAX, sizeof (re_dfastate_t *));
+      if (BE (trtable == NULL, 0))
+        goto out_free;
+
+      /* For all characters ch...: */
+      for (i = 0; i < BITSET_WORDS; ++i)
+        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+             elem;
+             mask <<= 1, elem >>= 1, ++ch)
+          if (BE (elem & 1, 0))
+            {
+              /* There must be exactly one destination which accepts
+                 character ch.  See group_nodes_into_DFAstates. */
+              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+                ;
+
+              /* j-th destination accepts the word character ch. */
+              if (dfa->word_char[i] & mask)
+                trtable[ch] = dest_states_word[j];
+              else
+                trtable[ch] = dest_states[j];
+            }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+         character, and we are in a multi-byte character set: discern
+         by looking at the character code: build two 256-entry
+         transition tables, one starting at trtable[0] and one
+         starting at trtable[SBC_MAX]. */
+      trtable = state->word_trtable =
+        (re_dfastate_t **) calloc (2 * SBC_MAX, sizeof (re_dfastate_t *));
+      if (BE (trtable == NULL, 0))
+        goto out_free;
+
+      /* For all characters ch...: */
+      for (i = 0; i < BITSET_WORDS; ++i)
+        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+             elem;
+             mask <<= 1, elem >>= 1, ++ch)
+          if (BE (elem & 1, 0))
+            {
+              /* There must be exactly one destination which accepts
+                 character ch.  See group_nodes_into_DFAstates. */
+              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+                ;
+
+              /* j-th destination accepts the word character ch. */
+              trtable[ch] = dest_states[j];
+              trtable[ch + SBC_MAX] = dest_states_word[j];
+            }
+    }
+
+  /* new line */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character. */
+      for (j = 0; j < ndests; ++j)
+        if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+          {
+            /* j-th destination accepts newline character. */
+            trtable[NEWLINE_CHAR] = dest_states_nl[j];
+            if (need_word_trtable)
+              trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+            /* There must be only one destination which accepts
+               newline.  See group_nodes_into_DFAstates. */
+            break;
+          }
+    }
+
+  if (dest_states_malloced)
+    free (dest_states);
+
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+
+  if (dests_node_malloced)
+    free (dests_alloc);
+
+  return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DESTS_CH[i].  This function returns the number of destinations. */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+                            re_node_set *dests_node, bitset_t *dests_ch)
+{
+  reg_errcode_t err;
+  int result;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'. */
+  bitset_t accepts; /* Characters a node can accept. 
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + int32_t idx; + /* This #include defines a local function! */ +# include <locale/weight.h> + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. 
*/ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx]; + if (weight_len == weights[equiv_class_idx]) + { + int cnt = 0; + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. Return 1 if the byte is accepted and the node's constraint (if any) is satisfied, 0 otherwise. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Double the lengths of the buffers. NOTE(review): bufs_len * 2 is not checked for overflow here -- confirm callers bound the input length. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer.
If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX: store EFLAGS, reset match_last to -1 and, when N > 0, allocate N back-reference cache entries and N sub-match top slots. Returns REG_ESPACE if either allocation fails, REG_NOERROR otherwise. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string.
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +/* Returns the newly registered entry, or NULL if memory runs out. */ static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + /* Grow SUBTOP->LASTS to 2 * alasts + 1 slots when it is full. */ if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +/* Initialize SCTX with the given state arrays, last node and last string index; its LIMITS node set starts out empty. */ static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} + + +/* Binary backward compatibility. */ +#if _LIBC +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/gkregex.h b/3rdParty/metis/metis-5.1.1/GKlib/gkregex.h new file mode 100644 index 000000000..807c404ec --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/gkregex.h @@ -0,0 +1,556 @@ +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include <sys/types.h> + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. 
*/ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. 
*/ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. 
*/ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. 
+ (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. 
*/ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +# undef RE_DUP_MAX +#endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). 
+ If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Invalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE unsigned char * +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern.
It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long int allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long int used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + RE_TRANSLATE_TYPE translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. 
*/ + unsigned newline_anchor : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +# define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. 
Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. 
*/ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); + +#if defined _REGEX_RE_COMP || defined _LIBC +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. */ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/3rdParty/metis/metis-5.1.1/GKlib/graph.c b/3rdParty/metis/metis-5.1.1/GKlib/graph.c new file mode 100644 index 000000000..1bfd0cc45 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/graph.c @@ -0,0 +1,1940 @@ +/*! 
+ * \file
+ *
+ * \brief Various routines for dealing with sparse graphs
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: graph.c 22415 2019-09-05 16:55:00Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+
+/*************************************************************************/
+/*! Allocates memory for a graph and initializes it.
+    \returns the allocated graph. Every field is zeroed (so all pointers
+             are NULL) except nvtxs, which gk_graph_Init() sets to -1.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Create()
+{
+  gk_graph_t *graph;
+
+  graph = (gk_graph_t *)gk_malloc(sizeof(gk_graph_t), "gk_graph_Create: graph");
+
+  gk_graph_Init(graph);
+
+  return graph;
+}
+
+
+/*************************************************************************/
+/*! Initializes the graph.
+    \param graph is the graph to be initialized. Its previous contents are
+           overwritten without being freed.
+*/
+/*************************************************************************/
+void gk_graph_Init(gk_graph_t *graph)
+{
+  memset(graph, 0, sizeof(gk_graph_t));
+
+  /* -1 marks "no vertices loaded yet", as opposed to an empty graph */
+  graph->nvtxs = -1;
+}
+
+
+/*************************************************************************/
+/*! Frees all the memory allocated for a graph.
+    \param graph is the address of the graph pointer to be freed. A NULL
+           address, or an address holding a NULL graph, is a no-op.
+*/
+/*************************************************************************/
+void gk_graph_Free(gk_graph_t **graph)
+{
+  /* guard the handle itself as well as the graph it points to */
+  if (graph == NULL || *graph == NULL)
+    return;
+
+  gk_graph_FreeContents(*graph);
+  gk_free((void **)graph, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Frees only the memory allocated for the graph's different fields and
+    sets them to NULL.
+    \param graph is the graph whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_graph_FreeContents(gk_graph_t *graph)
+{
+  gk_free((void **)&graph->xadj, &graph->adjncy,
+          &graph->iadjwgt, &graph->fadjwgt,
+          &graph->ivwgts, &graph->fvwgts,
+          &graph->ivsizes, &graph->fvsizes,
+          &graph->vlabels,
+          LTERM);
+}
+
+
+/**************************************************************************/
+/*! Reads a sparse graph from the supplied file
+    \param filename is the file that stores the data.
+    \param format is the graph format. The supported values are:
+           GK_GRAPH_FMT_METIS, GK_GRAPH_FMT_IJV, GK_GRAPH_FMT_HIJV.
+    \param hasvals is 1 if the input file has values. It is consulted only
+           for the IJV/HIJV formats; a METIS file states this in its header.
+    \param numbering is 1 if the input file numbering starts from one. It
+           is consulted only for the IJV/HIJV formats; a METIS file is
+           always read as 1-based.
+    \param isfewgts is 1 if the edge-weights should be read as floats
+    \param isfvwgts is 1 if the vertex-weights should be read as floats
+    \param isfvsizes is 1 if the vertex-sizes should be read as floats
+    \returns the graph that was read.
+
+    \note Every problem with the input (missing file, short file, indices
+          outside the graph, more edges than announced) is reported with
+          gk_errexit().
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals,
+               int numbering, int isfewgts, int isfvwgts, int isfvsizes)
+{
+  ssize_t i, k, l;
+  size_t nfields, nvtxs, nedges, fmt, ncon, lnlen;
+  ssize_t *xadj;
+  int64_t hdr_nrows, hdr_ncols;
+  int32_t ival, *iinds=NULL, *jinds=NULL, *ivals=NULL, *adjncy, *iadjwgt;
+  float fval, *fvals=NULL, *fadjwgt;
+  int readsizes=0, readwgts=0, readvals=0;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin=NULL;
+  gk_graph_t *graph=NULL;
+
+
+  if (!gk_fexists(filename))
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  switch (format) {
+    case GK_GRAPH_FMT_METIS:
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+
+      /* the header is the first line that is not a '%' comment */
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      fmt = ncon = 0;
+      nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon);
+      if (nfields < 2)
+        gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+      /* the adjacency lists hold twice the header's edge count
+         (gk_graph_Write() halves xadj[nvtxs] when it writes the header) */
+      nedges *= 2;
+
+      if (fmt > 111)
+        gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+      /* the three decimal digits of fmt flag: vertex sizes, vertex weights,
+         edge weights */
+      sprintf(fmtstr, "%03zu", fmt%1000);
+      readsizes = (fmtstr[0] == '1');
+      readwgts  = (fmtstr[1] == '1');
+      readvals  = (fmtstr[2] == '1');
+      numbering = 1;
+      ncon      = (ncon == 0 ? 1 : ncon);
+
+      graph = gk_graph_Create();
+
+      graph->nvtxs = nvtxs;
+
+      graph->xadj   = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj");
+      graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy");
+      if (readvals) {
+        if (isfewgts)
+          graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt");
+        else
+          graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt");
+      }
+
+      if (readsizes) {
+        if (isfvsizes)
+          graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes");
+        else
+          graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes");
+      }
+
+      if (readwgts) {
+        if (isfvwgts)
+          graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts");
+        else
+          graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts");
+      }
+
+
+      /*----------------------------------------------------------------------
+       * Read the sparse graph file
+       *---------------------------------------------------------------------*/
+      numbering = (numbering ? - 1 : 0);
+      for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
+        do {
+          if (gk_getline(&line, &lnlen, fpin) == -1)
+            gk_errexit(SIGERR, "Premature end of input file: file while reading row %zd\n", i);
+        } while (line[0] == '%');
+
+        head = line;
+        tail = NULL;
+
+        /* Read vertex sizes */
+        if (readsizes) {
+          if (isfvsizes) {
+#ifdef __MSC__
+            graph->fvsizes[i] = (float)strtod(head, &tail);
+#else
+            graph->fvsizes[i] = strtof(head, &tail);
+#endif
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->fvsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          else {
+            graph->ivsizes[i] = strtol(head, &tail, 0);
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->ivsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          head = tail;
+        }
+
+        /* Read vertex weights */
+        if (readwgts) {
+          for (l=0; l<ncon; l++) {
+            if (isfvwgts) {
+#ifdef __MSC__
+              graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+              graph->fvwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %zu constraints.\n", i+1, ncon);
+              if (graph->fvwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            else {
+              graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %zu constraints.\n", i+1, ncon);
+              if (graph->ivwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            head = tail;
+          }
+        }
+
+
+        /* Read the rest of the row */
+        while (1) {
+          ival = (int)strtol(head, &tail, 0);
+          if (tail == head)
+            break;
+          head = tail;
+
+          /* adjncy/adjwgt were sized from the header; stop before a file
+             with more edges than it announced writes past them */
+          if ((size_t)k >= nedges)
+            gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
+                "the input file. nedges=%zu, but more were found at row %zd.\n", nedges, i);
+
+          /* a neighbour has to be one of the graph's own vertices */
+          graph->adjncy[k] = ival + numbering;
+          if (graph->adjncy[k] < 0 || (size_t)graph->adjncy[k] >= nvtxs)
+            gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+
+          if (readvals) {
+            if (isfewgts) {
+#ifdef __MSC__
+              fval = (float)strtod(head, &tail);
+#else
+              fval = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+              graph->fadjwgt[k] = fval;
+            }
+            else {
+              ival = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+              graph->iadjwgt[k] = ival;
+            }
+            head = tail;
+          }
+          k++;
+        }
+        graph->xadj[i+1] = k;
+      }
+
+      if (k != nedges)
+        gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
+            "the input file. nedges=%zu, Actualnedges=%zd.\n", nedges, k);
+
+      gk_fclose(fpin);
+
+      gk_free((void **)&line, LTERM);
+
+      break;
+
+    case GK_GRAPH_FMT_IJV:
+    case GK_GRAPH_FMT_HIJV:
+      /* nvtxs/nedges temporarily hold the file's two counts from
+         gk_getfilestats(); the checks below require exactly 2 or 3 numbers
+         per counted line */
+      gk_getfilestats(filename, &nvtxs, &nedges, NULL, NULL);
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* remove the #rows/#cols values and row */
+        nedges -= 2;
+        nvtxs  -= 1;
+      }
+
+      if (hasvals == 1 && 3*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 3.\n", nedges, hasvals);
+      if (hasvals == 0 && 2*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 2.\n", nedges, hasvals);
+
+      nedges = nvtxs;
+      numbering = (numbering ? -1 : 0);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nedges, "iinds");
+      jinds = gk_i32malloc(nedges, "jinds");
+      if (hasvals) {
+        if (isfewgts)
+          fvals = gk_fmalloc(nedges, "fvals");
+        else
+          ivals = gk_i32malloc(nedges, "ivals");
+      }
+
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* read and ignore the #rows/#cols values */
+        /* scanned into real int64_t objects so the SCNd64 conversions match */
+        if (fscanf(fpin, "%"SCNd64" %"SCNd64, &hdr_nrows, &hdr_ncols) != 2)
+          gk_errexit(SIGERR, "Error: Failed to read the header line.\n");
+      }
+
+      for (nvtxs=0, i=0; i<nedges; i++) {
+        if (hasvals) {
+          if (isfewgts) {
+            if (fscanf(fpin, "%"SCNd32" %"SCNd32" %f", &iinds[i], &jinds[i], &fvals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+          else {
+            if (fscanf(fpin, "%"SCNd32" %"SCNd32" %"SCNd32, &iinds[i], &jinds[i], &ivals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+        }
+        else {
+          if (fscanf(fpin, "%"SCNd32" %"SCNd32, &iinds[i], &jinds[i]) != 2)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nedge: %zd.\n", i);
+        }
+        iinds[i] += numbering;
+        jinds[i] += numbering;
+
+        /* a negative index would be used to index xadj[] below */
+        if (iinds[i] < 0 || jinds[i] < 0)
+          gk_errexit(SIGERR, "Error: Invalid (i, j) indices for nedge: %zd.\n", i);
+
+        /* track the largest vertex index seen so far */
+        if (nvtxs < (size_t)iinds[i])
+          nvtxs = iinds[i];
+        if (nvtxs < (size_t)jinds[i])
+          nvtxs = jinds[i];
+      }
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a graph format */
+      graph = gk_graph_Create();
+      graph->nvtxs = ++nvtxs;
+      xadj   = graph->xadj   = gk_zsmalloc(nvtxs+1, 0, "xadj");
+      adjncy = graph->adjncy = gk_i32malloc(nedges, "adjncy");
+      if (hasvals) {
+        if (isfewgts)
+          fadjwgt = graph->fadjwgt = gk_fmalloc(nedges, "fadjwgt");
+        else
+          iadjwgt = graph->iadjwgt = gk_i32malloc(nedges, "iadjwgt");
+      }
+
+      /* count the entries of every row and turn the counts into offsets */
+      for (i=0; i<nedges; i++)
+        xadj[iinds[i]]++;
+      MAKECSR(i, nvtxs, xadj);
+
+      /* scatter the entries; xadj[] is advanced while doing so and is put
+         back by SHIFTCSR() */
+      for (i=0; i<nedges; i++) {
+        adjncy[xadj[iinds[i]]] = jinds[i];
+        if (hasvals) {
+          if (isfewgts)
+            fadjwgt[xadj[iinds[i]]] = fvals[i];
+          else
+            iadjwgt[xadj[iinds[i]]] = ivals[i];
+        }
+        xadj[iinds[i]]++;
+      }
+      SHIFTCSR(i, nvtxs, xadj);
+
+      gk_free((void **)&iinds, &jinds, &fvals, &ivals, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unrecognized format: %d\n", format);
+  }
+
+  return graph;
+}
+
+
+/**************************************************************************/
+/*! Writes a graph into a file.
+    \param graph is the graph to be written,
+    \param filename is the name of the output file. If it is NULL, the
+           graph is written to stdout.
+    \param format specifies the format of the output file. The supported
+           values are GK_GRAPH_FMT_METIS and GK_GRAPH_FMT_IJV.
+    \param numbering is either 0 or 1, indicating if the first vertex
+           will be numbered 0 or 1. Some formats ignore this (the METIS
+           format is always written 1-based).
+
+    \note The METIS format writes a single weight per vertex.
+*/
+/**************************************************************************/
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering)
+{
+  int32_t i;
+  ssize_t j;
+  int hasvwgts, hasvsizes, hasewgts;
+  FILE *fpout;
+
+  if (filename)
+    fpout = gk_fopen(filename, "w", "gk_graph_Write: fpout");
+  else
+    fpout = stdout;
+
+
+  /* an attribute is present if either its integer or its float array is */
+  hasewgts  = (graph->iadjwgt || graph->fadjwgt);
+  hasvwgts  = (graph->ivwgts || graph->fvwgts);
+  hasvsizes = (graph->ivsizes || graph->fvsizes);
+
+  switch (format) {
+    case GK_GRAPH_FMT_METIS:
+      /* write the header line; the three flag digits are in the order
+         gk_graph_Read() decodes them: sizes, vertex weights, edge weights */
+      fprintf(fpout, "%d %zd", graph->nvtxs, graph->xadj[graph->nvtxs]/2);
+      if (hasvwgts || hasvsizes || hasewgts)
+        fprintf(fpout, " %d%d%d", hasvsizes, hasvwgts, hasewgts);
+      fprintf(fpout, "\n");
+
+
+      /* one line per vertex: [size] [weight] {neighbour [edge weight]}... */
+      for (i=0; i<graph->nvtxs; i++) {
+        if (hasvsizes) {
+          if (graph->ivsizes)
+            fprintf(fpout, " %d", graph->ivsizes[i]);
+          else
+            fprintf(fpout, " %f", graph->fvsizes[i]);
+        }
+
+        if (hasvwgts) {
+          if (graph->ivwgts)
+            fprintf(fpout, " %d", graph->ivwgts[i]);
+          else
+            fprintf(fpout, " %f", graph->fvwgts[i]);
+        }
+
+        for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
+          fprintf(fpout, " %d", graph->adjncy[j]+1);
+          if (hasewgts) {
+            if (graph->iadjwgt)
+              fprintf(fpout, " %d", graph->iadjwgt[j]);
+            else
+              fprintf(fpout, " %f", graph->fadjwgt[j]);
+          }
+        }
+        fprintf(fpout, "\n");
+      }
+      break;
+
+    case GK_GRAPH_FMT_IJV:
+      /* one "i j value" line per stored edge; unweighted edges get a 1 */
+      for (i=0; i<graph->nvtxs; i++) {
+        for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
+          fprintf(fpout, "%d %d ", i+numbering, graph->adjncy[j]+numbering);
+          if (hasewgts) {
+            if (graph->iadjwgt)
+              fprintf(fpout, " %d\n", graph->iadjwgt[j]);
+            else
+              fprintf(fpout, " %f\n", graph->fadjwgt[j]);
+          }
+          else {
+            fprintf(fpout, " 1\n");
+          }
+        }
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown file format. %d\n", format);
+  }
+
+  if (filename)
+    gk_fclose(fpout);
+}
+
+
+/*************************************************************************/
+/*! Returns a copy of a graph.
+    \param graph is the graph to be duplicated.
+    \returns the newly created copy of the graph. Only the arrays that are
+             present in graph are allocated in the copy; the edge arrays
+             are copied only when graph has an xadj array, since their
+             length is taken from it.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph)
+{
+  int nvtxs;
+  ssize_t nedges;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+
+  nvtxs = ngraph->nvtxs = graph->nvtxs;
+
+  /* copy the per-vertex arrays */
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts,
+                            gk_i32malloc(nvtxs, "gk_graph_Dup: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes,
+                            gk_i32malloc(nvtxs, "gk_graph_Dup: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels,
+                            gk_i32malloc(nvtxs, "gk_graph_Dup: ivlabels"));
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts,
+                            gk_fmalloc(nvtxs, "gk_graph_Dup: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes,
+                            gk_fmalloc(nvtxs, "gk_graph_Dup: fvsizes"));
+
+  /* copy the adjacency structure; xadj[nvtxs] is only read when xadj exists */
+  if (graph->xadj) {
+    nedges = graph->xadj[nvtxs];
+
+    ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj,
+                            gk_zmalloc(nvtxs+1, "gk_graph_Dup: xadj"));
+    if (graph->adjncy)
+      ngraph->adjncy = gk_i32copy(nedges, graph->adjncy,
+                              gk_i32malloc(nedges, "gk_graph_Dup: adjncy"));
+    if (graph->iadjwgt)
+      ngraph->iadjwgt = gk_i32copy(nedges, graph->iadjwgt,
+                              gk_i32malloc(nedges, "gk_graph_Dup: iadjwgt"));
+    if (graph->fadjwgt)
+      ngraph->fadjwgt = gk_fcopy(nedges, graph->fadjwgt,
+                              gk_fmalloc(nedges, "gk_graph_Dup: fadjwgt"));
+  }
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns the transpose of a graph.
+    \param graph is the graph to be transposed. Its xadj and adjncy arrays
+           are read unconditionally.
+    \returns the newly created transposed graph. The vertex attributes are
+             copied unchanged.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Transpose(gk_graph_t *graph)
+{
+  int32_t vi, vj;
+  ssize_t ei, nedges;
+  int nvtxs;
+  gk_graph_t *ngraph;
+
+  nvtxs  = graph->nvtxs;
+  nedges = graph->xadj[nvtxs];
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nvtxs  = nvtxs;
+  ngraph->xadj   = gk_zsmalloc(nvtxs+1, 0, "gk_graph_Transpose: xadj");
+  ngraph->adjncy = gk_i32malloc(nedges, "gk_graph_Transpose: adjncy");
+
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Transpose: iadjwgt");
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Transpose: fadjwgt");
+
+  /* count the incoming edges of every vertex and turn them into offsets */
+  for (vi=0; vi<nvtxs; vi++) {
+    for (ei=graph->xadj[vi]; ei<graph->xadj[vi+1]; ei++)
+      ngraph->xadj[graph->adjncy[ei]]++;
+  }
+  MAKECSR(vi, ngraph->nvtxs, ngraph->xadj);
+
+  /* store every edge (vi, vj) as (vj, vi); xadj[] is advanced while doing
+     so and is put back by SHIFTCSR() */
+  for (vi=0; vi<nvtxs; vi++) {
+    for (ei=graph->xadj[vi]; ei<graph->xadj[vi+1]; ei++) {
+      vj = graph->adjncy[ei];
+      ngraph->adjncy[ngraph->xadj[vj]] = vi;
+      if (ngraph->iadjwgt)
+        ngraph->iadjwgt[ngraph->xadj[vj]] = graph->iadjwgt[ei];
+      if (ngraph->fadjwgt)
+        ngraph->fadjwgt[ngraph->xadj[vj]] = graph->fadjwgt[ei];
+      ngraph->xadj[vj]++;
+    }
+  }
+  SHIFTCSR(vi, ngraph->nvtxs, ngraph->xadj);
+
+  /* copy vertex attributes */
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts,
+                            gk_i32malloc(nvtxs, "gk_graph_Transpose: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes,
+                            gk_i32malloc(nvtxs, "gk_graph_Transpose: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels,
+                            gk_i32malloc(nvtxs, "gk_graph_Transpose: ivlabels"));
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts,
+                            gk_fmalloc(nvtxs, "gk_graph_Transpose: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes,
+                            gk_fmalloc(nvtxs, "gk_graph_Transpose: fvsizes"));
+
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph containing a set of consecutive vertices.
+    \param graph is the original graph.
+    \param vstart is the starting vertex.
+    \param nvtxs is the number of vertices from vstart to extract.
+    \returns the newly created subgraph, or NULL if the requested range is
+             negative or extends past the graph's last vertex, or if the
+             graph has no xadj array.
+
+    \note The adjacency lists are copied as they are: the neighbour ids
+          keep the numbering of the original graph and edges that leave
+          the extracted range are not removed.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs)
+{
+  ssize_t i, estart, nedges;
+  gk_graph_t *ngraph;
+
+  /* reject negative or out-of-range requests; written as a subtraction so
+     that vstart+nvtxs cannot overflow */
+  if (vstart < 0 || nvtxs < 0 || nvtxs > graph->nvtxs - vstart)
+    return NULL;
+
+  /* the sizes of all edge arrays come from xadj */
+  if (graph->xadj == NULL)
+    return NULL;
+
+  estart = graph->xadj[vstart];
+  nedges = graph->xadj[vstart+nvtxs] - estart;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nvtxs = nvtxs;
+
+  /* copy the adjacency structure and rebase the offsets to start at 0 */
+  ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart,
+                          gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj"));
+  for (i=0; i<=nvtxs; i++)
+    ngraph->xadj[i] -= estart;
+  ASSERT(ngraph->xadj[0] == 0);
+
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart,
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart,
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart,
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels"));
+
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart,
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart,
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes"));
+
+
+  ASSERT(ngraph->xadj[nvtxs] == nedges);
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32copy(nedges, graph->adjncy+estart,
+                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: adjncy"));
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32copy(nedges, graph->iadjwgt+estart,
+                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: iadjwgt"));
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fcopy(nedges, graph->fadjwgt+estart,
+                            gk_fmalloc(nedges, "gk_graph_ExtractSubgraph: fadjwgt"));
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a graph that has been reordered according to the permutation.
+    \param[IN] graph is the graph to be re-ordered.
+    \param[IN] perm is the new ordering of the graph's vertices
+    \param[IN] iperm is the original ordering of the re-ordered graph's vertices
+    \returns the newly created copy of the graph.
+
+    \note Either perm or iperm can be NULL but not both.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm)
+{
+  ssize_t j, jj, *xadj;
+  int i, u, v, nvtxs;
+  int freeperm=0, freeiperm=0;
+  int32_t *adjncy;
+  gk_graph_t *ngraph;
+
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  /* the copy loop below walks xadj; bail out before anything is allocated */
+  if (graph->xadj == NULL)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nvtxs = nvtxs = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* allocate memory for the different structures that are present in graph */
+  ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj");
+
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts");
+
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes");
+
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels");
+
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts");
+
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes");
+
+
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: adjncy");
+
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: iadjwgt");
+
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Reorder: fadjwgt");
+
+
+  /* create perm/iperm if not provided; each is the inverse of the other */
+  if (perm == NULL) {
+    freeperm = 1;
+    perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm");
+    for (i=0; i<nvtxs; i++)
+      perm[iperm[i]] = i;
+  }
+  if (iperm == NULL) {
+    freeiperm = 1;
+    iperm = gk_i32malloc(nvtxs, "gk_graph_Reorder: iperm");
+    for (i=0; i<nvtxs; i++)
+      iperm[perm[i]] = i;
+  }
+
+  /* fill-in the information of the re-ordered graph: new vertex v is old
+     vertex u=iperm[v], and every neighbour is renamed through perm[] */
+  ngraph->xadj[0] = jj = 0;
+  for (v=0; v<nvtxs; v++) {
+    u = iperm[v];
+    for (j=xadj[u]; j<xadj[u+1]; j++, jj++) {
+      ngraph->adjncy[jj] = perm[adjncy[j]];
+      if (graph->iadjwgt)
+        ngraph->iadjwgt[jj] = graph->iadjwgt[j];
+      if (graph->fadjwgt)
+        ngraph->fadjwgt[jj] = graph->fadjwgt[j];
+    }
+    if (graph->ivwgts)
+      ngraph->ivwgts[v] = graph->ivwgts[u];
+    if (graph->fvwgts)
+      ngraph->fvwgts[v] = graph->fvwgts[u];
+    if (graph->ivsizes)
+      ngraph->ivsizes[v] = graph->ivsizes[u];
+    if (graph->fvsizes)
+      ngraph->fvsizes[v] = graph->fvsizes[u];
+    if (graph->vlabels)
+      ngraph->vlabels[v] = graph->vlabels[u];
+
+    ngraph->xadj[v+1] = jj;
+  }
+
+
+  /* free only the permutation that was built here */
+  if (freeperm)
+    gk_free((void **)&perm, LTERM);
+  if (freeiperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph.
+
+    \param graph is the graph structure
+    \param cptr is the ptr structure of the CSR representation of the
+           components. The length of this vector must be graph->nvtxs+1.
+    \param cind is the indices structure of the CSR representation of
+           the components. The length of this vector must be graph->nvtxs.
+
+    \returns the number of components that it found.
+
+    \note The cptr and cind parameters can be NULL, in which case only the
+          number of connected components is returned. Each of the two is
+          handled independently: a NULL one is replaced by a temporary
+          array, a non-NULL one is filled in.
+*/
+/*************************************************************************/
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind)
+{
+  ssize_t i, j, k, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int free_cptr=0, free_cind=0;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Deal with NULL supplied cptr/cind vectors */
+  if (cptr == NULL) {
+    cptr = gk_i32malloc(nvtxs+1, "gk_graph_FindComponents: cptr");
+    free_cptr = 1;
+  }
+  if (cind == NULL) {
+    cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind");
+    free_cind = 1;
+  }
+
+  /* The list of vertices that have not been touched yet.
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: todo"));
+
+  /* For a vertex that has not been visited, pos[i] is the position in the
+     todo list that this vertex is stored.
+     If a vertex has been visited, pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos"));
+
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* All vertices have not been visited */
+  first = last = 0;  /* Point to the first and last vertices that have been touched
+                        but not explored.
+                        These vertices are stored in cind[first]...cind[last-1]. */
+  while (1) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;  /* Mark the end of the current CC */
+
+      if (ntodo > 0) {
+        /* put the first vertex in the todo list as the start of the new CC */
+        k = todo[0];
+        GKASSERT(pos[k] != -1);
+        cind[last++] = k;
+
+        /* Move the last todo entry into slot 0. k is marked as visited only
+           afterwards, so that it stays marked when it was the sole entry of
+           the list (todo[0] is then k itself); otherwise a self-loop on k
+           would queue it a second time. */
+        todo[0] = todo[--ntodo];
+        pos[todo[0]] = 0;
+        pos[k] = -1;
+      }
+      else {
+        break;
+      }
+    }
+
+    i = cind[first++];  /* Get the first visited but unexplored vertex */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (pos[k] != -1) {
+        cind[last++] = k;
+
+        /* Remove k from the todo list and put the last item in the todo
+           list at the position that k was so that the todo list will be
+           consecutive. The pos[] array is updated accordingly to keep track
+           the location of the vertices in the todo[] list. */
+        todo[pos[k]] = todo[--ntodo];
+        pos[todo[pos[k]]] = pos[k];
+        pos[k] = -1;
+      }
+    }
+  }
+  GKASSERT(first == nvtxs);
+
+  /* release only the arrays that were allocated here */
+  if (free_cptr)
+    gk_free((void **)&cptr, LTERM);
+  if (free_cind)
+    gk_free((void **)&cind, LTERM);
+
+  gk_free((void **)&pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    breadth-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+ The algorithm used is a simplified version of the method used to find + the connected components. + + \param[IN] graph is the graph structure + \param[IN] v is the starting vertex of the BFS + \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph. + \param[OUT] iperm[i] stores the ID of the vertex that corresponds to + the ith vertex in the re-ordered graph. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm, + int32_t **r_iperm) +{ + ssize_t j, *xadj; + int i, k, nvtxs, first, last; + int32_t *adjncy, *cot, *pos; + + if (graph->nvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* This array will function like pos + touched of the CC method */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: pos")); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. + Positions from [0...first) is the current iperm[] vector of the explored vertices; + Positions from [first...last) is the OPEN list (i.e., visited vertices); + Positions from [last...nvtxs) is the todo list. 
*/ + cot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: cot")); + + + /* put v at the front of the todo list */ + pos[0] = cot[0] = v; + pos[v] = cot[v] = 0; + + /* compute a BFS ordering from the seed vertex */ + first = last = 0; + while (first < nvtxs) { + if (first == last) { /* Find another starting vertex */ + k = cot[last]; + ASSERT(pos[k] != -1); + pos[k] = -1; /* mark node as being visited */ + last++; + } + + i = cot[first++]; /* the ++ advances the explored vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + /* if a node has already been visited, its pos[] will be -1 */ + if (pos[k] != -1) { + /* pos[k] is the location within cot[] where k resides (it is in the 'todo' part); + It is placed in that location cot[last] (end of OPEN list) that we + are about to overwrite and update pos[cot[last]] to reflect that. */ + cot[pos[k]] = cot[last]; /* put the head of the todo list to + where k was in the todo list */ + pos[cot[last]] = pos[k]; /* update perm to reflect the move */ + + cot[last++] = k; /* put node at the end of the OPEN list */ + pos[k] = -1; /* mark node as being visited */ + } + } + } + + /* time to decide what to return */ + if (r_perm != NULL) { + /* use the 'pos' array to build the perm array */ + for (i=0; i<nvtxs; i++) + pos[cot[i]] = i; + + *r_perm = pos; + pos = NULL; + } + + if (r_iperm != NULL) { + *r_iperm = cot; + cot = NULL; + } + + + /* cleanup memory */ + gk_free((void **)&pos, &cot, LTERM); + +} + + +/*************************************************************************/ +/*! This function computes a permutation of the vertices based on a + best-first-traversal. It can be used for re-ordering the graph + to reduce its bandwidth for better cache locality. + + \param[IN] graph is the graph structure. + \param[IN] v is the starting vertex of the best-first traversal. + \param[IN] type indicates the criteria to use to measure the 'bestness' + of a vertex. 
+ \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph. + \param[OUT] iperm[i] stores the ID of the vertex that corresponds to + the ith vertex in the re-ordered graph. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t **r_iperm) +{ + ssize_t j, jj, *xadj; + int i, k, u, nvtxs; + int32_t *adjncy, *perm, *degrees, *minIDs, *open; + gk_i32pq_t *queue; + + if (graph->nvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees"); + + /* the minimum vertex ID of an open vertex to the closed list */ + minIDs = gk_i32smalloc(nvtxs, nvtxs+1, "gk_graph_ComputeBestFOrdering: minIDs"); + + /* the open list */ + open = gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: open"); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; + otherwise perm[i] == -1. 
+ */ + perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm"); + + /* create the queue and put everything in it */ + queue = gk_i32pqCreate(nvtxs); + for (i=0; i<nvtxs; i++) + gk_i32pqInsert(queue, i, 0); + gk_i32pqUpdate(queue, v, 1); + + open[0] = v; + + /* start processing the nodes */ + for (i=0; i<nvtxs; i++) { + if ((v = gk_i32pqGetTop(queue)) == -1) + gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i); + if (perm[v] != -1) + gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v); + perm[v] = i; + + + for (j=xadj[v]; j<xadj[v+1]; j++) { + u = adjncy[j]; + if (perm[u] == -1) { + degrees[u]++; + minIDs[u] = (i < minIDs[u] ? i : minIDs[u]); + + switch (type) { + case 1: /* DFS */ + gk_i32pqUpdate(queue, u, 1); + break; + case 2: /* Max in closed degree */ + gk_i32pqUpdate(queue, u, degrees[u]); + break; + case 3: /* Sum of orders in closed list */ + for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) { + if (perm[adjncy[jj]] != -1) + k += perm[adjncy[jj]]; + } + gk_i32pqUpdate(queue, u, k); + break; + case 4: /* Sum of order-differences (w.r.t. current number) in closed + list (updated once in a while) */ + for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) { + if (perm[adjncy[jj]] != -1) + k += (i-perm[adjncy[jj]]); + } + gk_i32pqUpdate(queue, u, k); + break; + default: + ; + } + } + } + } + + + /* time to decide what to return */ + if (r_perm != NULL) { + *r_perm = perm; + perm = NULL; + } + + if (r_iperm != NULL) { + /* use the 'degrees' array to build the iperm array */ + for (i=0; i<nvtxs; i++) + degrees[perm[i]] = i; + + *r_iperm = degrees; + degrees = NULL; + } + + + + /* cleanup memory */ + gk_i32pqDestroy(queue); + gk_free((void **)&perm, °rees, &minIDs, &open, LTERM); + +} + + +/*************************************************************************/ +/*! This function computes a permutation of the vertices based on a + best-first-traversal. 
It can be used for re-ordering the graph + to reduce its bandwidth for better cache locality. + + \param[IN] graph is the graph structure. + \param[IN] v is the starting vertex of the best-first traversal. + \param[IN] type indicates the criteria to use to measure the 'bestness' + of a vertex. + \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph. + \param[OUT] iperm[i] stores the ID of the vertex that corresponds to + the ith vertex in the re-ordered graph. + + \note The perm or iperm (but not both) can be NULL, at which point, + the corresponding arrays are not returned. Though the program + works fine when both are NULL, doing that is not smart. + The returned arrays should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, + int32_t **r_perm, int32_t **r_iperm) +{ + ssize_t j, jj, *xadj; + int i, k, u, nvtxs, nopen, ntodo; + int32_t *adjncy, *perm, *degrees, *sod, *level, *ot, *pos; + int64_t *wdegrees; + gk_i32pq_t *queue; + + if (graph->nvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* the degree of the vertices in the closed list */ + degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees"); + + /* the weighted degree of the vertices in the closed list for type==3 */ + wdegrees = gk_i64smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees"); + + /* the sum of differences for type==4 */ + sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod"); + + /* the encountering level of a vertex type==5 */ + level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level"); + + /* The open+todo list of vertices. + The vertices from [0..nopen] are the open vertices. + The vertices from [nopen..ntodo) are the todo vertices. 
+ */ + ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: ot")); + + /* For a vertex that has not been explored, pos[i] is the position in the ot list. */ + pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos")); + + /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */ + perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm"); + + /* create the queue and put the starting vertex in it */ + queue = gk_i32pqCreate(nvtxs); + gk_i32pqInsert(queue, v, 1); + + /* put v at the front of the open list */ + pos[0] = ot[0] = v; + pos[v] = ot[v] = 0; + nopen = 1; + ntodo = nvtxs; + + /* start processing the nodes */ + for (i=0; i<nvtxs; i++) { + if (nopen == 0) { /* deal with non-connected graphs */ + gk_i32pqInsert(queue, ot[0], 1); + nopen++; + } + + if ((v = gk_i32pqGetTop(queue)) == -1) + gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i); + + if (perm[v] != -1) + gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v); + perm[v] = i; + + if (ot[pos[v]] != v) + gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v); + if (pos[v] >= nopen) + gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen); + + /* remove v from the open list and re-arrange the todo part of the list */ + ot[pos[v]] = ot[nopen-1]; + pos[ot[nopen-1]] = pos[v]; + if (ntodo > nopen) { + ot[nopen-1] = ot[ntodo-1]; + pos[ot[ntodo-1]] = nopen-1; + } + nopen--; + ntodo--; + + for (j=xadj[v]; j<xadj[v+1]; j++) { + u = adjncy[j]; + if (perm[u] == -1) { + /* update ot list, if u is not in the open list by putting it at the end + of the open list. 
*/ + if (degrees[u] == 0) { + ot[pos[u]] = ot[nopen]; + pos[ot[nopen]] = pos[u]; + ot[nopen] = u; + pos[u] = nopen; + nopen++; + + level[u] = level[v]+1; + gk_i32pqInsert(queue, u, 0); + } + + + /* update the in-closed degree */ + degrees[u]++; + + /* update the queues based on the type */ + switch (type) { + case 1: /* DFS */ + gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]); + break; + + case 2: /* Max in closed degree */ + gk_i32pqUpdate(queue, u, degrees[u]); + break; + + case 3: /* Sum of orders in closed list */ + wdegrees[u] += i; + gk_i32pqUpdate(queue, u, (int32_t)sqrt(wdegrees[u])); + break; + + case 4: /* Sum of order-differences */ + /* this is handled at the end of the loop */ + ; + break; + + case 5: /* BFS with in degree priority */ + gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u])); + break; + + case 6: /* Hybrid of 1+2 */ + gk_i32pqUpdate(queue, u, (i+1)*degrees[u]); + break; + + default: + ; + } + } + } + + if (type == 4) { /* update all the vertices in the open list */ + for (j=0; j<nopen; j++) { + u = ot[j]; + if (perm[u] != -1) + gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]); + sod[u] += degrees[u]; + if (i<1000 || i%25==0) + gk_i32pqUpdate(queue, u, sod[u]); + } + } + + /* + for (j=0; j<ntodo; j++) { + if (pos[ot[j]] != j) + gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j); + } + */ + + } + + + /* time to decide what to return */ + if (r_perm != NULL) { + *r_perm = perm; + perm = NULL; + } + + if (r_iperm != NULL) { + /* use the 'degrees' array to build the iperm array */ + for (i=0; i<nvtxs; i++) + degrees[perm[i]] = i; + + *r_iperm = degrees; + degrees = NULL; + } + + + + /* cleanup memory */ + gk_i32pqDestroy(queue); + gk_free((void **)&perm, °rees, &wdegrees, &sod, &ot, &pos, &level, LTERM); + +} + + +/*************************************************************************/ +/*! 
This function computes the single-source shortest path lengths from the + root node to all the other nodes in the graph. If the graph is not + connected then, the sortest part to the vertices in the other components + is -1. + + \param[IN] graph is the graph structure. + \param[IN] v is the root of the single-source shortest path computations. + \param[IN] type indicates the criteria to use to measure the 'bestness' + of a vertex. + \param[OUT] sps[i] stores the length of the shortest path from v to vertex i. + If no such path exists, then it is -1. Note that the returned + array will be either an array of int32_t or an array of floats. + The specific type is determined by the existance of non NULL + iadjwgt and fadjwgt arrays. If both of these arrays exist, then + priority is given to iadjwgt. + + \note The returned array should be freed with gk_free(). +*/ +/*************************************************************************/ +void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps) +{ + ssize_t *xadj; + int i, u, nvtxs; + int32_t *adjncy, *inqueue; + + if (graph->nvtxs <= 0) + return; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue"); + + /* determine if you will be computing using int32_t or float and proceed from there */ + if (graph->iadjwgt != NULL) { + gk_i32pq_t *queue; + int32_t *adjwgt; + int32_t *sps; + + adjwgt = graph->iadjwgt; + + queue = gk_i32pqCreate(nvtxs); + gk_i32pqInsert(queue, v, 0); + inqueue[v] = 1; + + sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps"); + sps[v] = 0; + + /* start processing the nodes */ + while ((v = gk_i32pqGetTop(queue)) != -1) { + inqueue[v] = 2; + + /* relax the adjacent edges */ + for (i=xadj[v]; i<xadj[v+1]; i++) { + u = adjncy[i]; + if (inqueue[u] == 2) + continue; + + if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) { + sps[u] = sps[v]+adjwgt[i]; + + if 
(inqueue[u]) + gk_i32pqUpdate(queue, u, -sps[u]); + else { + gk_i32pqInsert(queue, u, -sps[u]); + inqueue[u] = 1; + } + } + } + } + + *r_sps = (void *)sps; + + gk_i32pqDestroy(queue); + } + else { + gk_fpq_t *queue; + float *adjwgt; + float *sps; + + adjwgt = graph->fadjwgt; + + queue = gk_fpqCreate(nvtxs); + gk_fpqInsert(queue, v, 0); + inqueue[v] = 1; + + sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps"); + sps[v] = 0; + + /* start processing the nodes */ + while ((v = gk_fpqGetTop(queue)) != -1) { + inqueue[v] = 2; + + /* relax the adjacent edges */ + for (i=xadj[v]; i<xadj[v+1]; i++) { + u = adjncy[i]; + if (inqueue[u] == 2) + continue; + + if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) { + sps[u] = sps[v]+adjwgt[i]; + + if (inqueue[u]) + gk_fpqUpdate(queue, u, -sps[u]); + else { + gk_fpqInsert(queue, u, -sps[u]); + inqueue[u] = 1; + } + } + } + } + + *r_sps = (void *)sps; + + gk_fpqDestroy(queue); + } + + gk_free((void **)&inqueue, LTERM); + +} + + +/*************************************************************************/ +/*! 
Sorts the adjacency lists in increasing vertex order + \param graph the graph itself, +*/ +/**************************************************************************/ +void gk_graph_SortAdjacencies(gk_graph_t *graph) +{ + int32_t nvtxs, nn=0; + ssize_t *xadj; + int32_t *adjncy; + int32_t *iadjwgt; + float *fadjwgt; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + iadjwgt = graph->iadjwgt; + fadjwgt = graph->fadjwgt; + + #pragma omp parallel if (nvtxs > 100) + { + ssize_t i, j, k; + gk_ikv_t *cand; + int32_t *itwgts=NULL; + float *ftwgts=NULL; + + #pragma omp single + for (i=0; i<nvtxs; i++) + nn = gk_max(nn, xadj[i+1]-xadj[i]); + + cand = gk_ikvmalloc(nn, "gk_graph_SortIndices: cand"); + if (iadjwgt) + itwgts = gk_i32malloc(nn, "gk_graph_SortIndices: itwgts"); + if (fadjwgt) + ftwgts = gk_fmalloc(nn, "gk_graph_SortIndices: ftwgts"); + + #pragma omp for schedule(static) + for (i=0; i<nvtxs; i++) { + for (k=0, j=xadj[i]; j<xadj[i+1]; j++) { + if (j > xadj[i] && adjncy[j] < adjncy[j-1]) + k = 1; /* an inversion */ + cand[j-xadj[i]].val = (int32_t)(j-xadj[i]); + cand[j-xadj[i]].key = adjncy[j]; + if (itwgts) + itwgts[j-xadj[i]] = iadjwgt[j]; + if (ftwgts) + ftwgts[j-xadj[i]] = fadjwgt[j]; + } + if (k) { + gk_ikvsorti(xadj[i+1]-xadj[i], cand); + for (j=xadj[i]; j<xadj[i+1]; j++) { + adjncy[j] = cand[j-xadj[i]].key; + if (itwgts) + iadjwgt[j] = itwgts[cand[j-xadj[i]].val]; + if (ftwgts) + fadjwgt[j] = ftwgts[cand[j-xadj[i]].val]; + } + } + } + + gk_free((void **)&cand, &itwgts, &ftwgts, LTERM); + } +} + + +/*************************************************************************/ +/*! Returns a symmetric version of a graph. The symmetric version + is constructed by applying an A op A^T operation, where op is one of + GK_GRAPH_SYM_SUM, GK_GRAPH_SYM_MIN, GK_GRAPH_SYM_MAX, GK_GRAPH_SYM_AVG. + + \param mat the matrix to be symmetrized, + \param op indicates the operation to be performed. 
The possible values are + GK_GRAPH_SYM_SUM, GK_GRAPH_SYM_MIN, GK_GRAPH_SYM_MAX, and GK_GRAPH_SYM_AVG. + + \returns the symmetrized matrix consisting only of its row-based structure. + The input matrix is not modified. + +TODO: Need to deal with all vertex attributes that are currently do not get + copied over. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op) +{ + ssize_t i, j, k, nnz; + int nrows, nadj, hasvals; + ssize_t *rowptr, *colptr, *nrowptr; + int *rowind, *colind, *nrowind, *marker, *ids; + float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL; + int32_t *irowval=NULL, *icolval=NULL, *nirowval=NULL, *iwgts=NULL; + gk_graph_t *ngraph; + + hasvals = (graph->iadjwgt != NULL || graph->fadjwgt != NULL); + + nrows = graph->nvtxs; + rowptr = graph->xadj; + rowind = graph->adjncy; + if (hasvals) { + irowval = graph->iadjwgt; + rowval = graph->fadjwgt; + } + + /* create the column view for efficient processing */ + colptr = gk_zsmalloc(nrows+1, 0, "colptr"); + colind = gk_i32malloc(rowptr[nrows], "colind"); + if (hasvals) { + if (rowval) + colval = gk_fmalloc(rowptr[nrows], "colval"); + if (irowval) + icolval = gk_i32malloc(rowptr[nrows], "icolval"); + } + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + colptr[rowind[j]]++; + } + MAKECSR(i, nrows, colptr); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + colind[colptr[rowind[j]]] = i; + if (hasvals) { + if (rowval) + colval[colptr[rowind[j]]] = rowval[j]; + if (irowval) + icolval[colptr[rowind[j]]] = irowval[j]; + } + colptr[rowind[j]]++; + } + } + SHIFTCSR(i, nrows, colptr); + + + ngraph = gk_graph_Create(); + ngraph->nvtxs = graph->nvtxs; + + nrowptr = ngraph->xadj = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr"); + nrowind = ngraph->adjncy = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind"); + if (hasvals) { + if (rowval) + nrowval = graph->fadjwgt 
= gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + if (irowval) + nirowval = graph->iadjwgt = gk_i32malloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval"); + } + + marker = gk_ismalloc(nrows, -1, "marker"); + ids = gk_imalloc(nrows, "ids"); + if (hasvals) { + if (rowval) + wgts = gk_fmalloc(nrows, "wgts"); + if (irowval) + iwgts = gk_i32malloc(nrows, "wgts"); + } + + nrowptr[0] = nnz = 0; + for (i=0; i<nrows; i++) { + nadj = 0; + /* out-edges */ + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + ids[nadj] = rowind[j]; + if (wgts) + wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*rowval[j] : rowval[j]); + if (iwgts) + iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*irowval[j] : irowval[j]); + marker[rowind[j]] = nadj++; + } + + /* in-edges */ + for (j=colptr[i]; j<colptr[i+1]; j++) { + if (marker[colind[j]] == -1) { + if (op != GK_CSR_SYM_MIN) { + ids[nadj] = colind[j]; + if (wgts) + wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]); + if (iwgts) + iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 
0.5*icolval[j] : icolval[j]); + nadj++; + } + } + else { + if (wgts) { + switch (op) { + case GK_CSR_SYM_MAX: + wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_MIN: + wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_SUM: + wgts[marker[colind[j]]] += colval[j]; + break; + case GK_CSR_SYM_AVG: + wgts[marker[colind[j]]] = 0.5*(wgts[marker[colind[j]]] + colval[j]); + break; + default: + errexit("Unsupported op for MakeSymmetric!\n"); + } + } + if (iwgts) { + switch (op) { + case GK_CSR_SYM_MAX: + iwgts[marker[colind[j]]] = gk_max(icolval[j], iwgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_MIN: + iwgts[marker[colind[j]]] = gk_min(icolval[j], iwgts[marker[colind[j]]]); + break; + case GK_CSR_SYM_SUM: + iwgts[marker[colind[j]]] += icolval[j]; + break; + case GK_CSR_SYM_AVG: + iwgts[marker[colind[j]]] = 0.5*(wgts[marker[colind[j]]] + icolval[j]); + break; + default: + errexit("Unsupported op for MakeSymmetric!\n"); + } + } + marker[colind[j]] = -1; + } + } + + /* go over out edges again to resolve any edges that were not found in the in + * edges */ + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (marker[rowind[j]] != -1) { + if (op == GK_CSR_SYM_MIN) + ids[marker[rowind[j]]] = -1; + marker[rowind[j]] = -1; + } + } + + /* put the non '-1' entries in ids[] into i's row */ + for (j=0; j<nadj; j++) { + if (ids[j] != -1) { + nrowind[nnz] = ids[j]; + if (wgts) + nrowval[nnz] = wgts[j]; + if (iwgts) + nirowval[nnz] = iwgts[j]; + nnz++; + } + } + nrowptr[i+1] = nnz; + } + + gk_free((void **)&colptr, &colind, &colval, &icolval, &marker, &ids, &wgts, &iwgts, LTERM); + + return ngraph; +} + + + +#ifdef XXX + +/*************************************************************************/ +/*! Returns a subgraphrix containing a certain set of rows. + \param graph is the original graphrix. + \param nrows is the number of rows to extract. + \param rind is the set of row numbers to extract. 
+ \returns the row structure of the newly created subgraphrix. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_ExtractRows(gk_graph_t *graph, int nrows, int *rind) +{ + ssize_t i, ii, j, nnz; + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + ngraph->nrows = nrows; + ngraph->ncols = graph->ncols; + + for (nnz=0, i=0; i<nrows; i++) + nnz += graph->rowptr[rind[i]+1]-graph->rowptr[rind[i]]; + + ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr"); + ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind"); + ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval"); + + ngraph->rowptr[0] = 0; + for (nnz=0, j=0, ii=0; ii<nrows; ii++) { + i = rind[ii]; + gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz); + gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz); + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + ngraph->rowptr[++j] = nnz; + } + ASSERT(j == ngraph->nrows); + + return ngraph; +} + + +/*************************************************************************/ +/*! Returns a subgraphrix corresponding to a specified partitioning of rows. + \param graph is the original graphrix. + \param part is the partitioning vector of the rows. + \param pid is the partition ID that will be extracted. + \returns the row structure of the newly created subgraphrix. 
+*/ +/**************************************************************************/ +gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid) +{ + ssize_t i, j, nnz; + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + ngraph->nrows = 0; + ngraph->ncols = graph->ncols; + + for (nnz=0, i=0; i<graph->nrows; i++) { + if (part[i] == pid) { + ngraph->nrows++; + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + } + } + + ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr"); + ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind"); + ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval"); + + ngraph->rowptr[0] = 0; + for (nnz=0, j=0, i=0; i<graph->nrows; i++) { + if (part[i] == pid) { + gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz); + gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz); + nnz += graph->rowptr[i+1]-graph->rowptr[i]; + ngraph->rowptr[++j] = nnz; + } + } + ASSERT(j == ngraph->nrows); + + return ngraph; +} + + +/*************************************************************************/ +/*! Splits the graphrix into multiple sub-graphrices based on the provided + color array. + \param graph is the original graphrix. + \param color is an array of size equal to the number of non-zeros + in the graphrix (row-wise structure). The graphrix is split into + as many parts as the number of colors. For meaningfull results, + the colors should be numbered consecutively starting from 0. + \returns an array of graphrices for each supplied color number. 
+*/ +/**************************************************************************/ +gk_graph_t **gk_graph_Split(gk_graph_t *graph, int *color) +{ + ssize_t i, j; + int nrows, ncolors; + ssize_t *rowptr; + int *rowind; + float *rowval; + gk_graph_t **sgraphs; + + nrows = graph->nrows; + rowptr = graph->rowptr; + rowind = graph->rowind; + rowval = graph->rowval; + + ncolors = gk_imax(rowptr[nrows], color)+1; + + sgraphs = (gk_graph_t **)gk_malloc(sizeof(gk_graph_t *)*ncolors, "gk_graph_Split: sgraphs"); + for (i=0; i<ncolors; i++) { + sgraphs[i] = gk_graph_Create(); + sgraphs[i]->nrows = graph->nrows; + sgraphs[i]->ncols = graph->ncols; + sgraphs[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_graph_Split: sgraphs[i]->rowptr"); + } + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + sgraphs[color[j]]->rowptr[i]++; + } + for (i=0; i<ncolors; i++) + MAKECSR(j, nrows, sgraphs[i]->rowptr); + + for (i=0; i<ncolors; i++) { + sgraphs[i]->rowind = gk_imalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowind"); + sgraphs[i]->rowval = gk_fmalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowval"); + } + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + sgraphs[color[j]]->rowind[sgraphs[color[j]]->rowptr[i]] = rowind[j]; + sgraphs[color[j]]->rowval[sgraphs[color[j]]->rowptr[i]] = rowval[j]; + sgraphs[color[j]]->rowptr[i]++; + } + } + + for (i=0; i<ncolors; i++) + SHIFTCSR(j, nrows, sgraphs[i]->rowptr); + + return sgraphs; +} + + +/*************************************************************************/ +/*! Prunes certain rows/columns of the graphrix. The prunning takes place + by analyzing the row structure of the graphrix. The prunning takes place + by removing rows/columns but it does not affect the numbering of the + remaining rows/columns. 
+ + \param graph the graphrix to be prunned, + \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) + of the graphrix will be prunned, + \param minf is the minimum number of rows (columns) that a column (row) must + be present in order to be kept, + \param maxf is the maximum number of rows (columns) that a column (row) must + be present at in order to be kept. + \returns the prunned graphrix consisting only of its row-based structure. + The input graphrix is not modified. +*/ +/**************************************************************************/ +gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf) +{ + ssize_t i, j, nnz; + int nrows, ncols; + ssize_t *rowptr, *nrowptr; + int *rowind, *nrowind, *collen; + float *rowval, *nrowval; + gk_graph_t *ngraph; + + ngraph = gk_graph_Create(); + + nrows = ngraph->nrows = graph->nrows; + ncols = ngraph->ncols = graph->ncols; + + rowptr = graph->rowptr; + rowind = graph->rowind; + rowval = graph->rowval; + + nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr"); + nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind"); + nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval"); + + + switch (what) { + case GK_CSR_COL: + collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen"); + + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + ASSERT(rowind[j] < ncols); + collen[rowind[j]]++; + } + } + for (i=0; i<ncols; i++) + collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 
1 : 0); + + nrowptr[0] = 0; + for (nnz=0, i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + if (collen[rowind[j]]) { + nrowind[nnz] = rowind[j]; + nrowval[nnz] = rowval[j]; + nnz++; + } + } + nrowptr[i+1] = nnz; + } + gk_free((void **)&collen, LTERM); + break; + + case GK_CSR_ROW: + nrowptr[0] = 0; + for (nnz=0, i=0; i<nrows; i++) { + if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) { + for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) { + nrowind[nnz] = rowind[j]; + nrowval[nnz] = rowval[j]; + } + } + nrowptr[i+1] = nnz; + } + break; + + default: + gk_graph_Free(&ngraph); + gk_errexit(SIGERR, "Unknown prunning type of %d\n", what); + return NULL; + } + + return ngraph; +} + + + +/*************************************************************************/ +/*! Normalizes the rows/columns of the graphrix to be unit + length. + \param graph the graphrix itself, + \param what indicates what will be normalized and is obtained by + specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+ \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm +*/ +/**************************************************************************/ +void gk_graph_Normalize(gk_graph_t *graph, int what, int norm) +{ + ssize_t i, j; + int n; + ssize_t *ptr; + float *val, sum; + + if (what&GK_CSR_ROW && graph->rowval) { + n = graph->nrows; + ptr = graph->rowptr; + val = graph->rowval; + + #pragma omp parallel if (ptr[n] > OMPMINOPS) + { + #pragma omp for private(j,sum) schedule(static) + for (i=0; i<n; i++) { + for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++){ + if (norm == 2) + sum += val[j]*val[j]; + else if (norm == 1) + sum += val[j]; /* assume val[j] > 0 */ + } + if (sum > 0) { + if (norm == 2) + sum=1.0/sqrt(sum); + else if (norm == 1) + sum=1.0/sum; + for (j=ptr[i]; j<ptr[i+1]; j++) + val[j] *= sum; + + } + } + } + } + + if (what&GK_CSR_COL && graph->colval) { + n = graph->ncols; + ptr = graph->colptr; + val = graph->colval; + + #pragma omp parallel if (ptr[n] > OMPMINOPS) + { + #pragma omp for private(j,sum) schedule(static) + for (i=0; i<n; i++) { + for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++) + if (norm == 2) + sum += val[j]*val[j]; + else if (norm == 1) + sum += val[j]; + if (sum > 0) { + if (norm == 2) + sum=1.0/sqrt(sum); + else if (norm == 1) + sum=1.0/sum; + for (j=ptr[i]; j<ptr[i+1]; j++) + val[j] *= sum; + } + } + } + } +} + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/htable.c b/3rdParty/metis/metis-5.1.1/GKlib/htable.c new file mode 100644 index 000000000..078e11434 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/htable.c @@ -0,0 +1,247 @@ +/* + * Copyright 2004, Regents of the University of Minnesota + * + * This file contains routines for manipulating a direct-access hash table + * + * Started 3/22/04 + * George + * + */ + +#include <GKlib.h> + +/****************************************************************************** +* This function creates the hash-table 
+*******************************************************************************/
+gk_HTable_t *HTable_Create(int nelements)
+{
+  gk_HTable_t *htable;
+
+  /* allocate the descriptor and then its array of nelements key/value slots */
+  htable = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable");
+  htable->harray = gk_ikvmalloc(nelements, "HTable_Create: harray");
+  htable->nelements = nelements;
+
+  /* mark every slot as empty and zero the occupancy counter */
+  HTable_Reset(htable);
+
+  return htable;
+}
+
+
+/******************************************************************************
+* This function resets the data-structures associated with the hash-table:
+* every slot is marked HTABLE_EMPTY and the number of stored keys is set to 0.
+*******************************************************************************/
+void HTable_Reset(gk_HTable_t *htable)
+{
+  int i;
+
+  for (i=0; i<htable->nelements; i++)
+    htable->harray[i].key = HTABLE_EMPTY;
+  htable->htsize = 0;
+
+}
+
+/******************************************************************************
+* This function resizes the hash-table to nelements slots and re-inserts the
+* key-value pairs that were stored in the old slot array.
+*******************************************************************************/
+void HTable_Resize(gk_HTable_t *htable, int nelements)
+{
+  int i, old_nelements;
+  gk_ikv_t *old_harray;
+
+  old_nelements = htable->nelements;
+  old_harray    = htable->harray;
+
+  /* prepare larger hash */
+  htable->nelements = nelements;
+  htable->htsize    = 0;
+  htable->harray    = gk_ikvmalloc(nelements, "HTable_Resize: harray");
+  for (i=0; i<nelements; i++)
+    htable->harray[i].key = HTABLE_EMPTY;
+
+  /* reassign the values; HTABLE_DELETED tombstones are not live entries, so
+     they must not be carried over (re-inserting them would store the
+     sentinel as a key and inflate htsize) */
+  for (i=0; i<old_nelements; i++) {
+    if (old_harray[i].key != HTABLE_EMPTY && old_harray[i].key != HTABLE_DELETED)
+      HTable_Insert(htable, old_harray[i].key, old_harray[i].val);
+  }
+
+  /* remove old harray */
+  gk_free((void **)&old_harray, LTERM);
+}
+
+
+/******************************************************************************
+* This function inserts a key-value pair in the array. The pair is stored in
+* the first empty or deleted slot found by scanning from the key's hash
+* position to the end of the array and then from the start of the array up
+* to the hash position.
+*******************************************************************************/
+void HTable_Insert(gk_HTable_t *htable, int key, int val)
+{
+  int i, first;
+
+  /* double the table once more than half of the slots are in use */
+  if (htable->htsize > htable->nelements/2)
+    HTable_Resize(htable, 2*htable->nelements);
+
+  first = HTable_HFunction(htable->nelements, key);
+
+  /* scan [first, nelements) */
+  for (i=first; i<htable->nelements; i++) {
+    if (htable->harray[i].key == HTABLE_EMPTY || htable->harray[i].key == HTABLE_DELETED) {
+      htable->harray[i].key = key;
+      htable->harray[i].val = val;
+      htable->htsize++;
+      return;
+    }
+  }
+
+  /* wrap around and scan [0, first) */
+  for (i=0; i<first; i++) {
+    if (htable->harray[i].key == HTABLE_EMPTY || htable->harray[i].key == HTABLE_DELETED) {
+      htable->harray[i].key = key;
+      htable->harray[i].val = val;
+      htable->htsize++;
+      return;
+    }
+  }
+
+}
+
+
+/******************************************************************************
+* This function deletes key from the htable. The first slot holding the key
+* is marked HTABLE_DELETED; nothing happens if the key is not stored.
+*******************************************************************************/
+void HTable_Delete(gk_HTable_t *htable, int key)
+{
+  int i, first;
+
+  first = HTable_HFunction(htable->nelements, key);
+
+  /* scan [first, nelements) */
+  for (i=first; i<htable->nelements; i++) {
+    if (htable->harray[i].key == key) {
+      htable->harray[i].key = HTABLE_DELETED;
+      htable->htsize--;
+      return;
+    }
+  }
+
+  /* wrap around and scan [0, first) */
+  for (i=0; i<first; i++) {
+    if (htable->harray[i].key == key) {
+      htable->harray[i].key = HTABLE_DELETED;
+      htable->htsize--;
+      return;
+    }
+  }
+
+}
+
+
+/******************************************************************************
+* This function returns the data associated with the key in the hashtable,
+* or -1 if an empty slot is reached or the whole array has been scanned
+* without finding the key.
+*******************************************************************************/
+int HTable_Search(gk_HTable_t *htable, int key)
+{
+  int i, first;
+
+  first = HTable_HFunction(htable->nelements, key);
+
+  /* scan [first, nelements); an empty slot ends the probe sequence */
+  for (i=first; i<htable->nelements; i++) {
+    if (htable->harray[i].key == key)
+      return htable->harray[i].val;
+    else if (htable->harray[i].key == HTABLE_EMPTY)
+      return -1;
+  }
+
+  /* wrap around and scan [0, first) */
+  for (i=0; i<first; i++) {
+    if (htable->harray[i].key == key)
+      return htable->harray[i].val;
+    else if (htable->harray[i].key == HTABLE_EMPTY)
+      return -1;
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function returns the next key/val. A call with type == HTABLE_FIRST
+* starts a new scan at the key's hash position; later calls continue it.
+* It returns 1 and stores the value in *r_val for every slot holding the key,
+* and -1 once an empty slot is reached or the whole array has been scanned.
+* The scan position is kept in static variables, so only one scan can be
+* active at a time.
+*******************************************************************************/
+int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type)
+{
+  int i;
+  static int first, last, wrapped;
+
+  if (type == HTABLE_FIRST) {
+    first = last = HTable_HFunction(htable->nelements, key);
+    wrapped = 0;
+  }
+
+  /* first leg: [first, nelements). Whether this leg is still pending is
+     tracked with an explicit flag; comparing first against last cannot tell,
+     because both are equal right after HTABLE_FIRST. */
+  if (!wrapped) {
+    for (i=first; i<htable->nelements; i++) {
+      if (htable->harray[i].key == key) {
+        *r_val = htable->harray[i].val;
+        first = i+1;
+        return 1;
+      }
+      else if (htable->harray[i].key == HTABLE_EMPTY)
+        return -1;
+    }
+    first   = 0;
+    wrapped = 1;
+  }
+
+  /* second leg: [first, last), i.e., the slots before the hash position */
+  for (i=first; i<last; i++) {
+    if (htable->harray[i].key == key) {
+      *r_val = htable->harray[i].val;
+      first = i+1;
+      return 1;
+    }
+    else if (htable->harray[i].key == HTABLE_EMPTY)
+      return -1;
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function returns the data associated with the key in the hashtable
+* and marks its slot as deleted. Reaching an empty slot before the key is
+* found is reported through gk_errexit().
+*******************************************************************************/
+int HTable_SearchAndDelete(gk_HTable_t *htable, int key)
+{
+  int i, first;
+
+  first = HTable_HFunction(htable->nelements, key);
+
+  /* scan [first, nelements) */
+  for (i=first; i<htable->nelements; i++) {
+    if (htable->harray[i].key == key) {
+      htable->harray[i].key = HTABLE_DELETED;
+      htable->htsize--;
+      return htable->harray[i].val;
+    }
+    else if (htable->harray[i].key == HTABLE_EMPTY)
+      gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
+  }
+
+  /* wrap around and scan [0, first) */
+  for (i=0; i<first; i++) {
+    if (htable->harray[i].key == key) {
+      htable->harray[i].key = HTABLE_DELETED;
+      htable->htsize--;
+      return htable->harray[i].val;
+    }
+    else if (htable->harray[i].key == HTABLE_EMPTY)
+      gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
+  }
+
+  return -1;
+
+}
+
+
+
+/******************************************************************************
+* This function destroys the data 
structures associated with the hash-table
+*******************************************************************************/
+void HTable_Destroy(gk_HTable_t *htable)
+{
+  /* hand both the slot array and the table descriptor to gk_free() */
+  gk_free((void **)&htable->harray, &htable, LTERM);
+}
+
+
+/******************************************************************************
+* This is the hash-function. It maps the key to a slot in [0, nelements) by
+* taking the remainder of the division of the key by the table size.
+*******************************************************************************/
+int HTable_HFunction(int nelements, int key)
+{
+  int slot;
+
+  /* a table without slots has no valid position; avoid dividing by zero */
+  if (nelements <= 0)
+    return 0;
+
+  /* in C the remainder takes the sign of the dividend, so a negative key
+     would yield a negative (out-of-bounds) slot; fold it back into range */
+  slot = key%nelements;
+  if (slot < 0)
+    slot += nelements;
+
+  return slot;
+}
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/io.c b/3rdParty/metis/metis-5.1.1/GKlib/io.c
new file mode 100644
index 000000000..a15648356
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/io.c
@@ -0,0 +1,621 @@
+/*!
+\file io.c
+\brief Various file I/O functions.
+
+This file contains various functions that perform I/O.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: io.c 18951 2015-08-08 20:10:46Z karypis $ \endverbatim
+*/
+
+#ifdef HAVE_GETLINE
+/* Get getline to be defined. */
+#define _GNU_SOURCE
+#include <stdio.h>
+#undef _GNU_SOURCE
+#endif
+
+#include <GKlib.h>
+
+/*************************************************************************
+* This function opens a file. On failure it prints the file name, the mode,
+* msg and the system error via perror() and then calls errexit().
+**************************************************************************/
+FILE *gk_fopen(char *fname, char *mode, const char *msg)
+{
+  FILE *fp;
+  char errmsg[8192];
+
+  fp = fopen(fname, mode);
+  if (fp != NULL)
+    return fp;
+
+  /* snprintf() bounds the write, so a long file name cannot overflow errmsg */
+  snprintf(errmsg, sizeof(errmsg), "file: %s, mode: %s, [%s]", fname, mode, msg);
+  perror(errmsg);
+  errexit("Failed on gk_fopen()\n");
+
+  return NULL;
+}
+
+
+/*************************************************************************
+* This function closes a file
+**************************************************************************/
+void gk_fclose(FILE *fp)
+{
+  fclose(fp);
+}
+
+
+/*************************************************************************/
+/*! 
This function is a wrapper around the read() function that ensures
+    that all data has been read, by issuing multiple read requests.
+    The only time when not 'count' items are read is when the EOF has been
+    reached.
+    \returns the number of bytes read, or -1 if read() reported an error.
+*/
+/*************************************************************************/
+ssize_t gk_read(int fd, void *vbuf, size_t count)
+{
+  char *cursor = (char *)vbuf;
+  ssize_t remaining = count;
+  ssize_t got;
+
+  for (;;) {
+    got = read(fd, cursor, remaining);
+    if (got == -1)
+      return -1;
+
+    cursor    += got;
+    remaining -= got;
+
+    /* stop once everything has arrived or read() returned no more data */
+    if (remaining <= 0 || got <= 0)
+      break;
+  }
+
+  return count-remaining;
+}
+
+
+/*************************************************************************/
+/*! This function is a wrapper around the write() function that ensures
+    that all data has been written, by issuing multiple write requests.
+    \returns count, or -1 if write() reported an error.
+*/
+/*************************************************************************/
+ssize_t gk_write(int fd, void *vbuf, size_t count)
+{
+  char *cursor = (char *)vbuf;
+  ssize_t remaining = count;
+  ssize_t put;
+
+  for (;;) {
+    put = write(fd, cursor, remaining);
+    if (put == -1)
+      return -1;
+
+    cursor    += put;
+    remaining -= put;
+
+    if (remaining <= 0)
+      break;
+  }
+
+  return count;
+}
+
+
+
+
+/*************************************************************************/
+/*! This function is the GKlib implementation of glibc's getline()
+    function.
+    \returns -1 if the EOF has been reached, otherwise it returns the
+    number of bytes read. 
+*/
+/*************************************************************************/
+gk_idx_t gk_getline(char **lineptr, size_t *n, FILE *stream)
+{
+#ifdef HAVE_GETLINE
+  return getline(lineptr, n, stream);
+#else
+  size_t i;
+  int ch;
+
+  if (feof(stream))
+    return -1;
+
+  /* Initial memory allocation if *lineptr is NULL */
+  if (*lineptr == NULL || *n == 0) {
+    *n = 1024;
+    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
+  }
+  else if (*n < 2) {
+    /* a caller-supplied 1-byte buffer cannot hold a character plus the '\0';
+       grow it up front so that the check in the loop below stays valid */
+    *n = 1024;
+    *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
+  }
+
+  /* get into the main loop */
+  i = 0;
+  while ((ch = getc(stream)) != EOF) {
+    (*lineptr)[i++] = (char)ch;
+
+    /* reallocate memory if reached at the end of the buffer. The +1 is for '\0' */
+    if (i+1 >= *n) {
+      *n = 2*(*n);
+      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
+    }
+
+    if (ch == '\n')
+      break;
+  }
+  (*lineptr)[i] = '\0';
+
+  return (i == 0 ? -1 : (gk_idx_t)i);
+#endif
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of strings.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+    \returns the array of lines with trailing '\n' and '\r' characters
+           removed, or NULL if gk_getfilestats() reported no lines.
+*/
+/*************************************************************************/
+char **gk_readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0;
+  char *line=NULL, **lines=NULL;
+  FILE *fpin;
+
+  /* the line count is used to size the array of line pointers */
+  gk_getfilestats(fname, &nlines, NULL, NULL, NULL);
+  if (nlines > 0) {
+    lines = (char **)gk_malloc(nlines*sizeof(char *), "gk_readfile: lines");
+
+    fpin = gk_fopen(fname, "r", "gk_readfile");
+    nlines = 0;
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      gk_strtprune(line, "\n\r");
+      lines[nlines++] = gk_strdup(line);
+    }
+    gk_fclose(fpin);
+  }
+
+  /* the line buffer is reused across gk_getline() calls; release it once */
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines = nlines;
+
+  return lines;
+}
+
+/*************************************************************************/
+/*! 
This function reads the contents of a file and returns it in the
+    form of an array of int32_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfile(char *fname, size_t *r_nlines)
+{
+  size_t bufsz, count=0;
+  char *buf=NULL;
+  int32_t *values=NULL;
+  FILE *fp;
+
+  /* the line count reported for the file sizes the output array */
+  gk_getfilestats(fname, &count, NULL, NULL, NULL);
+  if (count > 0) {
+    values = gk_i32malloc(count, "gk_i32readfile: array");
+    fp     = gk_fopen(fname, "r", "gk_readfile");
+
+    /* parse one value from every line that gk_getline() returns */
+    for (count=0; gk_getline(&buf, &bufsz, fp) != -1; count++)
+      sscanf(buf, "%"SCNd32, &values[count]);
+
+    gk_fclose(fp);
+  }
+
+  gk_free((void **)&buf, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines = count;
+
+  return values;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of int64_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfile(char *fname, size_t *r_nlines)
+{
+  size_t bufsz, count=0;
+  char *buf=NULL;
+  int64_t *values=NULL;
+  FILE *fp;
+
+  /* the line count reported for the file sizes the output array */
+  gk_getfilestats(fname, &count, NULL, NULL, NULL);
+  if (count > 0) {
+    values = gk_i64malloc(count, "gk_i64readfile: array");
+    fp     = gk_fopen(fname, "r", "gk_readfile");
+
+    /* parse one value from every line that gk_getline() returns */
+    for (count=0; gk_getline(&buf, &bufsz, fp) != -1; count++)
+      sscanf(buf, "%"SCNd64, &values[count]);
+
+    gk_fclose(fp);
+  }
+
+  gk_free((void **)&buf, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines = count;
+
+  return values;
+}
+
+/*************************************************************************/
+/*! 
This function reads the contents of a file and returns it in the
+    form of an array of ssize_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfile(char *fname, size_t *r_nlines)
+{
+  size_t bufsz, count=0;
+  char *buf=NULL;
+  ssize_t *values=NULL;
+  FILE *fp;
+
+  /* the line count reported for the file sizes the output array */
+  gk_getfilestats(fname, &count, NULL, NULL, NULL);
+  if (count > 0) {
+    values = gk_zmalloc(count, "gk_zreadfile: array");
+    fp     = gk_fopen(fname, "r", "gk_readfile");
+
+    /* parse one value from every line that gk_getline() returns */
+    for (count=0; gk_getline(&buf, &bufsz, fp) != -1; count++)
+      sscanf(buf, "%zd", &values[count]);
+
+    gk_fclose(fp);
+  }
+
+  gk_free((void **)&buf, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines = count;
+
+  return values;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int32_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements in the file. If it is NULL,
+           this information is not returned. 
+*/
+/*************************************************************************/
+int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  /* r_nelmnts is optional; only touch it when the caller supplied it */
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int32_t) != 0) {
+    gk_errexit(SIGERR, "The size [%zd] of the file [%s] is not in multiples of sizeof(int32_t).\n", fsize, fname);
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int32_t);
+  array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i32readfilebin");
+
+  if (fread(array, sizeof(int32_t), nelmnts, fpin) != nelmnts) {
+    /* release the stream and the buffer before reporting the short read */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_writefilebin");
+
+  fsize = fwrite(a, sizeof(int32_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int64_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements in the file. If it is NULL,
+           this information is not returned. 
+*/
+/*************************************************************************/
+int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  /* r_nelmnts is optional; only touch it when the caller supplied it */
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int64_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int64_t);
+  array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i64readfilebin");
+
+  if (fread(array, sizeof(int64_t), nelmnts, fpin) != nelmnts) {
+    /* release the stream and the buffer before reporting the short read */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_writefilebin");
+
+  fsize = fwrite(a, sizeof(int64_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of ssize_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements in the file. If it is NULL,
+           this information is not returned. 
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  ssize_t *array=NULL;
+  FILE *fpin;
+
+  /* r_nelmnts is optional; only touch it when the caller supplied it */
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(ssize_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(ssize_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(ssize_t);
+  array = gk_zmalloc(nelmnts, "gk_zreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_zreadfilebin");
+
+  if (fread(array, sizeof(ssize_t), nelmnts, fpin) != nelmnts) {
+    /* release the stream and the buffer before reporting the short read */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_writefilebin");
+
+  fsize = fwrite(a, sizeof(ssize_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of float.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements in the file. If it is NULL,
+           this information is not returned. 
+*/
+/*************************************************************************/
+float *gk_freadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  float *array=NULL;
+  FILE *fpin;
+
+  /* r_nelmnts is optional; only touch it when the caller supplied it */
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(float) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(float);
+  array = gk_fmalloc(nelmnts, "gk_freadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_freadfilebin");
+
+  if (fread(array, sizeof(float), nelmnts, fpin) != nelmnts) {
+    /* release the stream and the buffer before reporting the short read */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_fwritefilebin(char *fname, size_t n, float *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_fwritefilebin");
+
+  fsize = fwrite(a, sizeof(float), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of double.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements in the file. If it is NULL,
+           this information is not returned. 
+*/
+/*************************************************************************/
+double *gk_dreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  double *array=NULL;
+  FILE *fpin;
+
+  /* r_nelmnts is optional; only touch it when the caller supplied it */
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(double) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(double);
+  array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_dreadfilebin");
+
+  if (fread(array, sizeof(double), nelmnts, fpin) != nelmnts) {
+    /* release the stream and the buffer before reporting the short read */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_dwritefilebin(char *fname, size_t n, double *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_writefilebin");
+
+  fsize = fwrite(a, sizeof(double), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/itemsets.c b/3rdParty/metis/metis-5.1.1/GKlib/itemsets.c
new file mode 100644
index 000000000..beb58aea5
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/itemsets.c
@@ -0,0 +1,210 @@
+/*!
+ * \file
+ * \brief Frequent/Closed itemset discovery routines
+ *
+ * This file contains the code for finding frequent/closed itemsets. These routines
+ * are implemented using a call-back mechanism to deal with the discovered itemsets.
+ *
+ * \date 6/13/2008
+ * \author George Karypis
+ * \version\verbatim $Id: itemsets.c 19240 2015-10-22 12:41:19Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+/*-------------------------------------------------------------*/
+/*! Data structures for use within this module.
+    isparams_t bundles the search limits, the user call-back and the
+    scratch arrays that are shared by every level of the recursion. */
+/*-------------------------------------------------------------*/
+typedef struct {
+  int minfreq;  /* the minimum frequency of a pattern */
+  int maxfreq;  /* the maximum frequency of a pattern */
+  int minlen;   /* the minimum length of the requested pattern */
+  int maxlen;   /* the maximum length of the requested pattern */
+  int tnitems;  /* the initial range of the item space */
+
+  /* the call-back function; it is invoked once per reported itemset with
+     the item ids of the itemset and the ids of the transactions that
+     contain it */
+  void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids);
+  void *stateptr;  /* the user-supplied pointer to pass to the callback */
+
+  /* workspace variables */
+  int *rmarker;    /* per-row 0/1 marker of the rows kept by the current projection */
+  gk_ikv_t *cand;  /* candidate columns: key is the column's count, val its index */
+} isparams_t;
+
+
+/*-------------------------------------------------------------*/
+/*! Prototypes for this module */
+/*-------------------------------------------------------------*/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
+         int preflen, int *prefix);
+gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
+
+
+
+/*************************************************************************/
+/*! 
The entry point of the frequent itemset discovery code.
+    \param ntrans is the number of transactions.
+    \param tranptr is the array of ntrans+1 offsets into tranind that
+           delimit the items of each transaction.
+    \param tranind stores the item ids of all the transactions.
+    \param minfreq is the minimum frequency of a reported itemset.
+    \param maxfreq is the maximum frequency of a reported itemset; -1
+           selects the number of transactions.
+    \param minlen is the minimum length of a reported itemset.
+    \param maxlen is the maximum length of a reported itemset; -1 selects
+           the number of distinct items.
+    \param process_itemset is the call-back invoked for every reported
+           itemset.
+    \param stateptr is passed unchanged to the call-back.
+*/
+/*************************************************************************/
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
+         int minfreq, int maxfreq, int minlen, int maxlen,
+         void (*process_itemset)(void *stateptr, int nitems, int *itemids,
+                                 int ntrans, int *transids),
+         void *stateptr)
+{
+  gk_csr_t *mat, *pmat;
+  isparams_t params;
+  int *pattern;
+
+  /* Create the matrix: one row per transaction, one column per item id.
+     NOTE(review): the number of columns is derived from the largest entry of
+     tranind, so this presumably expects at least one item - confirm. */
+  mat = gk_csr_Create();
+  mat->nrows  = ntrans;
+  mat->ncols  = tranind[gk_iargmax(tranptr[ntrans], tranind, 1)]+1;
+  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
+  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
+  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
+
+  /* Setup the parameters */
+  params.minfreq  = minfreq;
+  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);
+  params.minlen   = minlen;
+  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);
+  params.tnitems  = mat->ncols;
+  params.callback = process_itemset;
+  params.stateptr = stateptr;
+  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
+  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
+
+  /* Perform the initial projection */
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+  pmat = itemsets_project_matrix(&params, mat, -1);
+  gk_csr_Free(&mat);
+
+  /* pattern holds the prefix of item ids built up by the recursion */
+  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
+  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern);
+
+  gk_csr_Free(&pmat);
+  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
+
+}
+
+
+
+/*************************************************************************/
+/*! 
The recursive routine for DFS-based frequent pattern discovery.
+    Every column of mat extends the current prefix by one item. The
+    extended prefix is reported through the call-back when it is long
+    enough, and the search recurses on the matrix projected on that
+    column while the prefix is shorter than the maximum length. */
+/*************************************************************************/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
+         int preflen, int *prefix)
+{
+  ssize_t i;
+  gk_csr_t *cmat;
+
+  /* Project each frequent column */
+  for (i=0; i<mat->ncols; i++) {
+    prefix[preflen] = mat->colids[i];
+
+    /* report the itemset together with the rows stored in column i */
+    if (preflen+1 >= params->minlen)
+      (*params->callback)(params->stateptr, preflen+1, prefix,
+          mat->colptr[i+1]-mat->colptr[i], mat->colind+mat->colptr[i]);
+
+    /* extend the prefix further using only the columns after i */
+    if (preflen+1 < params->maxlen) {
+      cmat = itemsets_project_matrix(params, mat, i);
+      itemsets_find_frequent_itemsets(params, cmat, preflen+1, prefix);
+      gk_csr_Free(&cmat);
+    }
+  }
+
+}
+
+
+/******************************************************************************/
+/*! This function projects a matrix w.r.t. to a particular column.
+    It performs the following steps:
+    - Determines the length of each column that is remaining.
+    - Sorts the columns in increasing length.
+    - Creates a column-based version of the matrix with the proper
+      column ordering.
+    \param params supplies the frequency limits and the rmarker/cand
+           workspace arrays.
+    \param mat is the matrix to project; its column-based structure is used.
+    \param cid is the column to project on, or -1 for the initial
+           projection that keeps every row.
+    \returns the projected matrix, which stores only the column-based
+           structure (colptr, colind, colids).
+ */
+/*******************************************************************************/
+gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
+{
+  ssize_t i, j, k, ii, pnnz;
+  int nrows, ncols, pnrows, pncols;
+  ssize_t *colptr, *pcolptr;
+  int *colind, *colids, *pcolind, *pcolids, *rmarker;
+  gk_csr_t *pmat;
+  gk_ikv_t *cand;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colids = mat->colids;
+
+  rmarker = params->rmarker;
+  cand    = params->cand;
+
+
+  /* Allocate space for the projected matrix based on what you know thus far */
+  pmat = gk_csr_Create();
+  pmat->nrows = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
+
+
+  /* Mark the rows that will be kept and determine the prowids */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 1, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++)
+      rmarker[colind[i]] = 1;
+  }
+
+
+  /* Determine the length of each column that will be left in the projected matrix */
+  for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
+    /* k counts the entries of column i that fall in a marked row */
+    for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
+      k += rmarker[colind[j]];
+    }
+    /* keep the column only if its count is within the frequency limits */
+    if (k >= params->minfreq && k <= params->maxfreq) {
+      cand[pncols].val   = i;
+      cand[pncols++].key = k;
+      pnnz += k;
+    }
+  }
+
+  /* Sort the columns in increasing order */
+  gk_ikvsorti(pncols, cand);
+
+
+  /* Allocate space for the remaining fields of the projected matrix */
+  pmat->ncols  = pncols;
+  pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
+  pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
+  pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
+
+
+  /* Populate the projected matrix */
+  pcolptr[0] = 0;
+  for (pnnz=0, ii=0; ii<pncols; ii++) {
+    i = cand[ii].val;
+    /* copy only the entries of column i that fall in a marked row */
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (rmarker[colind[j]])
+        pcolind[pnnz++] = colind[j];
+    }
+
+    pcolids[ii]   = colids[i];
+    pcolptr[ii+1] = pnnz;
+  }
+
+
+  /* Reset the rmarker array */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 0, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++)
+      rmarker[colind[i]] = 0;
+  }
+
+
+  return pmat;
+}
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/mcore.c b/3rdParty/metis/metis-5.1.1/GKlib/mcore.c
new file mode 100644
index 000000000..6442e03a9
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/mcore.c
@@ -0,0 +1,393 @@
+/*! 
+\file
+\brief Functions dealing with creating and allocating mcores
+
+\date Started 5/30/11
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota
+\version $Id: mcore.c 13953 2013-03-30 16:20:07Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! This function creates an mcore whose core buffer is coresize bytes
+    long; no core buffer is allocated when coresize is 0.
+ */
+/*************************************************************************/
+gk_mcore_t *gk_mcoreCreate(size_t coresize)
+{
+  gk_mcore_t *mc;
+
+  /* start from an all-zero descriptor */
+  mc = (gk_mcore_t *)gk_malloc(sizeof(gk_mcore_t), "gk_mcoreCreate: mcore");
+  memset(mc, 0, sizeof(gk_mcore_t));
+
+  mc->coresize = coresize;
+  mc->corecpos = 0;
+
+  if (coresize == 0)
+    mc->core = NULL;
+  else
+    mc->core = gk_malloc(mc->coresize, "gk_mcoreCreate: core");
+
+  /* allocate the memory for keeping track of malloc ops */
+  mc->nmops = 2048;
+  mc->cmop  = 0;
+  mc->mops  = (gk_mop_t *)gk_malloc(mc->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops");
+
+  return mc;
+}
+
+
+/*************************************************************************/
+/*! This function creates an mcore. This version is used for gkmcore: it
+    relies on malloc() directly and returns NULL if an allocation fails.
+ */
+/*************************************************************************/
+gk_mcore_t *gk_gkmcoreCreate()
+{
+  gk_mcore_t *mc;
+
+  mc = (gk_mcore_t *)malloc(sizeof(gk_mcore_t));
+  if (mc == NULL)
+    return NULL;
+  memset(mc, 0, sizeof(gk_mcore_t));
+
+  /* allocate the memory for keeping track of malloc ops */
+  mc->nmops = 2048;
+  mc->cmop  = 0;
+  mc->mops  = (gk_mop_t *)malloc(mc->nmops*sizeof(gk_mop_t));
+  if (mc->mops != NULL)
+    return mc;
+
+  /* the bookkeeping array could not be allocated; undo the descriptor */
+  free(mc);
+  return NULL;
+}
+
+
+/*************************************************************************/
+/*! This function destroys an mcore. 
+ Nothing is done when *r_mcore is NULL; otherwise the core buffer, the mop array and the mcore itself are released with gk_free() and *r_mcore is set to NULL. A non-zero showstats prints the allocation counters first. */ +/*************************************************************************/ +void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats) +{ + gk_mcore_t *mcore = *r_mcore; + + if (mcore == NULL) + return; + + if (showstats) + printf("\n gk_mcore statistics\n" + " coresize: %12zu nmops: %12zu cmop: %6zu\n" + " num_callocs: %12zu num_hallocs: %12zu\n" + " size_callocs: %12zu size_hallocs: %12zu\n" + " cur_callocs: %12zu cur_hallocs: %12zu\n" + " max_callocs: %12zu max_hallocs: %12zu\n", + mcore->coresize, mcore->nmops, mcore->cmop, + mcore->num_callocs, mcore->num_hallocs, + mcore->size_callocs, mcore->size_hallocs, + mcore->cur_callocs, mcore->cur_hallocs, + mcore->max_callocs, mcore->max_hallocs); + + if (mcore->cur_callocs != 0 || mcore->cur_hallocs != 0 || mcore->cmop != 0) { /* leftovers: mops that were never released */ + printf("***Warning: mcore memory was not fully freed when destroyed.\n" + " cur_callocs: %6zu cur_hallocs: %6zu cmop: %6zu\n", + mcore->cur_callocs, mcore->cur_hallocs, mcore->cmop); + } + + gk_free((void **)&mcore->core, &mcore->mops, &mcore, LTERM); /* core is NULL when coresize was 0; gk_free() skips NULL pointers */ + + *r_mcore = NULL; +} + + +/*************************************************************************/ +/*! This function destroys an mcore. This version is for gkmcore. 
+ It releases the mop array and the mcore with free(), matching the malloc() calls in gk_gkmcoreCreate(). Nothing is done when *r_mcore is NULL, and *r_mcore is set to NULL on return. */ +/*************************************************************************/ +void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats) +{ + gk_mcore_t *mcore = *r_mcore; + + if (mcore == NULL) + return; + + if (showstats) + printf("\n gk_mcore statistics\n" + " nmops: %12zu cmop: %6zu\n" + " num_hallocs: %12zu\n" + " size_hallocs: %12zu\n" + " cur_hallocs: %12zu\n" + " max_hallocs: %12zu\n", + mcore->nmops, mcore->cmop, + mcore->num_hallocs, + mcore->size_hallocs, + mcore->cur_hallocs, + mcore->max_hallocs); + + if (mcore->cur_hallocs != 0 || mcore->cmop != 0) { /* leftovers: mops that were never released */ + printf("***Warning: mcore memory was not fully freed when destroyed.\n" + " cur_hallocs: %6zu cmop: %6zu\n", + mcore->cur_hallocs, mcore->cmop); + } + + free(mcore->mops); + free(mcore); + + *r_mcore = NULL; +} + + +/*************************************************************************/ +/*! This function allocates space from the core/heap. The request is rounded up to a multiple of 8 bytes; it is served from the core only if corecpos plus the padded size stays strictly below coresize, otherwise from the heap via gk_malloc(). A mop is recorded in either case. + */ +/*************************************************************************/ +void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes) +{ + void *ptr; + + /* pad to make pointers 8-byte aligned */ + nbytes += (nbytes%8 == 0 ? 0 : 8 - nbytes%8); + + if (mcore->corecpos + nbytes < mcore->coresize) { + /* service this request from the core */ + ptr = ((char *)mcore->core)+mcore->corecpos; + mcore->corecpos += nbytes; + + gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, ptr); + } + else { + /* service this request from the heap */ + ptr = gk_malloc(nbytes, "gk_mcoremalloc: ptr"); + + gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, ptr); + } + + /* + printf("MCMALLOC: %zu %d %8zu\n", mcore->cmop-1, + mcore->mops[mcore->cmop-1].type, mcore->mops[mcore->cmop-1].nbytes); + */ + + return ptr; +} + + +/*************************************************************************/ +/*! 
This function sets a marker in the stack of malloc ops to be used + subsequently for freeing purposes (see gk_mcorePop()) + */ +/*************************************************************************/ +void gk_mcorePush(gk_mcore_t *mcore) +{ + gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL); + /* printf("MCPPUSH: %zu\n", mcore->cmop-1); */ +} + + +/*************************************************************************/ +/*! This function sets a marker in the stack of malloc ops to be used + subsequently for freeing purposes. This is the gkmcore version (see gk_gkmcorePop()). + */ +/*************************************************************************/ +void gk_gkmcorePush(gk_mcore_t *mcore) +{ + gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL); + /* printf("MCPPUSH: %zu\n", mcore->cmop-1); */ +} + + +/*************************************************************************/ +/*! This function frees all mops since the last push. The mop stack is unwound from the top: core mops rewind corecpos, heap mops are released with gk_free(), and unwinding stops right after the first marker found has been removed (or when the stack is empty). + */ +/*************************************************************************/ +void gk_mcorePop(gk_mcore_t *mcore) +{ + while (mcore->cmop > 0) { + mcore->cmop--; + switch (mcore->mops[mcore->cmop].type) { + case GK_MOPT_MARK: /* push marker */ + goto DONE; + break; + + case GK_MOPT_CORE: /* core free */ + if (mcore->corecpos < mcore->mops[mcore->cmop].nbytes) + errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n", + mcore->coresize, mcore->corecpos, mcore->mops[mcore->cmop].nbytes); + + mcore->corecpos -= mcore->mops[mcore->cmop].nbytes; + mcore->cur_callocs -= mcore->mops[mcore->cmop].nbytes; + break; + + case GK_MOPT_HEAP: /* heap free */ + gk_free((void **)&mcore->mops[mcore->cmop].ptr, LTERM); + mcore->cur_hallocs -= mcore->mops[mcore->cmop].nbytes; + break; + + default: + gk_errexit(SIGMEM, "Unknown mop type of %d\n", mcore->mops[mcore->cmop].type); + } + } + +DONE: + ; + /*printf("MCPPOP: %zu\n", mcore->cmop); */ +} + + +/*************************************************************************/ +/*! 
This function frees all mops since the last push. This version is + for popping the gkmcore and it uses free instead of gk_free. + */ +/*************************************************************************/ +void gk_gkmcorePop(gk_mcore_t *mcore) +{ + while (mcore->cmop > 0) { + mcore->cmop--; + switch (mcore->mops[mcore->cmop].type) { + case GK_MOPT_MARK: /* push marker */ + goto DONE; + break; + + case GK_MOPT_HEAP: /* heap free */ + free(mcore->mops[mcore->cmop].ptr); + mcore->cur_hallocs -= mcore->mops[mcore->cmop].nbytes; + break; + + default: + gk_errexit(SIGMEM, "Unknown mop type of %d\n", mcore->mops[mcore->cmop].type); + } + } + +DONE: + ; +} + + +/*************************************************************************/ +/*! Adds a memory allocation at the end of the list. type is one of GK_MOPT_MARK, GK_MOPT_CORE or GK_MOPT_HEAP, nbytes is the size that is accounted for and ptr is the recorded pointer. The mop array doubles when it is full; it is resized with gk_realloc() because it is allocated with gk_malloc() in gk_mcoreCreate() and released with gk_free() in gk_mcoreDestroy(), so the heap tracker must follow the pointer when the array moves. + */ +/*************************************************************************/ +void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr) +{ + if (mcore->cmop == mcore->nmops) { + mcore->nmops *= 2; + mcore->mops = (gk_mop_t *)gk_realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t), "gk_mcoreAdd: mcore->mops"); + if (mcore->mops == NULL) + gk_errexit(SIGMEM, "***Memory allocation for mcore failed.\n"); + } + + mcore->mops[mcore->cmop].type = type; + mcore->mops[mcore->cmop].nbytes = nbytes; + mcore->mops[mcore->cmop].ptr = ptr; + mcore->cmop++; + + switch (type) { + case GK_MOPT_MARK: + break; + + case GK_MOPT_CORE: + mcore->num_callocs++; + mcore->size_callocs += nbytes; + mcore->cur_callocs += nbytes; + if (mcore->max_callocs < mcore->cur_callocs) + mcore->max_callocs = mcore->cur_callocs; + break; + + case GK_MOPT_HEAP: + mcore->num_hallocs++; + mcore->size_hallocs += nbytes; + mcore->cur_hallocs += nbytes; + if (mcore->max_hallocs < mcore->cur_hallocs) + mcore->max_hallocs = mcore->cur_hallocs; + break; + default: + gk_errexit(SIGMEM, "Incorrect mcore type operation.\n"); + } +} + + +/*************************************************************************/ +/*! 
Adds a memory allocation at the end of the list. This is the gkmcore + version. It only accepts GK_MOPT_MARK and GK_MOPT_HEAP mops and grows the mop array with realloc(), matching the malloc() in gk_gkmcoreCreate(). + */ +/*************************************************************************/ +void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr) +{ + if (mcore->cmop == mcore->nmops) { + mcore->nmops *= 2; + mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t)); + if (mcore->mops == NULL) + gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n"); + } + + mcore->mops[mcore->cmop].type = type; + mcore->mops[mcore->cmop].nbytes = nbytes; + mcore->mops[mcore->cmop].ptr = ptr; + mcore->cmop++; + + switch (type) { + case GK_MOPT_MARK: + break; + + case GK_MOPT_HEAP: + mcore->num_hallocs++; + mcore->size_hallocs += nbytes; + mcore->cur_hallocs += nbytes; + if (mcore->max_hallocs < mcore->cur_hallocs) + mcore->max_hallocs = mcore->cur_hallocs; + break; + default: + gk_errexit(SIGMEM, "Incorrect mcore type operation.\n"); + } +} + + +/*************************************************************************/ +/*! This function deletes the mop associated with the supplied pointer. + The mop has to be a heap allocation, otherwise it fails violently. Only the mops above the most recent marker are searched, the memory itself is not freed here, and the last mop is moved into the vacated slot (so mop order is not preserved). + */ +/*************************************************************************/ +void gk_mcoreDel(gk_mcore_t *mcore, void *ptr) +{ + int i; + + for (i=mcore->cmop-1; i>=0; i--) { + if (mcore->mops[i].type == GK_MOPT_MARK) + gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr); + + if (mcore->mops[i].ptr == ptr) { + if (mcore->mops[i].type != GK_MOPT_HEAP) + gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n"); + + mcore->cur_hallocs -= mcore->mops[i].nbytes; + mcore->mops[i] = mcore->mops[--mcore->cmop]; + return; + } + } + + gk_errexit(SIGMEM, "mcoreDel should never have been here!\n"); +} + + +/*************************************************************************/ +/*! This function deletes the mop associated with the supplied pointer. 
+ The mop has to be a heap allocation, otherwise it fails violently. + This is the gkmcore version. Only the mops above the most recent marker are searched and the memory itself is not freed here. + */ +/*************************************************************************/ +void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr) +{ + int i; + + for (i=mcore->cmop-1; i>=0; i--) { + if (mcore->mops[i].type == GK_MOPT_MARK) + gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr); + + if (mcore->mops[i].ptr == ptr) { + if (mcore->mops[i].type != GK_MOPT_HEAP) + gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n"); + + mcore->cur_hallocs -= mcore->mops[i].nbytes; + mcore->mops[i] = mcore->mops[--mcore->cmop]; + return; + } + } + + gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n"); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/memory.c b/3rdParty/metis/metis-5.1.1/GKlib/memory.c new file mode 100644 index 000000000..59c6d5a3a --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/memory.c @@ -0,0 +1,282 @@ +/*! +\file memory.c +\brief This file contains various allocation routines + +The allocation routines included are for 1D and 2D arrays of +most of the datatypes that GKlib supports. Many of these routines are +defined with the help of the macros in gk_mkmemory.h. These macros +can be used to define other memory allocation routines. + +\date Started 4/3/2007 +\author George +\version\verbatim $Id: memory.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + +/* This is the per-thread (__thread) mcore that tracks all heap allocations; it stays NULL until gk_malloc_init() is called */ +static __thread gk_mcore_t *gkmcore = NULL; + + +/*************************************************************************/ +/*! 
Define the set of memory allocation routines for each data type */ +/**************************************************************************/ +GK_MKALLOC(gk_c, char) +GK_MKALLOC(gk_i, int) +GK_MKALLOC(gk_i8, int8_t) +GK_MKALLOC(gk_i16, int16_t) +GK_MKALLOC(gk_i32, int32_t) +GK_MKALLOC(gk_i64, int64_t) +GK_MKALLOC(gk_ui8, uint8_t) +GK_MKALLOC(gk_ui16, uint16_t) +GK_MKALLOC(gk_ui32, uint32_t) +GK_MKALLOC(gk_ui64, uint64_t) +GK_MKALLOC(gk_z, ssize_t) +GK_MKALLOC(gk_zu, size_t) +GK_MKALLOC(gk_f, float) +GK_MKALLOC(gk_d, double) +GK_MKALLOC(gk_idx, gk_idx_t) + +GK_MKALLOC(gk_ckv, gk_ckv_t) +GK_MKALLOC(gk_ikv, gk_ikv_t) +GK_MKALLOC(gk_i8kv, gk_i8kv_t) +GK_MKALLOC(gk_i16kv, gk_i16kv_t) +GK_MKALLOC(gk_i32kv, gk_i32kv_t) +GK_MKALLOC(gk_i64kv, gk_i64kv_t) +GK_MKALLOC(gk_zkv, gk_zkv_t) +GK_MKALLOC(gk_zukv, gk_zukv_t) +GK_MKALLOC(gk_fkv, gk_fkv_t) +GK_MKALLOC(gk_dkv, gk_dkv_t) +GK_MKALLOC(gk_skv, gk_skv_t) +GK_MKALLOC(gk_idxkv, gk_idxkv_t) + + + + + + +/*************************************************************************/ +/*! This function allocates a two-dimensional matrix as ndim1 separately allocated rows of ndim2 elements of elmlen bytes each. *r_matrix receives the array of row pointers, or NULL if an allocation failed, in which case the rows allocated so far and the row-pointer array are released. + */ +/*************************************************************************/ +void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2) +{ + size_t i, j; + void **matrix; + + *r_matrix = NULL; + + if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL) + return; + + for (i=0; i<ndim1; i++) { + if ((matrix[i] = (void *)gk_malloc(ndim2*elmlen, "gk_AllocMatrix: matrix[i]")) == NULL) { + for (j=0; j<i; j++) gk_free((void **)&matrix[j], LTERM); + gk_free((void **)&matrix, LTERM); /* also release the row-pointer array, which was leaked before */ + return; + } + } + + *r_matrix = matrix; +} + + +/*************************************************************************/ +/*! This function frees a two-dimensional matrix. 
+ Each of the ndim1 rows and then the row-pointer array are released with gk_free(), which also sets *r_matrix to NULL. Nothing is done when *r_matrix is NULL; ndim2 is not used. */ +/*************************************************************************/ +void gk_FreeMatrix(void ***r_matrix, size_t ndim1, size_t ndim2) +{ + size_t i; + void **matrix; + + if ((matrix = *r_matrix) == NULL) + return; + + for (i=0; i<ndim1; i++) + gk_free((void **)&matrix[i], LTERM); + + gk_free((void **)r_matrix, LTERM); + +} + + +/*************************************************************************/ +/*! This function initializes tracking of heap allocations. It creates the per-thread tracker on first use and pushes a marker on it; it returns 1 on success and 0 if the tracker could not be created. +*/ +/*************************************************************************/ +int gk_malloc_init() +{ + if (gkmcore == NULL) + gkmcore = gk_gkmcoreCreate(); + + if (gkmcore == NULL) + return 0; + + gk_gkmcorePush(gkmcore); + + return 1; +} + + +/*************************************************************************/ +/*! This function frees the memory that has been allocated since the + last call to gk_malloc_init(). The tracker itself is destroyed once its mop stack becomes empty. +*/ +/*************************************************************************/ +void gk_malloc_cleanup(int showstats) +{ + if (gkmcore != NULL) { + gk_gkmcorePop(gkmcore); + if (gkmcore->cmop == 0) { + gk_gkmcoreDestroy(&gkmcore, showstats); + gkmcore = NULL; + } + } +} + + +/*************************************************************************/ +/*! This function is my wrapper around malloc that provides the following + enhancements over malloc: + * It always allocates at least one byte of memory, even if 0 bytes are requested. + This is to ensure that checks of returned values do not lead to NULL + due to 0 bytes requested. + * It records the allocation in the gkmcore tracker when tracking is enabled. + Note that the allocated memory is NOT zeroed-out; callers must initialize it. 
+*/ +/**************************************************************************/ +void *gk_malloc(size_t nbytes, char *msg) +{ + void *ptr=NULL; + + if (nbytes == 0) + nbytes++; /* Force mallocs to actually allocate some memory */ + + ptr = (void *)malloc(nbytes); + + if (ptr == NULL) { + fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); + fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); + gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", + msg, nbytes); + return NULL; + } + + /* add this memory allocation */ + if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); + + return ptr; +} + + +/************************************************************************* +* This function is my wrapper around realloc +**************************************************************************/ +void *gk_realloc(void *oldptr, size_t nbytes, char *msg) +{ + void *ptr=NULL; + + if (nbytes == 0) + nbytes++; /* Force mallocs to actually allocate some memory */ + + /* remove this memory de-allocation */ + if (gkmcore != NULL && oldptr != NULL) gk_gkmcoreDel(gkmcore, oldptr); + + ptr = (void *)realloc(oldptr, nbytes); + + if (ptr == NULL) { + fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); + fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); + gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", + msg, nbytes); + return NULL; + } + + /* add this memory allocation */ + if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); + + return ptr; +} + + +/************************************************************************* +* This function is my wrapper around free, allows multiple pointers +**************************************************************************/ +void gk_free(void **ptr1,...) 
+{ + va_list plist; + void **ptr; + + if (*ptr1 != NULL) { + free(*ptr1); + + /* remove this memory de-allocation */ + if (gkmcore != NULL) + gk_gkmcoreDel(gkmcore, *ptr1); + } + *ptr1 = NULL; + + va_start(plist, ptr1); + while ((ptr = va_arg(plist, void **)) != LTERM) { + if (*ptr != NULL) { + free(*ptr); + + /* remove this memory de-allocation */ + if (gkmcore != NULL) + gk_gkmcoreDel(gkmcore, *ptr); + } + *ptr = NULL; + } + va_end(plist); +} + + +/************************************************************************* +* This function returns the current ammount of dynamically allocated +* memory that is used by the system +**************************************************************************/ +size_t gk_GetCurMemoryUsed() +{ + if (gkmcore == NULL) + return 0; + else + return gkmcore->cur_hallocs; +} + + +/************************************************************************* +* This function returns the maximum ammount of dynamically allocated +* memory that was used by the system +**************************************************************************/ +size_t gk_GetMaxMemoryUsed() +{ + if (gkmcore == NULL) + return 0; + else + return gkmcore->max_hallocs; +} + + +/*************************************************************************/ +/*! This function returns the VmSize and VmRSS of the calling process. 
*/ +/*************************************************************************/ +void gk_GetVMInfo(size_t *vmsize, size_t *vmrss) +{ + FILE *fp; + char fname[1024]; + + sprintf(fname, "/proc/%d/statm", getpid()); + fp = gk_fopen(fname, "r", "proc/pid/statm"); + if (fscanf(fp, "%zu %zu", vmsize, vmrss) != 2) + errexit("Failed to read to values from %s\n", fname); + gk_fclose(fp); + + /* + *vmsize *= sysconf(_SC_PAGESIZE); + *vmrss *= sysconf(_SC_PAGESIZE); + */ + + return; +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/ms_inttypes.h b/3rdParty/metis/metis-5.1.1/GKlib/ms_inttypes.h new file mode 100644 index 000000000..e26204b7f --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/ms_inttypes.h @@ -0,0 +1,301 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "ms_stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define 
PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // 
_WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is a modified version of the div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c. It returns numer / denom in .quot and numer % denom in .rem. +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + 
imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up so that a negative numer never yields a positive remainder + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/3rdParty/metis/metis-5.1.1/GKlib/ms_stat.h b/3rdParty/metis/metis-5.1.1/GKlib/ms_stat.h new file mode 100644 index 000000000..a1ef6faf7 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/ms_stat.h @@ -0,0 +1,22 @@ +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MS_STAT_H_ +#define _MS_STAT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <sys/stat.h> +/* Test macros for file types. */ + +#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask)) + +#define S_ISDIR(mode) __S_ISTYPE((mode), S_IFDIR) +#define S_ISCHR(mode) __S_ISTYPE((mode), S_IFCHR) +#define S_ISBLK(mode) __S_ISTYPE((mode), S_IFBLK) +#define S_ISREG(mode) __S_ISTYPE((mode), S_IFREG) + +#endif diff --git a/3rdParty/metis/metis-5.1.1/GKlib/ms_stdint.h b/3rdParty/metis/metis-5.1.1/GKlib/ms_stdint.h new file mode 100644 index 000000000..7e200dc6f --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/ms_stdint.h @@ -0,0 +1,222 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#if (_MSC_VER < 1300) && defined(__cplusplus) + extern "C++" { +#endif +# include <wchar.h> +#if (_MSC_VER < 1300) && defined(__cplusplus) + } +#endif + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef int intptr_t; + typedef unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types 
+#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX 
_I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/3rdParty/metis/metis-5.1.1/GKlib/pqueue.c b/3rdParty/metis/metis-5.1.1/GKlib/pqueue.c new file mode 100644 index 000000000..2fb8515d2 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/pqueue.c @@ -0,0 +1,25 @@ +/*! +\file pqueue.c +\brief This file implements various max-priority queues. + +The priority queues are generated using the GK_MKPQUEUE macro. + +\date Started 3/27/2007 +\author George +\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + + +/*************************************************************************/ +/*! 
Create the various max priority queues */ +/*************************************************************************/ +#define key_gt(a, b) ((a) > (b)) +GK_MKPQUEUE(gk_ipq, gk_ipq_t, gk_ikv_t, int, gk_idx_t, gk_ikvmalloc, INT_MAX, key_gt) +GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t, gk_idx_t, gk_i32kvmalloc, INT32_MAX, key_gt) +GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t, gk_idx_t, gk_i64kvmalloc, INT64_MAX, key_gt) +GK_MKPQUEUE(gk_fpq, gk_fpq_t, gk_fkv_t, float, gk_idx_t, gk_fkvmalloc, FLT_MAX, key_gt) +GK_MKPQUEUE(gk_dpq, gk_dpq_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_gt) +GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt) +#undef key_gt diff --git a/3rdParty/metis/metis-5.1.1/GKlib/random.c b/3rdParty/metis/metis-5.1.1/GKlib/random.c new file mode 100644 index 000000000..369861462 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/random.c @@ -0,0 +1,136 @@ +/*! +\file +\brief Various routines for providing portable 32 and 64 bit random number + generators. + +\date Started 5/17/2007 +\author George +\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + + +/*************************************************************************/ +/*! Create the various random number functions */ +/*************************************************************************/ +GK_MKRANDOM(gk_c, size_t, char) +GK_MKRANDOM(gk_i, size_t, int) +GK_MKRANDOM(gk_i32, size_t, int32_t) +GK_MKRANDOM(gk_f, size_t, float) +GK_MKRANDOM(gk_d, size_t, double) +GK_MKRANDOM(gk_idx, size_t, gk_idx_t) +GK_MKRANDOM(gk_z, size_t, ssize_t) +GK_MKRANDOM(gk_zu, size_t, size_t) + + + +/*************************************************************************/ +/*! 
GKlib's built in random number generator for portability across + different architectures */ +/*************************************************************************/ +#ifdef USE_GKRAND +/* + A C-program for MT19937-64 (2004/9/29 version). + Coded by Takuji Nishimura and Makoto Matsumoto. + + This is a 64-bit version of Mersenne Twister pseudorandom number + generator. + + Before using, initialize the state by using init_genrand64(seed) + or init_by_array64(init_key, key_length). + + Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef USE_GKRAND
/* Parameters of the 64-bit Mersenne Twister (MT19937-64). */
#define NN 312
#define MM 156
#define MATRIX_A 0xB5026F5AA96619E9ULL
#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
#define LM 0x7FFFFFFFULL /* Least significant 31 bits */


/* The array for the state vector */
static uint64_t mt[NN];
/* mti==NN+1 means mt[NN] is not initialized */
static int mti=NN+1;
#endif /* USE_GKRAND */


/*************************************************************************/
/*! Seeds the random number generator.

    With USE_GKRAND the Mersenne Twister state vector mt[] is filled from
    \c seed. Otherwise the seed is truncated to unsigned int and handed to
    the C library's srand().

    \param seed is the seed value.
*/
/*************************************************************************/
void gk_randinit(uint64_t seed)
{
#ifdef USE_GKRAND
  mt[0] = seed;
  for (mti=1; mti<NN; mti++)
    mt[mti] = (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
#else
  srand((unsigned int) seed);
#endif
}


/*************************************************************************/
/*! Generates a non-negative pseudo-random 64-bit value.

    With USE_GKRAND the tempered Mersenne Twister output is masked to its
    low 63 bits, so the result lies in [0, 2^63-1] (not [0, 2^64-1]).
    Otherwise two consecutive rand() results are combined: the first call
    supplies the bits above bit 31 and the second call the low bits.

    \returns the generated value.
*/
/*************************************************************************/
uint64_t gk_randint64(void)
{
#ifdef USE_GKRAND
  int i;
  unsigned long long x;
  static uint64_t mag01[2]={0ULL, MATRIX_A};

  if (mti >= NN) { /* generate NN words at one time */
    /* if gk_randinit() has not been called, */
    /* a default initial seed is used */
    if (mti == NN+1)
      gk_randinit(5489ULL);

    for (i=0; i<NN-MM; i++) {
      x = (mt[i]&UM)|(mt[i+1]&LM);
      mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
    }
    for (; i<NN-1; i++) {
      x = (mt[i]&UM)|(mt[i+1]&LM);
      mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
    }
    x = (mt[NN-1]&UM)|(mt[0]&LM);
    mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];

    mti = 0;
  }

  x = mt[mti++];

  /* tempering */
  x ^= (x >> 29) & 0x5555555555555555ULL;
  x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
  x ^= (x << 37) & 0xFFF7EEE000000000ULL;
  x ^= (x >> 43);

  return x & 0x7FFFFFFFFFFFFFFFULL;
#else
  /* The two rand() calls must be separate statements: as operands of a
     single '|' expression their relative order is unspecified, which made
     the generated sequence depend on the compiler. */
  uint64_t hi = (uint64_t) rand();
  uint64_t lo = (uint64_t) rand();

  return (hi << 32) | lo;
#endif
}


/*************************************************************************/
/*! Generates a non-negative pseudo-random 32-bit value.

    With USE_GKRAND this is the low 31 bits of gk_randint64(), i.e. a value
    in [0, 2^31-1]. Otherwise it is the result of a single rand() call.

    \returns the generated value.
*/
/*************************************************************************/
uint32_t gk_randint32(void)
{
#ifdef USE_GKRAND
  return (uint32_t)(gk_randint64() & 0x7FFFFFFF);
#else
  return (uint32_t)rand();
#endif
}
b/3rdParty/metis/metis-5.1.1/GKlib/rw.c @@ -0,0 +1,103 @@ +/*! + * \file + * + * \brief Various routines that perform random-walk based operations + on graphs stored as gk_csr_t matrices. + * + * \author George Karypis + * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim + */ + +#include <GKlib.h> + + +/*************************************************************************/ +/*! Computes the (personalized) page-rank of the vertices in a graph. + + \param mat is the matrix storing the graph. + \param lamda is the restart probability. + \param eps is the error tolerance for convergance. + \param max_niter is the maximum number of allowed iterations. + \param pr on entry stores the restart distribution of the vertices. + This allows for the computation of personalized page-rank scores + by appropriately setting that parameter. + On return, pr stores the computed page ranks. + + \returns the number of iterations that were performed. +*/ +/**************************************************************************/ +int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr) +{ + ssize_t i, j, k, iter, nrows; + double *rscale, *prold, *prnew, *prtmp; + double fromsinks, error; + ssize_t *rowptr; + int *rowind; + float *rowval; + + nrows = mat->nrows; + rowptr = mat->rowptr; + rowind = mat->rowind; + rowval = mat->rowval; + + prold = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew"); + prnew = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold"); + rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale"); + + /* compute the scaling factors to get adjacency weights into transition + probabilities */ + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + rscale[i] += rowval[j]; + if (rscale[i] > 0) + rscale[i] = 1.0/rscale[i]; + } + + /* the restart distribution is the initial pr scores */ + for (i=0; i<nrows; i++) + prnew[i] = pr[i]; + + /* get into the PR iteration */ + for (iter=0; iter<max_niter; iter++) { + 
gk_SWAP(prnew, prold, prtmp); + gk_dset(nrows, 0.0, prnew); + + /* determine the total current PR score of the sinks so that you + can distribute them to all nodes according to the restart + distribution. */ + for (fromsinks=0.0, i=0; i<nrows; i++) { + if (rscale[i] == 0) + fromsinks += prold[i]; + } + + /* push random-walk scores to the outlinks */ + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j]; + } + + /* apply the restart conditions */ + for (i=0; i<nrows; i++) { + prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i]; + } + + /* compute the error */ + for (error=0.0, i=0; i<nrows; i++) + error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error); + + //printf("nrm1: %le maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error); + + if (error < eps) + break; + } + + /* store the computed pr scores into pr for output */ + for (i=0; i<nrows; i++) + pr[i] = prnew[i]; + + gk_free((void **)&prnew, &prold, &rscale, LTERM); + + return (int)(iter+1); + +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/scripts/gexpand.pl b/3rdParty/metis/metis-5.1.1/GKlib/scripts/gexpand.pl new file mode 100755 index 000000000..2b82134e7 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/scripts/gexpand.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl -w + +die "Usage $0 <gfile> <ncopies>\n" unless @ARGV == 2; + +$filein = shift(@ARGV); +$ncopies = shift(@ARGV); + +open(FPIN, "<$filein") or die "Could not open $filein. 
$!\n"; + +$_ = <FPIN>; +chomp($_); +($nvtxs, $nedges) = split(' ', $_); + +#print "nvtxs: $nvtxs, nedges: $nedges\n"; + +$u = 1; +while (<FPIN>) { + chomp($_); + @edges = split(' ', $_); + + # put the within layer edges + foreach $v (@edges) { + next if $v < $u; + for ($i=0; $i<$ncopies; $i++) { + printf("%d %d\n", $i*$nvtxs+$u-1, $i*$nvtxs+$v-1); + printf("%d %d\n", $i*$nvtxs+$v-1, $i*$nvtxs+$u-1); + } + } + + # put the vertex across layer edges + for ($i=0; $i<$ncopies-1; $i++) { + printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$u-1); + printf("%d %d\n", ($i+1)*$nvtxs+$u-1, $i*$nvtxs+$u-1); + } + + # put the adjacent across layer edges + for ($i=0; $i<$ncopies-1; $i++) { + $j=0; + foreach $v (@edges) { + $j++; + next if (($j+$i)%2 == 0); + printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$v-1); + printf("%d %d\n", ($i+1)*$nvtxs+$v-1, $i*$nvtxs+$u-1); + } + } + + goto DONE; + +DONE: + $u++; +} + +close(FPIN); diff --git a/3rdParty/metis/metis-5.1.1/GKlib/seq.c b/3rdParty/metis/metis-5.1.1/GKlib/seq.c new file mode 100644 index 000000000..f267a3ea0 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/seq.c @@ -0,0 +1,174 @@ +/* + * + * Sequence handler library by Huzefa Rangwala + * Date : 03.01.2007 + * + * + * + */ + + +#include <GKlib.h> + + + + +/*********************************************************/ +/* ! \brief Initializes the <tt>gk_seq_t</tt> variable + + + + +\param A pointer to gk_seq_t itself +\returns null +*/ +/***********************************************************************/ + +void gk_seq_init(gk_seq_t *seq) +{ + + seq->len = 0; + seq->sequence = NULL; + + seq->pssm = NULL; + seq->psfm = NULL; + + seq->name = NULL; + +} + +/***********************************************************************/ +/*! 
\brief This function creates the localizations for the various sequences + +\param string i.e amino acids, nucleotides, sequences +\returns gk_i2cc2i_t variable +*/ +/*********************************************************************/ + +gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet) +{ + + + int nsymbols; + gk_idx_t i; + gk_i2cc2i_t *t; + + nsymbols = strlen(alphabet); + t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common"); + t->n = nsymbols; + t->i2c = gk_cmalloc(256, "gk_i2c_create_common"); + t->c2i = gk_imalloc(256, "gk_i2c_create_common"); + + + gk_cset(256, -1, t->i2c); + gk_iset(256, -1, t->c2i); + + for(i=0;i<nsymbols;i++){ + t->i2c[i] = alphabet[i]; + t->c2i[(int)alphabet[i]] = i; + } + + return t; + +} + + +/*********************************************************************/ +/*! \brief This function reads a pssm in the format of gkmod pssm + +\param file_name is the name of the pssm file +\returns gk_seq_t +*/ +/********************************************************************/ +gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename) +{ + gk_seq_t *seq; + gk_idx_t i, j, ii; + size_t ntokens, nbytes, len; + FILE *fpin; + + + gk_Tokens_t tokens; + static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*"; + static int PSSMWIDTH = 20; + char *header, line[MAXLINELEN]; + gk_i2cc2i_t *converter; + + header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header"); + + converter = gk_i2cc2i_create_common(AAORDER); + + gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes); + len --; + + seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM"); + gk_seq_init(seq); + + seq->len = len; + seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM"); + seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); + seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); + + seq->nsymbols = PSSMWIDTH; + seq->name = gk_getbasename(filename); + + fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM"); + + + /* Read the header line */ + if 
(fgets(line, MAXLINELEN-1, fpin) == NULL) + errexit("Unexpected end of file: %s\n", filename); + gk_strtoupper(line); + gk_strtokenize(line, " \t\n", &tokens); + + for (i=0; i<PSSMWIDTH; i++) + header[i] = tokens.list[i][0]; + + gk_freetokenslist(&tokens); + + + /* Read the rest of the lines */ + for (i=0, ii=0; ii<len; ii++) { + if (fgets(line, MAXLINELEN-1, fpin) == NULL) + errexit("Unexpected end of file: %s\n", filename); + gk_strtoupper(line); + gk_strtokenize(line, " \t\n", &tokens); + + seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]]; + + for (j=0; j<PSSMWIDTH; j++) { + seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]); + seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]); + } + + + + gk_freetokenslist(&tokens); + i++; + } + + seq->len = i; /* Reset the length if certain characters were skipped */ + + gk_free((void **)&header, LTERM); + gk_fclose(fpin); + + return seq; +} + + +/**************************************************************************/ +/*! \brief This function frees the memory allocated to the seq structure. + +\param gk_seq_t +\returns nothing +*/ +/**************************************************************************/ +void gk_seq_free(gk_seq_t *seq) +{ + gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols); + gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols); + gk_free((void **)&seq->name, &seq->sequence, LTERM); + //gk_free((void **)&seq, LTERM); + gk_free((void **) &seq, LTERM); + +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/sort.c b/3rdParty/metis/metis-5.1.1/GKlib/sort.c new file mode 100644 index 000000000..f0144aeaa --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/sort.c @@ -0,0 +1,437 @@ +/*! +\file sort.c +\brief This file contains GKlib's various sorting routines + +These routines are implemented using the GKSORT macro that is defined +in gk_qsort.h and is based on GNU's GLIBC qsort() implementation. 
+ +Additional sorting routines can be created using the same way that +these routines where defined. + +\date Started 4/4/07 +\author George +\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + + + +/*************************************************************************/ +/*! Sorts an array of chars in increasing order */ +/*************************************************************************/ +void gk_csorti(size_t n, char *base) +{ +#define char_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(char, base, n, char_lt); +#undef char_lt +} + + +/*************************************************************************/ +/*! Sorts an array of chars in decreasing order */ +/*************************************************************************/ +void gk_csortd(size_t n, char *base) +{ +#define char_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(char, base, n, char_gt); +#undef char_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_isorti(size_t n, int *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_isortd(size_t n, int *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_i32sorti(size_t n, int32_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int32_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_i32sortd(size_t n, int32_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int32_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_i64sorti(size_t n, int64_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(int64_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_ui32sorti(size_t n, uint32_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(uint32_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_ui32sortd(size_t n, uint32_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(uint32_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of integers in increasing order */ +/*************************************************************************/ +void gk_ui64sorti(size_t n, uint64_t *base) +{ +#define int_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(uint64_t, base, n, int_lt); +#undef int_lt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_ui64sortd(size_t n, uint64_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(uint64_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of integers in decreasing order */ +/*************************************************************************/ +void gk_i64sortd(size_t n, int64_t *base) +{ +#define int_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(int64_t, base, n, int_gt); +#undef int_gt +} + + +/*************************************************************************/ +/*! Sorts an array of floats in increasing order */ +/*************************************************************************/ +void gk_fsorti(size_t n, float *base) +{ +#define float_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(float, base, n, float_lt); +#undef float_lt +} + + +/*************************************************************************/ +/*! Sorts an array of floats in decreasing order */ +/*************************************************************************/ +void gk_fsortd(size_t n, float *base) +{ +#define float_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(float, base, n, float_gt); +#undef float_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of doubles in increasing order */ +/*************************************************************************/ +void gk_dsorti(size_t n, double *base) +{ +#define double_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(double, base, n, double_lt); +#undef double_lt +} + + +/*************************************************************************/ +/*! Sorts an array of doubles in decreasing order */ +/*************************************************************************/ +void gk_dsortd(size_t n, double *base) +{ +#define double_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(double, base, n, double_gt); +#undef double_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idx_t in increasing order */ +/*************************************************************************/ +void gk_idxsorti(size_t n, gk_idx_t *base) +{ +#define idx_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(gk_idx_t, base, n, idx_lt); +#undef idx_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idx_t in decreasing order */ +/*************************************************************************/ +void gk_idxsortd(size_t n, gk_idx_t *base) +{ +#define idx_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(gk_idx_t, base, n, idx_gt); +#undef idx_gt +} + + + + +/*************************************************************************/ +/*! Sorts an array of gk_ckv_t in increasing order */ +/*************************************************************************/ +void gk_ckvsorti(size_t n, gk_ckv_t *base) +{ +#define ckey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_ckv_t, base, n, ckey_lt); +#undef ckey_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_ckv_t in decreasing order */ +/*************************************************************************/ +void gk_ckvsortd(size_t n, gk_ckv_t *base) +{ +#define ckey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_ckv_t, base, n, ckey_gt); +#undef ckey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_ikv_t in increasing order */ +/*************************************************************************/ +void gk_ikvsorti(size_t n, gk_ikv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_ikv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_ikv_t in decreasing order */ +/*************************************************************************/ +void gk_ikvsortd(size_t n, gk_ikv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_ikv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i32kv_t in increasing order */ +/*************************************************************************/ +void gk_i32kvsorti(size_t n, gk_i32kv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_i32kv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i32kv_t in decreasing order */ +/*************************************************************************/ +void gk_i32kvsortd(size_t n, gk_i32kv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_i32kv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_i64kv_t in increasing order */ +/*************************************************************************/ +void gk_i64kvsorti(size_t n, gk_i64kv_t *base) +{ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_i64kv_t, base, n, ikey_lt); +#undef ikey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_i64kv_t in decreasing order */ +/*************************************************************************/ +void gk_i64kvsortd(size_t n, gk_i64kv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_i64kv_t, base, n, ikey_gt); +#undef ikey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zkv_t in increasing order */ +/*************************************************************************/ +void gk_zkvsorti(size_t n, gk_zkv_t *base) +{ +#define zkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_zkv_t, base, n, zkey_lt); +#undef zkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zkv_t in decreasing order */ +/*************************************************************************/ +void gk_zkvsortd(size_t n, gk_zkv_t *base) +{ +#define zkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_zkv_t, base, n, zkey_gt); +#undef zkey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_zukv_t in increasing order */ +/*************************************************************************/ +void gk_zukvsorti(size_t n, gk_zukv_t *base) +{ +#define zukey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_zukv_t, base, n, zukey_lt); +#undef zukey_lt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_zukv_t in decreasing order */ +/*************************************************************************/ +void gk_zukvsortd(size_t n, gk_zukv_t *base) +{ +#define zukey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_zukv_t, base, n, zukey_gt); +#undef zukey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in increasing order */ +/*************************************************************************/ +void gk_fkvsorti(size_t n, gk_fkv_t *base) +{ +#define fkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_fkv_t, base, n, fkey_lt); +#undef fkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in decreasing order */ +/*************************************************************************/ +void gk_fkvsortd(size_t n, gk_fkv_t *base) +{ +#define fkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_fkv_t, base, n, fkey_gt); +#undef fkey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_dkv_t in increasing order */ +/*************************************************************************/ +void gk_dkvsorti(size_t n, gk_dkv_t *base) +{ +#define dkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_dkv_t, base, n, dkey_lt); +#undef dkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_fkv_t in decreasing order */ +/*************************************************************************/ +void gk_dkvsortd(size_t n, gk_dkv_t *base) +{ +#define dkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_dkv_t, base, n, dkey_gt); +#undef dkey_gt +} + + +/*************************************************************************/ +/*! 
Sorts an array of gk_skv_t in increasing order */ +/*************************************************************************/ +void gk_skvsorti(size_t n, gk_skv_t *base) +{ +#define skey_lt(a, b) (strcmp((a)->key, (b)->key) < 0) + GK_MKQSORT(gk_skv_t, base, n, skey_lt); +#undef skey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_skv_t in decreasing order */ +/*************************************************************************/ +void gk_skvsortd(size_t n, gk_skv_t *base) +{ +#define skey_gt(a, b) (strcmp((a)->key, (b)->key) > 0) + GK_MKQSORT(gk_skv_t, base, n, skey_gt); +#undef skey_gt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idxkv_t in increasing order */ +/*************************************************************************/ +void gk_idxkvsorti(size_t n, gk_idxkv_t *base) +{ +#define idxkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(gk_idxkv_t, base, n, idxkey_lt); +#undef idxkey_lt +} + + +/*************************************************************************/ +/*! Sorts an array of gk_idxkv_t in decreasing order */ +/*************************************************************************/ +void gk_idxkvsortd(size_t n, gk_idxkv_t *base) +{ +#define idxkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(gk_idxkv_t, base, n, idxkey_gt); +#undef idxkey_gt +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/string.c b/3rdParty/metis/metis-5.1.1/GKlib/string.c new file mode 100644 index 000000000..562db22ef --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/string.c @@ -0,0 +1,525 @@ +/************************************************************************/ +/*! \file + +\brief Functions for manipulating strings. + +Various functions for manipulating strings. Some of these functions +provide new functionality, whereas others are drop-in replacements +of standard functions (but with enhanced functionality). 
/************************************************************************/
/*! \brief Translates or deletes individual characters of a string.

Every character of \c str that occurs in \c fromlist is looked up by the
position of its first occurrence there. If \c tolist has a character at
that position, the character is replaced by it; if \c tolist is too short,
the character is dropped from the string. Characters that do not occur in
\c fromlist are kept as they are. The string is rewritten in place and
re-terminated, so it can only stay the same length or get shorter.
The behavior is modeled after Perl's \b tr// operator.

\param str is the string whose characters will be replaced.
\param fromlist is the set of characters to be replaced.
\param tolist is the set of replacement characters.
\returns A pointer to \c str itself.
*/
/************************************************************************/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
{
  const size_t ntargets = strlen(tolist);
  const char *hit;
  char *src, *dst;
  size_t pos;

  /* dst never runs ahead of src, so compacting in place is safe */
  for (src = dst = str; *src != '\0'; src++) {
    hit = strchr(fromlist, *src);

    if (hit == NULL) {          /* not listed: keep the character */
      *dst++ = *src;
      continue;
    }

    pos = (size_t)(hit - fromlist);
    if (pos < ntargets)         /* listed with a replacement: translate */
      *dst++ = tolist[pos];
    /* listed without a replacement: the character is deleted */
  }
  *dst = '\0';

  return str;
}
+\param replacement + is the replacement string, in which the possible captured pattern substrings + are referred to as $1, $2, ..., $9. The entire matched pattern is refered + to as $0. +\param options + is a string specified options for the substitution operation. Currently the + <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are + supported. +\param new_str + is a reference to a pointer that will store a pointer to the newly created + string that results from the substitutions. This string is allocated via + gk_malloc() and needs to be freed using gk_free(). The string is returned + even if no substitutions were performed. +\returns + If successful, it returns 1 + the number of substitutions that were performed. + Thus, if no substitutions were performed, the returned value will be 1. + Otherwise it returns 0. In case of error, a meaningful error message is + returned in <tt>newstr</tt>, which also needs to be freed afterwards. +*/ +/************************************************************************/ +int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, + char **new_str) +{ + ssize_t i, len, rlen, nlen, offset, noffset; + int j, rc, flags, global, nmatches; + regex_t re; + regmatch_t matches[10]; + + + /* Parse the options */ + flags = REG_EXTENDED; + if (strchr(options, 'i') != NULL) + flags = flags | REG_ICASE; + global = (strchr(options, 'g') != NULL ? 
1 : 0); + + + /* Compile the regex */ + if ((rc = regcomp(&re, pattern, flags)) != 0) { + len = regerror(rc, &re, NULL, 0); + *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str"); + regerror(rc, &re, *new_str, len); + return 0; + } + + /* Prepare the output string */ + len = strlen(str); + nlen = 2*len; + noffset = 0; + *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str"); + + + /* Get into the matching-replacing loop */ + rlen = strlen(replacement); + offset = 0; + nmatches = 0; + do { + rc = regexec(&re, str+offset, 10, matches, 0); + + if (rc == REG_ESPACE) { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("regexec ran out of memory."); + regfree(&re); + return 0; + } + else if (rc == REG_NOMATCH) { + if (nlen-noffset < len-offset) { + nlen += (len-offset) - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strcpy(*new_str+noffset, str+offset); + noffset += (len-offset); + break; + } + else { /* A match was found! */ + nmatches++; + + /* Copy the left unmatched portion of the string */ + if (matches[0].rm_so > 0) { + if (nlen-noffset < matches[0].rm_so) { + nlen += matches[0].rm_so - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strncpy(*new_str+noffset, str+offset, matches[0].rm_so); + noffset += matches[0].rm_so; + } + + /* Go and append the replacement string */ + for (i=0; i<rlen; i++) { + switch (replacement[i]) { + case '\\': + if (i+1 < rlen) { + if (nlen-noffset < 1) { + nlen += nlen + 1; + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + *new_str[noffset++] = replacement[++i]; + } + else { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("Error in replacement string. 
Missing character following '\'."); + regfree(&re); + return 0; + } + break; + + case '$': + if (i+1 < rlen) { + j = (int)(replacement[++i] - '0'); + if (j < 0 || j > 9) { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("Error in captured subexpression specification."); + regfree(&re); + return 0; + } + + if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) { + nlen += nlen + (matches[j].rm_eo-matches[j].rm_so); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + + strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo); + noffset += matches[j].rm_eo-matches[j].rm_so; + } + else { + gk_free((void **)new_str, LTERM); + *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'."); + regfree(&re); + return 0; + } + break; + + default: + if (nlen-noffset < 1) { + nlen += nlen + 1; + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + (*new_str)[noffset++] = replacement[i]; + } + } + + /* Update the offset of str for the next match */ + offset += matches[0].rm_eo; + + if (!global) { + /* Copy the right portion of the string if no 'g' option */ + if (nlen-noffset < len-offset) { + nlen += (len-offset) - (nlen-noffset); + *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str"); + } + strcpy(*new_str+noffset, str+offset); + noffset += (len-offset); + } + } + } while (global); + + (*new_str)[noffset] = '\0'; + + regfree(&re); + return nmatches + 1; + +} + + + +/************************************************************************/ +/*! \brief Prunes characters from the end of the string. + +This function removes any trailing characters that are included in the +\c rmlist. The trimming stops at the last character (i.e., first character +from the end) that is not in \c rmlist. +This function can be used to removed trailing spaces, newlines, etc. 
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strhprune()
+*/
+/*************************************************************************/
+char *gk_strtprune(char *str, char *rmlist)
+{
+  ssize_t i, j, len;
+
+  len = strlen(rmlist);
+
+  for (i=strlen(str)-1; i>=0; i--) {  /* signed index: i reaches -1 when everything is pruned */
+    for (j=0; j<len; j++) {
+      if (str[i] == rmlist[j])
+        break;
+    }
+    if (j == len)  /* str[i] is not in rmlist: stop trimming here */
+      break;
+  }
+
+  str[i+1] = '\0';
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the beginning of the string.
+
+This function removes any starting characters that are included in the
+\c rmlist. The trimming stops at the first character that is not in
+\c rmlist.
+This function can be used to remove leading spaces, tabs, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strtprune()
+*/
+/*************************************************************************/
+char *gk_strhprune(char *str, char *rmlist)
+{
+  ssize_t i, j, len;
+
+  len = strlen(rmlist);
+
+  for (i=0; str[i]; i++) {
+    for (j=0; j<len; j++) {
+      if (str[i] == rmlist[j])
+        break;
+    }
+    if (j == len)  /* first character that is not in rmlist */
+      break;
+  }
+
+  if (i>0) { /* If something needs to be removed */
+    for (j=0; str[i]; i++, j++)  /* shift the kept tail to the front */
+      str[j] = str[i];
+    str[j] = '\0';
+  }
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to upper case.
+
+This function converts a string to upper case. This operation modifies the
+string itself.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtolower() +*/ +/*************************************************************************/ +char *gk_strtoupper(char *str) +{ + int i; + + for (i=0; str[i]!='\0'; str[i]=toupper(str[i]), i++); + return str; +} + + +/************************************************************************/ +/*! \brief Converts a string to lower case. + +This function converts a string to lower case. This operation modifies the +string itself. + +\param str is the string whose case will be changed. +\returns A pointer to \c str itself. +\sa gk_strtoupper() +*/ +/*************************************************************************/ +char *gk_strtolower(char *str) +{ + int i; + + for (i=0; str[i]!='\0'; str[i]=tolower(str[i]), i++); + return str; +} + + +/************************************************************************/ +/*! \brief Duplicates a string + +This function is a replacement for C's standard <em>strdup()</em> function. +The key differences between the two are that gk_strdup(): + - uses the dynamic memory allocation routines of \e GKlib. + - it correctly handles NULL input strings. + +The string that is returned must be freed by gk_free(). + +\param orgstr is the string that will be duplicated. +\returns A pointer to the newly created string. +\sa gk_free() +*/ +/*************************************************************************/ +char *gk_strdup(char *orgstr) +{ + int len; + char *str=NULL; + + if (orgstr != NULL) { + len = strlen(orgstr)+1; + str = gk_malloc(len*sizeof(char), "gk_strdup: str"); + strcpy(str, orgstr); + } + + return str; +} + + +/************************************************************************/ +/*! \brief Case insensitive string comparison. + +This function compares two strings for equality by ignoring the case of the +strings. + +\warning This function is \b not equivalent to a case-insensitive + <em>strcmp()</em> function, as it does not return ordering + information. + +\todo Remove the above warning. 
+ +\param s1 is the first string to be compared. +\param s2 is the second string to be compared. +\retval 1 if the strings are identical, +\retval 0 otherwise. +*/ +/*************************************************************************/ +int gk_strcasecmp(char *s1, char *s2) +{ + int i=0; + + if (strlen(s1) != strlen(s2)) + return 0; + + while (s1[i] != '\0') { + if (tolower(s1[i]) != tolower(s2[i])) + return 0; + i++; + } + + return 1; +} + + +/************************************************************************/ +/*! \brief Compare two strings in revere order + +This function is similar to strcmp but it performs the comparison as +if the two strings were reversed. + +\param s1 is the first string to be compared. +\param s2 is the second string to be compared. +\retval -1, 0, 1, if the s1 < s2, s1 == s2, or s1 > s2. +*/ +/*************************************************************************/ +int gk_strrcmp(char *s1, char *s2) +{ + int i1 = strlen(s1)-1; + int i2 = strlen(s2)-1; + + while ((i1 >= 0) && (i2 >= 0)) { + if (s1[i1] != s2[i2]) + return (s1[i1] - s2[i2]); + i1--; + i2--; + } + + /* i1 == -1 and/or i2 == -1 */ + + if (i1 < i2) + return -1; + if (i1 > i2) + return 1; + return 0; +} + + + +/************************************************************************/ +/*! \brief Converts a time_t time into a string + +This function takes a time_t-specified time and returns a string-formated +representation of the corresponding time. The format of the string is +<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time. + +\param time is the time to be converted. +\return It returns a pointer to a statically allocated string that is + over-written in successive calls of this function. If the + conversion failed, it returns NULL. 
+ +*/ +/*************************************************************************/ +char *gk_time2str(time_t time) +{ + static char datestr[128]; + struct tm *tm; + + tm = localtime(&time); + + if (strftime(datestr, 128, "%m/%d/%Y %H:%M:%S", tm) == 0) + return NULL; + else + return datestr; +} + + + +#if !defined(WIN32) && !defined(__MINGW32__) +/************************************************************************/ +/*! \brief Converts a date/time string into its equivalent time_t value + +This function takes date and/or time specification and converts it in +the equivalent time_t representation. The conversion is done using the +strptime() function. The format that gk_str2time() understands is +<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time. + +\param str is the date/time string to be converted. +\return If the conversion was successful it returns the time, otherwise + it returns -1. +*/ +/*************************************************************************/ +time_t gk_str2time(char *str) +{ + struct tm time; + time_t rtime; + + memset(&time, '\0', sizeof(time)); + + if (strptime(str, "%m/%d/%Y %H:%M:%S", &time) == NULL) + return -1; + + rtime = mktime(&time); + return (rtime < 0 ? 
0 : rtime); +} +#endif + + +/************************************************************************* +* This function returns the ID of a particular string based on the +* supplied StringMap array +**************************************************************************/ +int gk_GetStringID(gk_StringMap_t *strmap, char *key) +{ + int i; + + for (i=0; strmap[i].name; i++) { + if (gk_strcasecmp(key, strmap[i].name)) + return strmap[i].id; + } + + return -1; +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/GKlib/test/CMakeLists.txt new file mode 100644 index 000000000..aab630848 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/CMakeLists.txt @@ -0,0 +1,20 @@ +# Where the header files reside +#include_directories(../) + +# Build program. +add_executable(strings strings.c) +add_executable(gksort gksort.c) +add_executable(fis fis.c) +add_executable(gkrw rw.c) +add_executable(gkgraph gkgraph.c) +add_executable(csrcnv csrcnv.c) +add_executable(grKx grKx.c) +add_executable(m2mnbrs m2mnbrs.c) +add_executable(cmpnbrs cmpnbrs.c) +add_executable(splatt2svd splatt2svd.c) +foreach(prog strings gksort fis gkrw gkgraph csrcnv grKx m2mnbrs cmpnbrs splatt2svd) + target_link_libraries(${prog} GKlib) +endforeach(prog) + +# Install +install(TARGETS fis csrcnv m2mnbrs gkrw cmpnbrs gkgraph RUNTIME DESTINATION bin) diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/cmpnbrs.c b/3rdParty/metis/metis-5.1.1/GKlib/test/cmpnbrs.c new file mode 100644 index 000000000..6e3ace820 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/cmpnbrs.c @@ -0,0 +1,301 @@ +/*! +\file +\brief It takes as input two CSR matrices A and B and computes how + similar AA' and A'A are to BB' and B'B, respectively in terms + of the cosine similarity of the corresponding rows. 
+ +\date 11/09/2015 +\author George +\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +/*************************************************************************/ +/*! Data structures for the code */ +/*************************************************************************/ +typedef struct { + int simtype; /*!< The similarity type to use */ + int verbosity; /*!< The reporting verbosity level */ + + char *afile; /*!< The file storing the query documents */ + char *bfile; /*!< The file storing the collection documents */ + + /* timers */ + double timer_global; +} params_t; + + +/*************************************************************************/ +/*! Constants */ +/*************************************************************************/ +/* Versions */ +#define VER_MAJOR 0 +#define VER_MINOR 1 +#define VER_SUBMINOR 0 + +/* Command-line option codes */ +#define CMD_SIMTYPE 10 +#define CMD_VERBOSITY 70 +#define CMD_HELP 100 + +/* The text labels for the different simtypes */ +static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""}; + + +/*************************************************************************/ +/*! Local variables */ +/*************************************************************************/ +static struct gk_option long_options[] = { + {"simtype", 1, 0, CMD_SIMTYPE}, + {"verbosity", 1, 0, CMD_VERBOSITY}, + + {"help", 0, 0, CMD_HELP}, + {0, 0, 0, 0} +}; + +static gk_StringMap_t simtype_options[] = { + {"dotp", GK_CSR_DOTP}, + {"cos", GK_CSR_COS}, + {"jac", GK_CSR_JAC}, + {NULL, 0} +}; + + +/*------------------------------------------------------------------- + * Mini help + *-------------------------------------------------------------------*/ +static char helpstr[][100] = +{ +" ", +"Usage: cmpnbrs [options] afile bfile", +" ", +" Options", +" -simtype=string", +" Specifies the type of similarity to use. 
Possible values are:", +" dotp - Dot-product similarity [default]", +" cos - Cosine similarity", +" jac - Jacquard similarity", +" ", +" -verbosity=int", +" Specifies the level of debugging information to be displayed.", +" Default value is 0.", +" ", +" -help", +" Prints this message.", +"" +}; + + + +/*************************************************************************/ +/*! Function prototypes */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]); +double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, gk_csr_t *bmat); + + +/*************************************************************************/ +/*! This is the entry point of the command-line argument parser */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]) +{ + int i; + int c, option_index; + params_t *params; + + params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params"); + + /* initialize the params data structure */ + params->simtype = GK_CSR_DOTP; + params->verbosity = -1; + params->afile = NULL; + params->bfile = NULL; + + + /* Parse the command line arguments */ + while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) { + switch (c) { + case CMD_SIMTYPE: + if (gk_optarg) { + if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1) + errexit("Invalid simtype of %s.\n", gk_optarg); + } + break; + + case CMD_VERBOSITY: + if (gk_optarg) params->verbosity = atoi(gk_optarg); + break; + + case CMD_HELP: + for (i=0; strlen(helpstr[i]) > 0; i++) + printf("%s\n", helpstr[i]); + exit(EXIT_SUCCESS); + break; + + case '?': + default: + printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + /* Get the input/output file info */ + if (argc-gk_optind != 2) { + printf("Missing input file info.\n Use %s -help for 
a summary of the options.\n", argv[0]); + exit(EXIT_FAILURE); + } + + params->afile = gk_strdup(argv[gk_optind++]); + params->bfile = gk_strdup(argv[gk_optind++]); + + if (!gk_fexists(params->afile)) + errexit("input file %s does not exist.\n", params->afile); + if (!gk_fexists(params->bfile)) + errexit("input file %s does not exist.\n", params->bfile); + + return params; +} + + +/*************************************************************************/ +/*! This is the entry point of the program */ +/**************************************************************************/ +int main(int argc, char *argv[]) +{ + params_t *params; + gk_csr_t *amat, *bmat, *amatt, *bmatt; + int rc = EXIT_SUCCESS; + + params = parse_cmdline(argc, argv); + + amat = gk_csr_Read(params->afile, GK_CSR_FMT_CSR, 1, 0); + bmat = gk_csr_Read(params->bfile, GK_CSR_FMT_CSR, 1, 0); + + /* make the matrices of similar dimensions (if neccessary) */ + GKASSERT(amat->nrows == bmat->nrows); + amat->ncols = gk_max(amat->ncols, bmat->ncols); + bmat->ncols = amat->ncols; + + /* create the transpose matrices */ + amatt = gk_csr_Transpose(amat); + bmatt = gk_csr_Transpose(bmat); + + printf("********************************************************************************\n"); + printf("cmpnbrs (%d.%d.%d) Copyright 2015, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR); + printf(" simtype=%s\n", + simtypenames[params->simtype]); + printf(" afile=%s, nrows=%d, ncols=%d, nnz=%zd\n", + params->afile, amat->nrows, amat->ncols, amat->rowptr[amat->nrows]); + printf(" bfile=%s, nrows=%d, ncols=%d, nnz=%zd\n", + params->bfile, bmat->nrows, bmat->ncols, bmat->rowptr[bmat->nrows]); + + gk_clearwctimer(params->timer_global); + gk_startwctimer(params->timer_global); + + printf("SIM(AA', BB'): %.5lf\t", ComputeNeighborhoodSimilarity(params, amat, bmat)); + printf("SIM(A'A, B'B): %.5lf\n", ComputeNeighborhoodSimilarity(params, amatt, bmatt)); + + gk_stopwctimer(params->timer_global); + + printf(" wclock: %.2lfs\n", 
gk_getwctimer(params->timer_global)); + printf("********************************************************************************\n"); + + gk_csr_Free(&amat); + gk_csr_Free(&bmat); + gk_csr_Free(&amatt); + gk_csr_Free(&bmatt); + + exit(rc); +} + + +/*************************************************************************/ +/*! Compares the neighbors of AA' vs BB' */ +/**************************************************************************/ +double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, + gk_csr_t *bmat) +{ + int iR, iH, nahits, nbhits, ncmps; + int32_t *marker; + gk_fkv_t *ahits, *bhits, *cand; + double tabsim, abdot, anorm2, bnorm2, *avec, *bvec; + + /* if cosine, make rows unit length */ + if (params->simtype == GK_CSR_COS) { + gk_csr_Normalize(amat, GK_CSR_ROW, 2); + gk_csr_Normalize(bmat, GK_CSR_ROW, 2); + } + + /* create the inverted index */ + gk_csr_CreateIndex(amat, GK_CSR_COL); + gk_csr_CreateIndex(bmat, GK_CSR_COL); + + /* compute the row squared norms */ + gk_csr_ComputeSquaredNorms(amat, GK_CSR_ROW); + gk_csr_ComputeSquaredNorms(bmat, GK_CSR_ROW); + + + /* allocate memory for the necessary working arrays */ + ahits = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: ahits"); + bhits = gk_fkvmalloc(bmat->nrows, "ComputeNeighborhoodSimilarity: bhits"); + marker = gk_i32smalloc(amat->nrows, -1, "ComputeNeighborhoodSimilarity: marker"); + cand = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: cand"); + avec = gk_dsmalloc(amat->nrows, 0.0, "ComputeNeighborhoodSimilarity: avec"); + bvec = gk_dsmalloc(bmat->nrows, 0.0, "ComputeNeighborhoodSimilarity: bvec"); + + + /* find the best neighbors for each row in the two matrices and compute + the cosine similarity between them. 
*/ + tabsim = 0.0; + ncmps = 0; + for (iR=0; iR<amat->nrows; iR++) { + if (params->verbosity > 1) + printf("Working on row %7d\n", iR); + + if (amat->rowptr[iR+1]-amat->rowptr[iR] == 0 || + bmat->rowptr[iR+1]-bmat->rowptr[iR] == 0) + continue; + + nahits = gk_csr_GetSimilarRows(amat, + amat->rowptr[iR+1]-amat->rowptr[iR], + amat->rowind+amat->rowptr[iR], + amat->rowval+amat->rowptr[iR], + params->simtype, amat->nrows, 0.0, + ahits, marker, cand); + + nbhits = gk_csr_GetSimilarRows(bmat, + bmat->rowptr[iR+1]-bmat->rowptr[iR], + bmat->rowind+bmat->rowptr[iR], + bmat->rowval+bmat->rowptr[iR], + params->simtype, bmat->nrows, 0.0, + bhits, marker, cand); + + if (params->verbosity > 0) + printf("Row %7d %7d %7d %8zd %8zd\n", iR, nahits, nbhits, + amat->rowptr[iR+1]-amat->rowptr[iR], bmat->rowptr[iR+1]-bmat->rowptr[iR]); + + for (iH=0; iH<nahits; iH++) + avec[ahits[iH].val] = ahits[iH].key; + for (iH=0; iH<nbhits; iH++) + bvec[bhits[iH].val] = bhits[iH].key; + + for (abdot=anorm2=bnorm2=0.0, iH=0; iH<amat->nrows; iH++) { + abdot += avec[iH]*bvec[iH]; + anorm2 += avec[iH]*avec[iH]; + bnorm2 += bvec[iH]*bvec[iH]; + } + tabsim += (abdot > 0 ? abdot/sqrt(anorm2*bnorm2) : 0.0); + ncmps++; + + for (iH=0; iH<nahits; iH++) + avec[ahits[iH].val] = 0.0; + for (iH=0; iH<nbhits; iH++) + bvec[bhits[iH].val] = 0.0; + } + + gk_free((void **)&ahits, &bhits, &marker, &cand, &avec, &bvec, LTERM); + + return tabsim/ncmps; +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/csrcnv.c b/3rdParty/metis/metis-5.1.1/GKlib/test/csrcnv.c new file mode 100644 index 000000000..aef808ef2 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/csrcnv.c @@ -0,0 +1,397 @@ +/*! +\file +\brief A simple program to convert between different matrix formats that are supported + by the gk_csr_Read/gk_csr_Write functions. 
+ +\date 5/30/2013 +\author George +\version \verbatim $Id: csrcnv.c 15314 2013-10-05 16:50:50Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +/*************************************************************************/ +/*! Data structures for the code */ +/*************************************************************************/ +typedef struct { + int inf, outf; /* input/output format */ + int numbering; /* input numbering (output when applicable) */ + int readvals; /* input values (output when applicable) */ + int writevals; /* output values */ + int rshuf, cshuf; /* random shuffle of rows/columns */ + int symmetric; /* a symmetric shuffle */ + int mincolfreq; /* column prunning */ + int maxcolfreq; /* column prunning */ + int minrowfreq; /* row prunning */ + int maxrowfreq; /* row prunning */ + float rownrmfltr; /* row-lowfilter threshold */ + int compactcols; /* if to renumber columns to eliminate empty ones */ + int transpose; /* transpose the output matrix */ + char *srenumber; /* the iperm file for the symmetric renumbering */ + char *infile; /* input file */ + char *outfile; /* output file */ +} params_t; + + +/*************************************************************************/ +/*! Constants */ +/*************************************************************************/ +#define CMD_NUMONE 1 +#define CMD_NOREADVALS 2 +#define CMD_NOWRITEVALS 3 +#define CMD_RSHUF 4 +#define CMD_CSHUF 5 +#define CMD_SYMMETRIC 6 +#define CMD_MINCOLFREQ 7 +#define CMD_MAXCOLFREQ 8 +#define CMD_MINROWFREQ 9 +#define CMD_MAXROWFREQ 10 +#define CMD_ROWNRMFLTR 11 +#define CMD_COMPACTCOLS 12 +#define CMD_TRANSPOSE 13 +#define CMD_SRENUMBER 14 +#define CMD_HELP 100 + + +/*************************************************************************/ +/*! 
Local variables */ +/*************************************************************************/ +static struct gk_option long_options[] = { + {"numone", 0, 0, CMD_NUMONE}, + {"noreadvals", 0, 0, CMD_NOREADVALS}, + {"nowritevals", 0, 0, CMD_NOWRITEVALS}, + {"rshuf", 0, 0, CMD_RSHUF}, + {"cshuf", 0, 0, CMD_CSHUF}, + {"symmetric", 0, 0, CMD_SYMMETRIC}, + {"mincolfreq", 1, 0, CMD_MINCOLFREQ}, + {"maxcolfreq", 1, 0, CMD_MAXCOLFREQ}, + {"minrowfreq", 1, 0, CMD_MINROWFREQ}, + {"maxrowfreq", 1, 0, CMD_MAXROWFREQ}, + {"rownrmfltr", 1, 0, CMD_ROWNRMFLTR}, + {"compactcols", 0, 0, CMD_COMPACTCOLS}, + {"transpose", 0, 0, CMD_TRANSPOSE}, + {"srenumber", 1, 0, CMD_SRENUMBER}, + {"help", 0, 0, CMD_HELP}, + {0, 0, 0, 0} +}; + + +/*-------------------------------------------------------------------*/ +/* Mini help */ +/*-------------------------------------------------------------------*/ +static char helpstr[][100] = { +" ", +"Usage: csrconv [options] <infile> <inf> <outfile> <outf>", +" ", +" Required parameters", +" infile, outfile", +" The name of the input/output CSR file.", +" ", +" inf/outf", +" The format of the input/output file.", +" Supported values are:", +" 1 GK_CSR_FMT_CLUTO", +" 2 GK_CSR_FMT_CSR", +" 3 GK_CSR_FMT_METIS", +" 4 GK_CSR_FMT_BINROW", +" 6 GK_CSR_FMT_IJV", +" 7 GK_CSR_FMT_BIJV", +" ", +" Optional parameters", +" -numone", +" Specifies that the numbering of the input file starts from 1. 
", +" It only applies to CSR/IJV formats.", +" ", +" -nowritevals", +" Specifies that no values will be output.", +" ", +" -noreadvals", +" Specifies that the values will not be read when applicable.", +" ", +" -rshuf", +" Specifies that the rows will be randmly shuffled prior to output.", +" ", +" -cshuf", +" Specifies that the columns will be randmly shuffled prior to output.", +" ", +" -symmetric", +" Specifies that the row+column shuffling will be symmetric.", +" ", +" -mincolfreq=int", +" Used to prune infrequent columns.", +" ", +" -maxcolfreq=int", +" Used to prune frequent columns.", +" ", +" -minrowfreq=int", +" Used to prune infrequent rows.", +" ", +" -maxrowfreq=int", +" Used to prune frequent.", +" ", +" -rownrmfltr=float", +" The parameter to use for the row-wise low filter.", +" ", +" -compactcols", +" Specifies if empty columns will be removed and the columns renumbered.", +" ", +" -transpose", +" Specifies that the transposed matrix will be written.", +" ", +" -srenumber=iperm-file", +" Performs a symmetric renumbering based on the provided iperm file.", +" ", +" -help", +" Prints this message.", +"" +}; + +static char shorthelpstr[][100] = { +" ", +" Usage: csrconv [options] <infile> <inf> <outfile> <outf>", +" use 'csrconv -help' for a summary of the options.", +"" +}; + + +/*************************************************************************/ +/*! 
This is the entry point of the command-line argument parser */
+/*************************************************************************/
+/* Allocates a params_t, loads the defaults listed below, applies the long
+   options found on the command line and then consumes the four positional
+   arguments <infile> <inf> <outfile> <outf>. The process exits on -help,
+   on an illegal option, or when the positional count is not four. */
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->numbering   = 0;
+  params->readvals    = 1;
+  params->writevals   = 1;
+  params->rshuf       = 0;
+  params->cshuf       = 0;
+  params->symmetric   = 0;
+  params->transpose   = 0;
+  params->srenumber   = NULL;
+
+  /* -1 means "filter not requested" for all of the pruning thresholds */
+  params->mincolfreq  = -1;
+  params->minrowfreq  = -1;
+  params->maxcolfreq  = -1;
+  params->maxrowfreq  = -1;
+  params->rownrmfltr  = -1;
+  params->compactcols = 0;
+
+  params->inf         = -1;
+  params->outf        = -1;
+  params->infile      = NULL;
+  params->outfile     = NULL;
+
+
+  /* Parse the command line arguments */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NUMONE:
+        params->numbering = 1;
+        break;
+      case CMD_NOREADVALS:
+        params->readvals = 0;
+        break;
+      case CMD_NOWRITEVALS:
+        params->writevals = 0;
+        break;
+      case CMD_RSHUF:
+        params->rshuf = 1;
+        break;
+      case CMD_CSHUF:
+        params->cshuf = 1;
+        break;
+      case CMD_SYMMETRIC:
+        params->symmetric = 1;
+        break;
+      case CMD_TRANSPOSE:
+        params->transpose = 1;
+        break;
+
+
+      case CMD_MINCOLFREQ:
+        if (gk_optarg) params->mincolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MINROWFREQ:
+        if (gk_optarg) params->minrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXCOLFREQ:
+        if (gk_optarg) params->maxcolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXROWFREQ:
+        if (gk_optarg) params->maxrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_ROWNRMFLTR:
+        if (gk_optarg) params->rownrmfltr = atof(gk_optarg);
+        break;
+      case CMD_COMPACTCOLS:
+        params->compactcols = 1;
+        break;
+
+      case CMD_SRENUMBER:
+        if (gk_optarg) {
+          params->srenumber = gk_strdup(gk_optarg);
+          if (!gk_fexists(params->srenumber))
+            errexit("srenumber file %s does not exist.\n", params->srenumber);
+        }
+        break;
+
+      case CMD_HELP:
+        /* helpstr is terminated by an empty string */
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  if (argc-gk_optind != 4) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->inf     = atoi(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+  params->outf    = atoi(argv[gk_optind++]);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+/* Reads the input matrix, applies the requested transformations in a fixed
+   order (column prune, row prune, row low-filter + normalize, column
+   compaction, shuffle, symmetric renumbering, transpose) and writes the
+   result in the requested output format. */
+int main(int argc, char *argv[])
+{
+  int what;
+  params_t *params;
+  gk_csr_t *mat, *mat1, *smat;
+
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
+
+  /* deal with weird transformations */
+  if (params->mincolfreq != -1 || params->maxcolfreq != -1) {
+    /* an unspecified bound defaults to the widest possible value */
+    params->mincolfreq = (params->mincolfreq == -1 ? 0 : params->mincolfreq);
+    params->maxcolfreq = (params->maxcolfreq == -1 ? mat->nrows : params->maxcolfreq);
+
+    printf("Column prune: %d %d; nnz: %zd => ",
+        params->mincolfreq, params->maxcolfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_COL, params->mincolfreq, params->maxcolfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->minrowfreq != -1 || params->maxrowfreq != -1) {
+    params->minrowfreq = (params->minrowfreq == -1 ? 0 : params->minrowfreq);
+    params->maxrowfreq = (params->maxrowfreq == -1 ? mat->ncols : params->maxrowfreq);
+
+    printf("Row prune: %d %d; nnz: %zd => ",
+        params->minrowfreq, params->maxrowfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_ROW, params->minrowfreq, params->maxrowfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->rownrmfltr >= 0.0) {
+    printf("Row low filter: %f; nnz: %zd => ", params->rownrmfltr, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_LowFilter(mat, GK_CSR_ROW, 2, params->rownrmfltr);
+    gk_csr_Normalize(mat1, GK_CSR_ROW, 2);
+
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->compactcols) {
+    printf("Compacting columns: %d => ", mat->ncols);
+    gk_csr_CompactColumns(mat);
+    printf("%d\n", mat->ncols);
+  }
+
+
+  if (params->rshuf || params->cshuf) {
+    if (params->rshuf && params->cshuf)
+      what = GK_CSR_ROWCOL;
+    else if (params->rshuf)
+      what = GK_CSR_ROW;
+    else
+      what = GK_CSR_COL;
+
+    smat = gk_csr_Shuffle(mat, what, params->symmetric);
+    gk_csr_Free(&mat);
+    mat = smat;
+  }
+
+
+  if (params->srenumber) {
+    size_t i, nlines;
+    int32_t *iperm, pmin, pmax;
+
+    iperm = gk_i32readfile(params->srenumber, &nlines);
+    if (nlines != mat->nrows && nlines != mat->ncols)
+      errexit("The nlines=%zu of srenumber file does not match nrows: %d, ncols: %d\n", nlines, mat->nrows, mat->ncols);
+
+    /* A usable permutation is either 0-based (values in [0, nlines-1]) or
+       1-based (values in [1, nlines]); anything else would index outside
+       the matrix during the reordering. */
+    pmin = gk_i32min(nlines, iperm, 1);
+    pmax = gk_i32max(nlines, iperm, 1);
+    if (pmin < 0 || (size_t)pmax > nlines || (pmin == 0 && (size_t)pmax == nlines))
+      errexit("The srenumber iperm seems to be wrong.\n");
+
+    if ((size_t)pmax == nlines) { /* 1-based file: need to renumber */
+      for (i=0; i<nlines; i++)
+        iperm[i]--;
+    }
+
+    smat = gk_csr_ReorderSymmetric(mat, iperm, NULL);
+    gk_csr_Free(&mat);
+    mat = smat;
+
+    gk_free((void **)&iperm, LTERM);
+  }
+
+  /* values were not read but must be written: emit 1.0 for every nonzero */
+  if (params->writevals && mat->rowval == NULL)
+    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
+
+  if (params->transpose) {
+    mat1 = gk_csr_Transpose(mat);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+  }
+
+
+
+  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
+
+  gk_csr_Free(&mat);
+
+  return 0;
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/fis.c b/3rdParty/metis/metis-5.1.1/GKlib/test/fis.c
new file mode 100644
index 000000000..084a4b6a1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/test/fis.c
@@ -0,0 +1,286 @@
+/*!
+\file
+\brief A simple frequent itemset discovery program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  ssize_t minlen, maxlen;     /* bounds on the pattern length (-minlen/-maxlen) */
+  ssize_t minfreq, maxfreq;   /* bounds on the pattern frequency (-minfreq/-maxfreq) */
+  char *filename;             /* transaction file given on the command line */
+  int silent;                 /* non-zero: do not print the discovered itemsets */
+  ssize_t nitemsets;          /* running count of itemsets reported to the callback */
+  char *clabelfile;           /* optional column-label file (-clabels), NULL if absent */
+  char **clabels;             /* one label string per matrix column, filled in by main */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_MINLEN              1
+#define CMD_MAXLEN              2
+#define CMD_MINFREQ             3
+#define CMD_MAXFREQ             4
+#define CMD_SILENT              5
+#define CMD_CLABELFILE          6
+#define CMD_HELP                10
+
+
+/*************************************************************************/
+/*! 
Local variables */
+/*************************************************************************/
+/* Long-option table handed to gk_getopt_long_only() by parse_cmdline().
+   The last field is the CMD_* code that parse_cmdline() switches on; the
+   entries whose second field is 1 are the ones parse_cmdline() reads
+   gk_optarg for (presumably a "takes an argument" flag -- confirm against
+   struct gk_option). The all-zero entry terminates the table. */
+static struct gk_option long_options[] = {
+  {"minlen",            1,      0,      CMD_MINLEN},
+  {"maxlen",            1,      0,      CMD_MAXLEN},
+  {"minfreq",           1,      0,      CMD_MINFREQ},
+  {"maxfreq",           1,      0,      CMD_MAXFREQ},
+  {"silent",            0,      0,      CMD_SILENT},
+  {"clabels",           1,      0,      CMD_CLABELFILE},
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+/* Full help text printed for -help, one row per output line. Each row is
+   a fixed 100-byte buffer and the printing loop stops at the first empty
+   string, so the trailing "" is the terminator and blank output lines must
+   be written as " " rather than "". */
+static char helpstr[][100] = {
+" ",
+"Usage: fis [options] <mat-file>",
+" ",
+" Required parameters",
+" mat-file",
+" The name of the file storing the transactions. The file is in ",
+" Cluto's .mat format.",
+" ",
+" Optional parameters",
+" -minlen=int",
+" Specifies the minimum length of the patterns. [default: 1]",
+" ",
+" -maxlen=int",
+" Specifies the maximum length of the patterns. [default: none]",
+" ",
+" -minfreq=int",
+" Specifies the minimum frequency of the patterns. [default: 10]",
+" ",
+" -maxfreq=int",
+" Specifies the maximum frequency of the patterns. [default: none]",
+" ",
+" -silent",
+" Does not print the discovered itemsets.",
+" ",
+" -clabels=filename",
+" Specifies the name of the file that stores the column labels.",
+" ",
+" -help",
+" Prints this message.",
+""
+};
+
+/* Short usage text printed when the positional arguments are wrong; same
+   row layout and ""-terminator convention as helpstr. */
+static char shorthelpstr[][100] = {
+" ",
+" Usage: fis [options] <mat-file>",
+" use 'fis -help' for a summary of the options.",
+""
+};
+
+
+
+/*************************************************************************/
+/*! 
Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+void print_an_itemset(void *stateptr, int nitems, int *itemind,
+         int ntrans, int *tranind);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+/* Loads the transaction matrix, attaches one label to every column (read
+   from the -clabels file when given, otherwise the column number printed
+   as text), runs the frequent-itemset search with print_an_itemset as the
+   callback and finally reports the itemset count and memory statistics. */
+int main(int argc, char *argv[])
+{
+  params_t *opts = parse_cmdline(argc, argv);
+  gk_csr_t *txns;
+  FILE *labelfp;
+  char buf[8192];
+  ssize_t col;
+
+  opts->nitemsets = 0;
+
+  /* load the transactions and build the column index */
+  txns = gk_csr_Read(opts->filename, GK_CSR_FMT_CLUTO, 1, 1);
+  gk_csr_CreateIndex(txns, GK_CSR_COL);
+
+  /* one label per column */
+  opts->clabels = (char **)gk_malloc(txns->ncols*sizeof(char *), "main: clabels");
+  if (opts->clabelfile != NULL) {
+    /* labels come from the file, one per line, with the trailing
+       blanks/newline/tabs pruned before the copy is stored */
+    labelfp = gk_fopen(opts->clabelfile, "r", "main: fpin");
+    col = 0;
+    while (col < txns->ncols) {
+      if (fgets(buf, sizeof(buf), labelfp) == NULL)
+        errexit("Failed on fgets.\n");
+      opts->clabels[col] = gk_strdup(gk_strtprune(buf, " \n\t"));
+      col++;
+    }
+    gk_fclose(labelfp);
+  }
+  else {
+    /* no label file: the label is simply the column index */
+    col = 0;
+    while (col < txns->ncols) {
+      sprintf(buf, "%zd", col);
+      opts->clabels[col] = gk_strdup(buf);
+      col++;
+    }
+  }
+
+
+  print_init_info(opts, txns);
+
+  gk_find_frequent_itemsets(txns->nrows, txns->rowptr, txns->rowind,
+      opts->minfreq, opts->maxfreq, opts->minlen, opts->maxlen,
+      &print_an_itemset, (void *)opts);
+
+  printf("Total itemsets found: %zd\n", opts->nitemsets);
+
+  print_final_info(opts);
+}
+
+
+
+/*************************************************************************/
+/*! 
This function prints run parameters */
+/*************************************************************************/
+/* Prints the banner, the input file name with the matrix dimensions
+   [nrows, ncols, nnz] and the length/frequency limits in effect. */
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  printf("*******************************************************************************\n");
+  printf(" fis\n\n");
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n",
+      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  /* the label used to read "minfeq"; it names the -minfreq option */
+  printf(" minlen=%zd, maxlen=%zd, minfreq=%zd, maxfreq=%zd\n",
+      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
+
+  printf("\n");
+  printf("Finding patterns... -----------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+/* Reports the values returned by gk_GetMaxMemoryUsed() and
+   gk_GetCurMemoryUsed(); params is accepted but not read. */
+void print_final_info(params_t *params)
+{
+  printf("\n");
+  printf("Memory Usage Information -----------------------------------------------------\n");
+  printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
+  printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! 
This is the entry point of the command-line argument parser */
+/*************************************************************************/
+/* Builds the params_t for fis: starts from the defaults, overrides them
+   with whatever long options are present and requires exactly one
+   positional argument, the transaction file, which must exist. */
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  params_t *cfg;
+  int opt, optidx, row;
+
+  cfg = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* defaults; -1 on the upper bounds is what the help text calls "none" */
+  cfg->minlen     = 1;
+  cfg->maxlen     = -1;
+  cfg->minfreq    = 10;
+  cfg->maxfreq    = -1;
+  cfg->silent     = 0;
+  cfg->filename   = NULL;
+  cfg->clabelfile = NULL;
+
+  /* walk the options until the parser reports the end (-1) */
+  for (;;) {
+    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
+    if (opt == -1)
+      break;
+
+    switch (opt) {
+      case CMD_MINLEN:
+        if (gk_optarg)
+          cfg->minlen = atoi(gk_optarg);
+        break;
+
+      case CMD_MAXLEN:
+        if (gk_optarg)
+          cfg->maxlen = atoi(gk_optarg);
+        break;
+
+      case CMD_MINFREQ:
+        if (gk_optarg)
+          cfg->minfreq = atoi(gk_optarg);
+        break;
+
+      case CMD_MAXFREQ:
+        if (gk_optarg)
+          cfg->maxfreq = atoi(gk_optarg);
+        break;
+
+      case CMD_SILENT:
+        cfg->silent = 1;
+        break;
+
+      case CMD_CLABELFILE:
+        if (gk_optarg)
+          cfg->clabelfile = gk_strdup(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        /* the help table ends at its first empty row */
+        for (row=0; helpstr[row][0] != '\0'; row++)
+          printf("%s\n", helpstr[row]);
+        exit(0);
+
+      default:  /* '?' and every other unknown code */
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* exactly one positional argument must remain */
+  if (argc-gk_optind != 1) {
+    printf("Unrecognized parameters.");
+    for (row=0; shorthelpstr[row][0] != '\0'; row++)
+      printf("%s\n", shorthelpstr[row]);
+    exit(0);
+  }
+
+  cfg->filename = gk_strdup(argv[gk_optind++]);
+  if (!gk_fexists(cfg->filename))
+    errexit("input file %s does not exist.\n", cfg->filename);
+
+  return cfg;
+}
+
+
+
+/*************************************************************************/
+/*! 
This is the callback function for the itemset discovery routine */
+/*************************************************************************/
+/* stateptr is the params_t that main() handed to the search routine. Every
+   call bumps its nitemsets counter; unless the silent flag is set the
+   itemset is also printed: a header line with the running count, the item
+   count and the transaction count followed by the item labels, and then
+   one line per supporting transaction id. */
+void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
+         int *transids)
+{
+  params_t *state = (params_t *)stateptr;
+  int k;
+
+  state->nitemsets++;
+
+  if (state->silent)
+    return;
+
+  printf("%4zd %4d %4d => ", state->nitemsets, nitems, ntrans);
+  for (k=0; k<nitems; k++)
+    printf(" %s", state->clabels[itemids[k]]);
+  printf("\n");
+
+  for (k=0; k<ntrans; k++)
+    printf(" %d\n", transids[k]);
+  printf("\n");
+}
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/gkgraph.c b/3rdParty/metis/metis-5.1.1/GKlib/test/gkgraph.c
new file mode 100644
index 000000000..91314647b
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/GKlib/test/gkgraph.c
@@ -0,0 +1,845 @@
+/*!
+\file
+\brief A simple program to try out some graph routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: gkgraph.c 17700 2014-09-27 18:10:02Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int lnbits;      /* -lnbits: address bits indexing the cacheline */
+  int cnbits;      /* -cnbits: address bits indexing the cache */
+  int type;        /* -type: parsed and echoed; not otherwise used in this file */
+  int niter;       /* -niter: number of label-propagation sweeps */
+  float eps;       /* -eps: error tolerance */
+  float lamda;     /* -lamda: follow-the-adjacent-links probability */
+  int nosort;      /* -nosort: leave the adjacency lists unsorted */
+  int write;       /* -write: output the reordered graphs */
+
+  char *infile;    /* graph file given on the command line */
+  char *outfile;   /* output file name */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NITER               1
+#define CMD_EPS                 2
+#define CMD_LAMDA               3
+#define CMD_TYPE                4
+#define CMD_NOSORT              5
+#define CMD_WRITE               6
+#define CMD_LNBITS              7
+#define CMD_CNBITS              8
+#define CMD_HELP                10
+
+#define CLINE32                 16
+#define CLINE64                 8
+#define MAXRCLOCKSPAN           (1<<20)
+
+/*************************************************************************/
+/*! 
Local variables */
+/*************************************************************************/
+/* Long-option table handed to gk_getopt_long_only() by parse_cmdline();
+   the last field is the CMD_* code that parse_cmdline() switches on and
+   the all-zero entry terminates the table. Note that -type is accepted
+   here but has no entry in the help text below. */
+static struct gk_option long_options[] = {
+  {"lnbits",            1,      0,      CMD_LNBITS},
+  {"cnbits",            1,      0,      CMD_CNBITS},
+  {"type",              1,      0,      CMD_TYPE},
+  {"niter",             1,      0,      CMD_NITER},
+  {"lamda",             1,      0,      CMD_LAMDA},
+  {"eps",               1,      0,      CMD_EPS},
+  {"nosort",            0,      0,      CMD_NOSORT},
+  {"write",             0,      0,      CMD_WRITE},
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+/* Full help text printed for -help, one row per output line; the printing
+   loop stops at the first empty string, so "" is the terminator. The
+   defaults quoted here are the values parse_cmdline() actually installs
+   (niter=1, lamda=0.20); the text previously claimed 100 and 0.80. */
+static char helpstr[][100] = {
+" ",
+"Usage: gkgraph [options] <graph-file> [<out-file>]",
+" ",
+" Required parameters",
+" graph-file",
+" The name of the file storing the graph. The file is in ",
+" Metis' graph format.",
+" ",
+" Optional parameters",
+" -niter=int",
+" Specifies the maximum number of iterations. [default: 1]",
+" ",
+" -lnbits=int",
+" Specifies the number of address bits indexing the cacheline. [default: 6]",
+" ",
+" -cnbits=int",
+" Specifies the number of address bits indexing the cache. [default: 13]",
+" ",
+" -lamda=float",
+" Specifies the follow-the-adjacent-links probability. [default: 0.20]",
+" ",
+" -eps=float",
+" Specifies the error tolerance. [default: 1e-10]",
+" ",
+" -nosort",
+" Does not sort the adjacency lists.",
+" ",
+" -write",
+" Output the reordered graphs.",
+" ",
+" -help",
+" Prints this message.",
+""
+};
+
+/* Short usage text printed when the positional arguments are wrong; same
+   ""-terminator convention as helpstr. */
+static char shorthelpstr[][100] = {
+" ",
+" Usage: gkgraph [options] <graph-file> [<out-file>]",
+" use 'gkgraph -help' for a summary of the options.",
+""
+};
+
+
+
+/*************************************************************************/
+/*! 
Function prototypes */
+/*************************************************************************/
+/* experiment drivers */
+void test_spmv(params_t *params);
+void test_tc(params_t *params);
+
+/* shared building blocks of the experiments */
+void sort_adjacencies(params_t *params, gk_graph_t *graph);
+double compute_spmvstats(params_t *params, gk_graph_t *graph);
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm);
+
+/* vertex orderings; each one returns a newly allocated array */
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph);
+
+/* reporting and command-line handling */
+void print_init_info(params_t *params, gk_graph_t *graph);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+/* Parses the command line and runs the triangle-counting experiment;
+   test_spmv() is not invoked from here. */
+int main(int argc, char *argv[])
+{
+  test_tc(parse_cmdline(argc, argv));
+}
+
+
+/*************************************************************************/
+/*! 
various spmv-related tests */
+/**************************************************************************/
+/* Helper for test_spmv: applies perm to graph, sorts the adjacency lists
+   of the reordered copy, optionally writes it to fname in IJV format,
+   prints "<label> SPMV HitRate: <rate>" and releases the copy. perm stays
+   owned by the caller. */
+static void report_spmv_ordering(params_t *params, gk_graph_t *graph,
+                int32_t *perm, char *fname, const char *label)
+{
+  gk_graph_t *pgraph;
+
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, fname, GK_GRAPH_FMT_IJV, 1);
+  printf("%s SPMV HitRate: %.4lf\n", label, compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+}
+
+
+/* Reads the graph named by params->infile and reports the simulated cache
+   hit rate of an SPMV sweep for the input ordering and for each of the
+   orderings implemented in this file (BFS from a random vertex, degree,
+   and the four label-propagation variants). */
+void test_spmv(params_t *params)
+{
+  ssize_t v;
+  gk_graph_t *graph;
+  int32_t *perm;
+
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* baseline: the ordering the graph came in with */
+  sort_adjacencies(params, graph);
+  if (params->write) gk_graph_Write(graph, "original.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("Input SPMV HitRate: %.4lf\n", compute_spmvstats(params, graph));
+
+
+  v = RandomInRange(graph->nvtxs);
+  gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
+  report_spmv_ordering(params, graph, perm, "bfs.ijv", "BFS");
+  gk_free((void **)&perm, LTERM);
+
+
+  perm = reorder_degrees(params, graph);
+  report_spmv_ordering(params, graph, perm, "degrees.ijv", "Degrees");
+  gk_free((void **)&perm, LTERM);
+
+
+  perm = reorder_freqlpn(params, graph);
+  report_spmv_ordering(params, graph, perm, "freqlpn.ijv", "FreqLabelPropN");
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_freqlpn_db(params, graph);
+  report_spmv_ordering(params, graph, perm, "freqlpn-db.ijv", "DBFreqLabelPropN");
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_minlpn(params, graph);
+  report_spmv_ordering(params, graph, perm, "minlpn.ijv", "MinLabelPropN");
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_minlpn_db(params, graph);
+  report_spmv_ordering(params, graph, perm, "minlpn-db.ijv", "DBMinLabelPropN");
+  gk_free((void **)&perm, LTERM);
+
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! various tc-related tests */
+/**************************************************************************/
+/* Reads the graph, reorders it by degree once and then reports the
+   simulated cache hit rate of the triangle-counting kernel for several
+   vertex traversal orders (iperm) over that same degree-ordered graph.
+   sort_adjacencies() is called before every measurement because
+   compute_tcstats() rearranges the adjacency lists in place.
+   NOTE(review): with -nosort that re-sorting is skipped, so the second
+   and later measurements see already-rearranged lists -- confirm whether
+   that combination is meant to be supported. */
+void test_tc(params_t *params)
+{
+  ssize_t i, v;
+  gk_graph_t *graph, *pgraph;
+  int32_t *perm, *iperm;
+
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* degree-ordered copy, traversed in identity order */
+  perm = reorder_degrees(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  gk_free((void **)&perm, LTERM);
+  sort_adjacencies(params, pgraph);
+  iperm = gk_i32incset(graph->nvtxs, 0, gk_i32malloc(graph->nvtxs, "iperm"));
+  printf("Degrees TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  /* each ordering below is inverted into iperm: iperm[perm[i]] = i */
+  sort_adjacencies(params, pgraph);
+  v = RandomInRange(pgraph->nvtxs);
+  gk_graph_ComputeBFSOrdering(pgraph, v, &perm, NULL);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("BFS TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("FreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn_db(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("DBFreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  /* the degree-ordered copy is no longer needed; it used to be leaked,
+     which inflated the "Current memory used" figure printed below */
+  gk_graph_Free(&pgraph);
+
+
+#ifdef XXX
+  perm = reorder_minlpn(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("MinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_minlpn_db(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("DBMinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+#endif
+
+  gk_free((void **)&iperm, LTERM);
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! This function sorts the adjacency lists of the vertices in increasing
+    order. It does nothing when params->nosort is set.
+*/
+/*************************************************************************/
+void sort_adjacencies(params_t *params, gk_graph_t *graph)
+{
+  uint64_t i, nvtxs;
+  ssize_t *xadj;
+  int32_t *adjncy;
+
+  if (params->nosort)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* vertex i owns adjncy[xadj[i] .. xadj[i+1]-1] */
+  for (i=0; i<nvtxs; i++)
+    gk_i32sorti(xadj[i+1]-xadj[i], adjncy+xadj[i]);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! 
This function analyzes the cache locality of an SPMV operation using
+    GKlib's cache simulator and returns the cache's hit rate.
+
+    For every stored edge it records a load of the adjncy entry and a load
+    of the vector element that entry selects; only addresses are fed to
+    the simulator, the vector contents are never read.
+ */
+/*************************************************************************/
+double compute_spmvstats(params_t *params, gk_graph_t *graph)
+{
+  uint64_t i, nvtxs;
+  ssize_t *xadj;
+  int32_t *adjncy, *vec;
+
+  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits); /* 8MB total; i7 spec */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  vec = gk_i32malloc(nvtxs, "vec");
+  for (i=0; i<xadj[nvtxs]; i++) {
+    gk_cacheLoad(cache, (size_t)(&adjncy[i]));
+    gk_cacheLoad(cache, (size_t)(&vec[adjncy[i]]));
+  }
+
+  gk_free((void **)&vec, LTERM);
+
+  double hitrate = gk_cacheGetHitRate(cache);
+  gk_cacheDestroy(&cache);
+
+  return hitrate;
+}
+
+
+/*************************************************************************/
+/*! The hash-map-based triangle-counting routine that uses the JIK
+    triangle enumeration scheme.
+
+    This version implements the following:
+     - It does not store location information in L
+     - Reverts the order within U's adjancency lists to allow ++ traversal
+
+    Every array access of the kernel is mirrored by a gk_cacheLoad() on the
+    same address, and the simulator's hit rate is the return value. The
+    vertices are visited in the order given by iperm. The adjacency lists
+    of graph are rearranged in place (the part of each list holding the
+    neighbours >= the vertex itself is reversed), and the scan that locates
+    that part expects each list to be sorted in increasing order on entry.
+*/
+/*************************************************************************/
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm)
+{
+  int32_t vi, vj, vjj, vk, nvtxs;
+  ssize_t ei, eiend, ej, ejend;
+  int64_t ntriangles;
+  ssize_t *xadj, *uxadj;
+  int32_t *adjncy;
+  int32_t l, hmsize, *hmap;
+
+  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits);
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* determine the starting location of the upper trianglular part */
+  uxadj = gk_zmalloc(nvtxs, "uxadj");
+  for (vi=0; vi<nvtxs; vi++) {
+    for (ei=xadj[vi], eiend=xadj[vi+1]; ei<eiend && adjncy[ei]<vi; ei++);
+    uxadj[vi] = ei;
+    /* flip the order of Adj(vi)'s upper triangular adjacency list */
+    for (ej=xadj[vi+1]-1; ei<ej; ei++, ej--) {
+      vj = adjncy[ei];
+      adjncy[ei] = adjncy[ej];
+      adjncy[ej] = vj;
+    }
+  }
+
+  /* determine the size of the hash-map and convert it into a format
+     that is compatible with a bitwise AND operation */
+  for (hmsize=0, vi=0; vi<nvtxs; vi++)
+    hmsize = gk_max(hmsize, (int32_t)(xadj[vi+1]-uxadj[vi]));
+  for (l=1; hmsize>(1<<l); l++);
+  hmsize = (1<<(l+4))-1;
+  hmap = gk_i32smalloc(hmsize+1, 0, "hmap");
+
+  for (ntriangles=0, vjj=0; vjj<nvtxs; vjj++) {
+    vj = iperm[vjj];
+
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+
+    /* nothing to intersect when either half of Adj(vj) is empty */
+    if (xadj[vj+1]-uxadj[vj] == 0 || uxadj[vj] == xadj[vj])
+      continue;
+
+    /* hash Adj(vj); 0 marks a free slot, probing is linear */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      for (l=(vk&hmsize);
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0;
+           l=((l+1)&hmsize));
+      hmap[l] = vk;
+    }
+
+    /* find intersections */
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    for (ej=xadj[vj], ejend=uxadj[vj]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vi = adjncy[ej];
+      /* vi must be assigned before its uxadj slot is charged to the cache;
+         the load used to be issued with the previous iteration's vi */
+      gk_cacheLoad(cache, (size_t)(&uxadj[vi]));
+      for (ei=uxadj[vi]; gk_cacheLoad(cache, (size_t)(&adjncy[ei])) && adjncy[ei]>vj; ei++) {
+        vk = adjncy[ei];
+        for (l=vk&hmsize;
+             gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0 && hmap[l]!=vk;
+             l=((l+1)&hmsize));
+        gk_cacheLoad(cache, (size_t)(&hmap[l]));
+        if (hmap[l] == vk)
+          ntriangles++;
+      }
+    }
+
+    /* reset hash */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      for (l=(vk&hmsize);
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=vk;
+           l=((l+1)&hmsize));
+      hmap[l] = 0;
+    }
+  }
+  /* ntriangles is signed 64-bit, so it is printed with PRId64 */
+  printf("& compatible hmsize: %"PRId32" #triangles: %"PRId64"\n", hmsize, ntriangles);
+
+  gk_free((void **)&uxadj, &hmap, LTERM);
+
+  double hitrate = gk_cacheGetHitRate(cache);
+  gk_cacheDestroy(&cache);
+
+  return hitrate;
+}
+
+
+/*************************************************************************/
+/*! This function computes an increasing degree ordering
+
+    It is a counting sort on the vertex degrees: perm[i] is the position
+    of vertex i in the ordering, and vertices of equal degree keep their
+    relative order. The caller owns the returned array.
+*/
+/*************************************************************************/
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph)
+{
+  int i, nvtxs, range;
+  ssize_t *xadj;
+  int32_t *counts, *perm;
+
+  nvtxs = graph->nvtxs;
+  xadj  = graph->xadj;
+
+  /* range = largest degree + 1, i.e. the number of distinct buckets */
+  for (range=0, i=0; i<nvtxs; i++)
+    range = gk_max(range, xadj[i+1]-xadj[i]);
+  range++;
+
+  /* histogram of the degrees, then turned into bucket start offsets */
+  counts = gk_i32smalloc(range+1, 0, "counts");
+  for (i=0; i<nvtxs; i++)
+    counts[xadj[i+1]-xadj[i]]++;
+  MAKECSR(i, range, counts);
+
+  perm = gk_i32malloc(nvtxs, "perm");
+  for (i=0; i<nvtxs; i++)
+    perm[i] = counts[xadj[i+1]-xadj[i]]++;
+
+  gk_free((void **)&counts, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! 
This function re-orders the graph by:
+    - performing a fixed number of most-popular label propagation iterations
+    - locally renumbers the vertices with the same label
+
+    Every vertex starts with its own index as label. In each of the
+    params->niter sweeps the vertices are visited in a random order and
+    each one adopts the label that occurs most often among its neighbours
+    (ties are broken by a coin flip). Vertices without neighbours keep
+    their label. The result maps each vertex to its rank after sorting the
+    vertices by final label; the caller owns the returned array.
+*/
+/*************************************************************************/
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t i, ii, k, nvtxs, maxlbl, lbl;
+  ssize_t j, *xadj;
+  int32_t *adjncy, *labels, *freq, *perm;
+  gk_i32kv_t *cand;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  freq   = gk_i32smalloc(nvtxs, 0, "freq");
+  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+
+  for (k=0; k<params->niter; k++) {
+    gk_i32randArrayPermuteFine(nvtxs, perm, 0);
+    for (ii=0; ii<nvtxs; ii++) {
+      i = perm[ii];
+
+      /* An isolated vertex has no neighbour to take a label from. Without
+         this guard adjncy[xadj[i]] would read the first neighbour of the
+         next vertex (or past the end of adjncy for the last one) and the
+         freq[] entry set below would never be cleared. */
+      if (xadj[i] == xadj[i+1])
+        continue;
+
+      /* the first neighbour seeds the running maximum */
+      maxlbl = labels[adjncy[xadj[i]]];
+      freq[maxlbl] = 1;
+      for (j=xadj[i]+1; j<xadj[i+1]; j++) {
+        lbl = labels[adjncy[j]];
+        freq[lbl]++;
+        if (freq[maxlbl] < freq[lbl])
+          maxlbl = lbl;
+        else if (freq[maxlbl] == freq[lbl]) {
+          if (RandomInRange(2))
+            maxlbl = lbl;
+        }
+      }
+
+      /* clear only the counters that were touched */
+      for (j=xadj[i]; j<xadj[i+1]; j++)
+        freq[labels[adjncy[j]]] = 0;
+      labels[i] = maxlbl;
+    }
+  }
+
+  /* sort the vertices by label and number them in that order */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (i=0; i<nvtxs; i++) {
+    cand[i].key = labels[i];
+    cand[i].val = i;
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (i=0; i<nvtxs; i++)
+    perm[cand[i].val] = i;
+
+  gk_free((void **)&labels, &freq, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! 
This function re-orders the graph by:
+    - running params->niter sweeps of most-popular label propagation,
+      visiting the vertices in a freshly shuffled order in every sweep
+    - letting a vertex look only at neighbours in its own degree bucket
+      (bucket = degree divided by 8)
+    - numbering the vertices by their rank after sorting on the final label
+
+    A vertex starts from its current label; a neighbour's label replaces
+    it when it has been seen more often, or on a coin flip when the counts
+    are equal. The caller owns the returned array.
+*/
+/*************************************************************************/
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t nvtxs    = graph->nvtxs;
+  ssize_t *xadj    = graph->xadj;
+  int32_t *adjncy  = graph->adjncy;
+  int32_t v, pos, sweep, nbr, lbl, best;
+  ssize_t e;
+  int32_t *labels, *freq, *perm, *dbucket;
+  gk_i32kv_t *cand;
+
+  labels  = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  freq    = gk_i32smalloc(nvtxs, 0, "freq");
+  perm    = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+  dbucket = gk_i32malloc(nvtxs, "dbucket");
+
+  /* degree bucket of every vertex */
+  for (v=0; v<nvtxs; v++)
+    dbucket[v] = ((xadj[v+1]-xadj[v])>>3);
+
+  for (sweep=0; sweep<params->niter; sweep++) {
+    gk_i32randArrayPermuteFine(nvtxs, perm, 0);
+
+    for (pos=0; pos<nvtxs; pos++) {
+      v    = perm[pos];
+      best = labels[v];
+
+      for (e=xadj[v]; e<xadj[v+1]; e++) {
+        nbr = adjncy[e];
+        if (dbucket[nbr] == dbucket[v]) {
+          lbl = labels[nbr];
+          freq[lbl]++;
+          if (freq[best] < freq[lbl])
+            best = lbl;
+          else if (freq[best] == freq[lbl] && RandomInRange(2))
+            best = lbl;
+        }
+      }
+
+      /* wipe the counters of all neighbours, counted or not */
+      for (e=xadj[v]; e<xadj[v+1]; e++)
+        freq[labels[adjncy[e]]] = 0;
+
+      labels[v] = best;
+    }
+  }
+
+  /* (label, vertex) pairs sorted by label give the new numbering */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (v=0; v<nvtxs; v++) {
+    cand[v].val = v;
+    cand[v].key = labels[v];
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (pos=0; pos<nvtxs; pos++)
+    perm[cand[pos].val] = pos;
+
+  gk_free((void **)&labels, &freq, &dbucket, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! 
This function re-orders the graph by:
+    - running params->niter sweeps of min-label propagation, in which each
+      vertex takes the smallest label found among itself and its neighbours
+      (labels start out as the vertex indices and are updated in place, so
+      a sweep already sees the labels it has just lowered)
+    - numbering the vertices by their rank after sorting on the final label
+
+    The caller owns the returned array.
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t nvtxs   = graph->nvtxs;
+  ssize_t *xadj   = graph->xadj;
+  int32_t *adjncy = graph->adjncy;
+  int32_t v, sweep, rank, low;
+  ssize_t e;
+  int32_t *labels, *perm;
+  gk_i32kv_t *cand;
+
+  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+
+  for (sweep=0; sweep<params->niter; sweep++) {
+    for (v=0; v<nvtxs; v++) {
+      low = labels[v];
+      for (e=xadj[v]; e<xadj[v+1]; e++) {
+        if (labels[adjncy[e]] < low)
+          low = labels[adjncy[e]];
+      }
+      labels[v] = low;
+    }
+  }
+
+  /* (label, vertex) pairs sorted by label give the new numbering */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (v=0; v<nvtxs; v++) {
+    cand[v].val = v;
+    cand[v].key = labels[v];
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (rank=0; rank<nvtxs; rank++)
+    perm[cand[rank].val] = rank;
+
+  gk_free((void **)&labels, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! 
This function re-orders the graph by:
+    - running params->niter sweeps of min-label propagation with in-place
+      label updates
+    - letting a vertex look only at neighbours in its own degree bucket
+      (bucket = degree divided by 8)
+    - numbering the vertices by their rank after sorting on the final label
+
+    The caller owns the returned array.
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t nvtxs   = graph->nvtxs;
+  ssize_t *xadj   = graph->xadj;
+  int32_t *adjncy = graph->adjncy;
+  int32_t v, sweep, rank, nbr, low;
+  ssize_t e;
+  int32_t *labels, *perm, *dbucket;
+  gk_i32kv_t *cand;
+
+  labels  = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  perm    = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+  dbucket = gk_i32malloc(nvtxs, "dbucket");
+
+  /* degree bucket of every vertex */
+  for (v=0; v<nvtxs; v++)
+    dbucket[v] = ((xadj[v+1]-xadj[v])>>3);
+
+  for (sweep=0; sweep<params->niter; sweep++) {
+    for (v=0; v<nvtxs; v++) {
+      low = labels[v];
+      for (e=xadj[v]; e<xadj[v+1]; e++) {
+        nbr = adjncy[e];
+        /* same bucket and a smaller label: adopt it */
+        if (dbucket[nbr] == dbucket[v] && labels[nbr] < low)
+          low = labels[nbr];
+      }
+      labels[v] = low;
+    }
+  }
+
+  /* (label, vertex) pairs sorted by label give the new numbering */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (v=0; v<nvtxs; v++) {
+    cand[v].val = v;
+    cand[v].key = labels[v];
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (rank=0; rank<nvtxs; rank++)
+    perm[cand[rank].val] = rank;
+
+  gk_free((void **)&labels, &dbucket, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! 
This function prints run parameters */ +/*************************************************************************/ +void print_init_info(params_t *params, gk_graph_t *graph) +{ + printf("*******************************************************************************\n"); + printf(" gkgraph\n\n"); + printf("Graph Information ----------------------------------------------------------\n"); + printf(" input file=%s, [%d, %zd]\n", + params->infile, graph->nvtxs, graph->xadj[graph->nvtxs]); + + printf("\n"); + printf("Options --------------------------------------------------------------------\n"); + printf(" lnbits=%d, cnbits=%d, type=%d, niter=%d, lamda=%f, eps=%e\n", + params->lnbits, params->cnbits, params->type, params->niter, + params->lamda, params->eps); + + printf("\n"); + printf("Working... -----------------------------------------------------------------\n"); +} + + +/*************************************************************************/ +/*! This function prints final statistics */ +/*************************************************************************/ +void print_final_info(params_t *params) +{ + printf("\n"); + printf("Memory Usage Information -----------------------------------------------------\n"); + printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed()); + printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed()); + printf("********************************************************************************\n"); +} + + +/*************************************************************************/ +/*! 
This is the entry point of the command-line argument parser */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]) +{ + int i; + int c, option_index; + params_t *params; + + params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params"); + + /* initialize the params data structure */ + params->lnbits = 6; + params->cnbits = 13; + params->type = 1; + params->niter = 1; + params->eps = 1e-10; + params->lamda = 0.20; + params->nosort = 0; + params->write = 0; + params->infile = NULL; + + + /* Parse the command line arguments */ + while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) { + switch (c) { + case CMD_LNBITS: + if (gk_optarg) params->lnbits = atoi(gk_optarg); + break; + case CMD_CNBITS: + if (gk_optarg) params->cnbits = atoi(gk_optarg); + break; + case CMD_TYPE: + if (gk_optarg) params->type = atoi(gk_optarg); + break; + case CMD_NITER: + if (gk_optarg) params->niter = atoi(gk_optarg); + break; + case CMD_EPS: + if (gk_optarg) params->eps = atof(gk_optarg); + break; + case CMD_LAMDA: + if (gk_optarg) params->lamda = atof(gk_optarg); + break; + case CMD_NOSORT: + params->nosort = 1; + break; + case CMD_WRITE: + params->write = 1; + break; + + case CMD_HELP: + for (i=0; strlen(helpstr[i]) > 0; i++) + printf("%s\n", helpstr[i]); + exit(0); + break; + case '?': + default: + printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]); + exit(0); + } + } + + if (argc-gk_optind != 1) { + printf("Unrecognized parameters."); + for (i=0; strlen(shorthelpstr[i]) > 0; i++) + printf("%s\n", shorthelpstr[i]); + exit(0); + } + + params->infile = gk_strdup(argv[gk_optind++]); + + if (argc-gk_optind > 0) + params->outfile = gk_strdup(argv[gk_optind++]); + else + params->outfile = gk_strdup("gkgraph.out"); + + if (!gk_fexists(params->infile)) + errexit("input file %s does not exist.\n", params->infile); + + return params; +} + 
diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/gksort.c b/3rdParty/metis/metis-5.1.1/GKlib/test/gksort.c new file mode 100644 index 000000000..65438368f --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/gksort.c @@ -0,0 +1,346 @@ +/*! +\file gksort.c +\brief Testing module for the various sorting routines in GKlib + +\date Started 4/4/2007 +\author George +\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +#define N 10000 + +/*************************************************************************/ +/*! Testing module for gk_?isort() routine */ +/*************************************************************************/ +void test_isort() +{ + gk_idx_t i; + int array[N]; + + /* test the increasing sort */ + printf("Testing iisort...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432); + + gk_isorti(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] > array[i+1]) + printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)i, array[i], array[i+1]); + } + + + /* test the decreasing sort */ + printf("Testing disort...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432); + + gk_isortd(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] < array[i+1]) + printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)i, array[i], array[i+1]); + } + +} + + +/*************************************************************************/ +/*! 
Testing module for gk_?fsort() routine */ +/*************************************************************************/ +void test_fsort() +{ + gk_idx_t i; + float array[N]; + + /* test the increasing sort */ + printf("Testing ifsort...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432)/(1.0+RandomInRange(645323)); + + gk_fsorti(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] > array[i+1]) + printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)i, array[i], array[i+1]); + } + + + /* test the decreasing sort */ + printf("Testing dfsort...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432)/(1.0+RandomInRange(645323)); + + gk_fsortd(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] < array[i+1]) + printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)i, array[i], array[i+1]); + } + +} + + +/*************************************************************************/ +/*! Testing module for gk_?idxsort() routine */ +/*************************************************************************/ +void test_idxsort() +{ + gk_idx_t i; + gk_idx_t array[N]; + + /* test the increasing sort */ + printf("Testing idxsorti...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432); + + gk_idxsorti(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] > array[i+1]) + printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)i, (ssize_t)array[i], (ssize_t)array[i+1]); + } + + + /* test the decreasing sort */ + printf("Testing idxsortd...\n"); + for (i=0; i<N; i++) + array[i] = RandomInRange(123432); + + gk_idxsortd(N, array); + + for (i=0; i<N-1; i++) { + if (array[i] < array[i+1]) + printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)i, (ssize_t)array[i], (ssize_t)array[i+1]); + } + +} + + + +/*************************************************************************/ +/*! 
Testing module for gk_?ikvsort() routine.
    Builds an array of (key, val) pairs whose keys come from RandomInRange()
    and whose values are the original positions, sorts it by key with
    gk_ikvsorti() and then (after refilling) with gk_ikvsortd(), and prints
    a diagnostic for every adjacent pair that violates the requested order. */
/*************************************************************************/
void test_ikvsort()
{
  gk_ikv_t pairs[N];
  gk_idx_t pos;

  /* ascending order */
  printf("Testing ikvsorti...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432);
    pairs[pos].val = pos;
  }

  gk_ikvsorti(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key > pairs[pos].key)
      printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }


  /* descending order */
  printf("Testing ikvsortd...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432);
    pairs[pos].val = pos;
  }

  gk_ikvsortd(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key < pairs[pos].key)
      printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }

}



/*************************************************************************/
/*!
Testing module for gk_?fkvsort() routine.
    Builds an array of (key, val) pairs whose keys are ratios of
    RandomInRange() values and whose values are the original positions,
    sorts it by key with gk_fkvsorti() and then (after refilling) with
    gk_fkvsortd(), and prints a diagnostic for every adjacent pair that
    violates the requested order. */
/*************************************************************************/
void test_fkvsort()
{
  gk_fkv_t pairs[N];
  gk_idx_t pos;

  /* ascending order */
  printf("Testing fkvsorti...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[pos].val = pos;
  }

  gk_fkvsorti(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key > pairs[pos].key)
      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }


  /* descending order */
  printf("Testing fkvsortd...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[pos].val = pos;
  }

  gk_fkvsortd(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key < pairs[pos].key)
      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }

}


/*************************************************************************/
/*!
Testing module for gk_?dkvsort() routine.
    Builds an array of (key, val) pairs whose keys are ratios of
    RandomInRange() values and whose values are the original positions,
    sorts it by key with gk_dkvsorti() and then (after refilling) with
    gk_dkvsortd(), and prints a diagnostic for every adjacent pair that
    violates the requested order. */
/*************************************************************************/
void test_dkvsort()
{
  gk_dkv_t pairs[N];
  gk_idx_t pos;

  /* ascending order */
  printf("Testing dkvsorti...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[pos].val = pos;
  }

  gk_dkvsorti(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key > pairs[pos].key)
      printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }


  /* descending order */
  printf("Testing dkvsortd...\n");
  for (pos=0; pos<N; pos++) {
    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    pairs[pos].val = pos;
  }

  gk_dkvsortd(N, pairs);

  for (pos=1; pos<N; pos++) {
    if (pairs[pos-1].key < pairs[pos].key)
      printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
  }

}


/*************************************************************************/
/*!
Testing module for gk_?skvsort() routine */ +/*************************************************************************/ +void test_skvsort() +{ + gk_idx_t i; + gk_skv_t array[N]; + char line[256]; + + /* test the increasing sort */ + printf("Testing skvsorti...\n"); + for (i=0; i<N; i++) { + sprintf(line, "%d", RandomInRange(123432)); + array[i].key = gk_strdup(line); + array[i].val = i; + } + + gk_skvsorti(N, array); + + for (i=0; i<N-1; i++) { + if (strcmp(array[i].key, array[i+1].key) > 0) + printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val); + } + + + /* test the decreasing sort */ + printf("Testing skvsortd...\n"); + for (i=0; i<N; i++) { + sprintf(line, "%d", RandomInRange(123432)); + array[i].key = gk_strdup(line); + array[i].val = i; + } + + gk_skvsortd(N, array); + + for (i=0; i<N-1; i++) { + /*printf("%s\n", array[i].key);*/ + if (strcmp(array[i].key, array[i+1].key) < 0) + printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val); + } + +} + + +/*************************************************************************/ +/*! 
Testing module for gk_?idxkvsort() routine */ +/*************************************************************************/ +void test_idxkvsort() +{ + gk_idx_t i; + gk_idxkv_t array[N]; + + /* test the increasing sort */ + printf("Testing idxkvsorti...\n"); + for (i=0; i<N; i++) { + array[i].key = RandomInRange(123432); + array[i].val = i; + } + + gk_idxkvsorti(N, array); + + for (i=0; i<N-1; i++) { + if (array[i].key > array[i+1].key) + printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n", + (ssize_t)i, (ssize_t)array[i].key, (ssize_t)array[i+1].key, + (ssize_t)array[i].val, (ssize_t)array[i+1].val); + } + + + /* test the decreasing sort */ + printf("Testing idxkvsortd...\n"); + for (i=0; i<N; i++) { + array[i].key = RandomInRange(123432); + array[i].val = i; + } + + gk_idxkvsortd(N, array); + + for (i=0; i<N-1; i++) { + if (array[i].key < array[i+1].key) + printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n", + (ssize_t)i, (ssize_t)array[i].key, (ssize_t)array[i+1].key, + (ssize_t)array[i].val, (ssize_t)array[i+1].val); + } + +} + + + + +int main() +{ + test_isort(); + test_fsort(); + test_idxsort(); + + test_ikvsort(); + test_fkvsort(); + test_dkvsort(); + test_skvsort(); + test_idxkvsort(); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/grKx.c b/3rdParty/metis/metis-5.1.1/GKlib/test/grKx.c new file mode 100644 index 000000000..a72b58092 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/grKx.c @@ -0,0 +1,256 @@ +/*! +\file +\brief A simple program to create multiple copies of an input matrix. + +\date 5/30/2013 +\author George +\version \verbatim $Id: grKx.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +/*************************************************************************/ +/*! 
Data structures for the code */ +/*************************************************************************/ +typedef struct { + int inf, outf; + int numbering; /* input numbering (output when applicable) */ + int readvals; /* input values (output when applicable) */ + int writevals; /* output values */ + int rshuf, cshuf; /* random shuffle of rows/columns */ + int symmetric; /* a symmetric shuffle */ + int ncopies; /* the copies of the graph to create */ + char *infile; /* input file */ + char *outfile; /* output file */ +} params_t; + + +/*************************************************************************/ +/*! Constants */ +/*************************************************************************/ +#define CMD_NUMONE 1 +#define CMD_NOREADVALS 2 +#define CMD_NOWRITEVALS 3 +#define CMD_RSHUF 4 +#define CMD_CSHUF 5 +#define CMD_SYMMETRIC 6 +#define CMD_HELP 100 + + +/*************************************************************************/ +/*! Local variables */ +/*************************************************************************/ +static struct gk_option long_options[] = { + {"numone", 0, 0, CMD_NUMONE}, + {"noreadvals", 0, 0, CMD_NOREADVALS}, + {"nowritevals", 0, 0, CMD_NOWRITEVALS}, + {"rshuf", 0, 0, CMD_RSHUF}, + {"cshuf", 0, 0, CMD_CSHUF}, + {"symmetric", 0, 0, CMD_SYMMETRIC}, + {"help", 0, 0, CMD_HELP}, + {0, 0, 0, 0} +}; + + +/*-------------------------------------------------------------------*/ +/* Mini help */ +/*-------------------------------------------------------------------*/ +static char helpstr[][100] = { +" ", +"Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>", +" ", +" Required parameters", +" infile, outfile", +" The name of the input/output CSR file.", +" ", +" inf/outf", +" The format of the input/output file.", +" Supported values are:", +" 1 GK_CSR_FMT_CLUTO", +" 2 GK_CSR_FMT_CSR", +" 3 GK_CSR_FMT_METIS", +" 4 GK_CSR_FMT_BINROW", +" 6 GK_CSR_FMT_IJV", +" 7 GK_CSR_FMT_BIJV", +" ", +" Optional parameters", +" 
-numone", +" Specifies that the numbering of the input file starts from 1. ", +" It only applies to CSR/IJV formats.", +" ", +" -nowritevals", +" Specifies that no values will be output.", +" ", +" -noreadvals", +" Specifies that the values will not be read when applicable.", +" ", +" -rshuf", +" Specifies that the rows will be randmly shuffled prior to output.", +" ", +" -cshuf", +" Specifies that the columns will be randmly shuffled prior to output.", +" ", +" -symmetric", +" Specifies that the row+column shuffling will be symmetric.", +" ", +" -help", +" Prints this message.", +"" +}; + +static char shorthelpstr[][100] = { +" ", +" Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>", +" use 'csrconv -help' for a summary of the options.", +"" +}; + + +/*************************************************************************/ +/*! This is the entry point of the command-line argument parser */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]) +{ + int i; + int c, option_index; + params_t *params; + + params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params"); + + /* initialize the params data structure */ + params->numbering = 0; + params->readvals = 1; + params->writevals = 1; + params->rshuf = 0; + params->cshuf = 0; + params->symmetric = 0; + + params->inf = -1; + params->outf = -1; + params->infile = NULL; + params->outfile = NULL; + + + /* Parse the command line arguments */ + while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) { + switch (c) { + case CMD_NUMONE: + params->numbering = 1; + break; + case CMD_NOREADVALS: + params->readvals = 0; + break; + case CMD_NOWRITEVALS: + params->writevals = 0; + break; + case CMD_RSHUF: + params->rshuf = 1; + break; + case CMD_CSHUF: + params->cshuf = 1; + break; + case CMD_SYMMETRIC: + params->symmetric = 1; + break; + + case CMD_HELP: + for (i=0; strlen(helpstr[i]) > 0; i++) + 
printf("%s\n", helpstr[i]); + exit(0); + break; + case '?': + default: + printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]); + exit(0); + } + } + + if (argc-gk_optind != 5) { + printf("Unrecognized parameters."); + for (i=0; strlen(shorthelpstr[i]) > 0; i++) + printf("%s\n", shorthelpstr[i]); + exit(0); + } + + params->infile = gk_strdup(argv[gk_optind++]); + params->inf = atoi(argv[gk_optind++]); + params->outfile = gk_strdup(argv[gk_optind++]); + params->outf = atoi(argv[gk_optind++]); + params->ncopies = atoi(argv[gk_optind++]); + + if (!gk_fexists(params->infile)) + errexit("input file %s does not exist.\n", params->infile); + + return params; +} + + +/*************************************************************************/ +/*! the entry point */ +/**************************************************************************/ +int main(int argc, char *argv[]) +{ + ssize_t i, j, k, knnz, nrows, ncols, ncopies; + int what; + params_t *params; + gk_csr_t *mat, *kmat, *smat; + + /* get command-line options */ + params = parse_cmdline(argc, argv); + + /* read the data */ + mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering); + + /* create the copies */ + ncopies = params->ncopies; + + nrows = mat->nrows; + ncols = mat->ncols; + knnz = mat->rowptr[nrows]*ncopies; + + kmat = gk_csr_Create(); + kmat->nrows = nrows*ncopies; + kmat->ncols = ncols*ncopies; + kmat->rowptr = gk_zmalloc(kmat->nrows+1, "rowptr"); + kmat->rowind = gk_imalloc(knnz, "rowind"); + if (mat->rowval) + kmat->rowval = gk_fmalloc(knnz, "rowval"); + + kmat->rowptr[0] = knnz = 0; + for (k=0; k<ncopies; k++) { + for (i=0; i<nrows; i++) { + for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++, knnz++) { + kmat->rowind[knnz] = mat->rowind[j] + k*ncols; + if (mat->rowval) + kmat->rowval[knnz] = mat->rowval[j]; + } + kmat->rowptr[k*nrows+i+1] = knnz; + } + } + + gk_csr_Free(&mat); + mat = kmat; + + + if (params->rshuf || params->cshuf) { + 
if (params->rshuf && params->cshuf) + what = GK_CSR_ROWCOL; + else if (params->rshuf) + what = GK_CSR_ROW; + else + what = GK_CSR_COL; + + smat = gk_csr_Shuffle(mat, what, params->symmetric); + gk_csr_Free(&mat); + mat = smat; + } + + if (params->writevals && mat->rowval == NULL) + mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval"); + + gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0); + + gk_csr_Free(&mat); + +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/m2mnbrs.c b/3rdParty/metis/metis-5.1.1/GKlib/test/m2mnbrs.c new file mode 100644 index 000000000..53f35caea --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/m2mnbrs.c @@ -0,0 +1,304 @@ +/*! +\file +\brief It takes as input two CSR matrices and finds for each row of the + first matrix the most similar rows in the second matrix. + +\date 9/27/2014 +\author George +\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + +/*************************************************************************/ +/*! Data structures for the code */ +/*************************************************************************/ +typedef struct { + int simtype; /*!< The similarity type to use */ + int nnbrs; /*!< The maximum number of nearest neighbots to output */ + float minsim; /*!< The minimum similarity to use for keeping neighbors */ + + int verbosity; /*!< The reporting verbosity level */ + + char *qfile; /*!< The file storing the query documents */ + char *cfile; /*!< The file storing the collection documents */ + char *outfile; /*!< The file where the output will be stored */ + + /* timers */ + double timer_global; + double timer_1; + double timer_2; + double timer_3; + double timer_4; +} params_t; + + +/*************************************************************************/ +/*! 
Constants */ +/*************************************************************************/ +/* Versions */ +#define VER_MAJOR 0 +#define VER_MINOR 1 +#define VER_SUBMINOR 0 + +/* Command-line option codes */ +#define CMD_SIMTYPE 10 +#define CMD_NNBRS 20 +#define CMD_MINSIM 22 +#define CMD_VERBOSITY 70 +#define CMD_HELP 100 + +/* The text labels for the different simtypes */ +static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""}; + + + +/*************************************************************************/ +/*! Local variables */ +/*************************************************************************/ +static struct gk_option long_options[] = { + {"simtype", 1, 0, CMD_SIMTYPE}, + {"nnbrs", 1, 0, CMD_NNBRS}, + {"minsim", 1, 0, CMD_MINSIM}, + {"verbosity", 1, 0, CMD_VERBOSITY}, + + {"help", 0, 0, CMD_HELP}, + {0, 0, 0, 0} +}; + +static gk_StringMap_t simtype_options[] = { + {"cos", GK_CSR_COS}, + {"jac", GK_CSR_JAC}, + {NULL, 0} +}; + + +/*------------------------------------------------------------------- + * Mini help + *-------------------------------------------------------------------*/ +static char helpstr[][100] = +{ +" ", +"Usage: m2mnbrs [options] qfile cfile [outfile]", +" ", +" Options", +" -simtype=string", +" Specifies the type of similarity to use. Possible values are:", +" cos - Cosine similarity", +" jac - Jacquard similarity [default]", +" ", +" -nnbrs=int", +" Specifies the maximum number of nearest neighbors.", +" A value of -1 indicates that all neighbors will be considered.", +" Default value is 100.", +" ", +" -minsim=float", +" The minimum allowed similarity between neighbors. ", +" Default value is .25.", +" ", +" -verbosity=int", +" Specifies the level of debugging information to be displayed.", +" Default value is 0.", +" ", +" -help", +" Prints this message.", +"" +}; + + + +/*************************************************************************/ +/*! 
Function prototypes */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]); +void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat); + + +/*************************************************************************/ +/*! This is the entry point of the command-line argument parser */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]) +{ + int i; + int c, option_index; + params_t *params; + + params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params"); + + /* initialize the params data structure */ + params->simtype = GK_CSR_JAC; + params->nnbrs = 100; + params->minsim = .25; + params->verbosity = -1; + params->qfile = NULL; + params->cfile = NULL; + params->outfile = NULL; + + + /* Parse the command line arguments */ + while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) { + switch (c) { + case CMD_SIMTYPE: + if (gk_optarg) { + if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1) + errexit("Invalid simtype of %s.\n", gk_optarg); + } + break; + + case CMD_NNBRS: + if (gk_optarg) params->nnbrs = atoi(gk_optarg); + break; + + case CMD_MINSIM: + if (gk_optarg) params->minsim = atof(gk_optarg); + break; + + case CMD_VERBOSITY: + if (gk_optarg) params->verbosity = atoi(gk_optarg); + break; + + case CMD_HELP: + for (i=0; strlen(helpstr[i]) > 0; i++) + printf("%s\n", helpstr[i]); + exit(EXIT_SUCCESS); + break; + + case '?': + default: + printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + /* Get the input/output file info */ + if (argc-gk_optind < 1) { + printf("Missing input/output file info.\n Use %s -help for a summary of the options.\n", argv[0]); + exit(EXIT_FAILURE); + } + + params->qfile = gk_strdup(argv[gk_optind++]); + params->cfile = gk_strdup(argv[gk_optind++]); 
+ params->outfile = (gk_optind < argc ? gk_strdup(argv[gk_optind++]) : NULL); + + if (!gk_fexists(params->qfile)) + errexit("input file %s does not exist.\n", params->qfile); + if (!gk_fexists(params->cfile)) + errexit("input file %s does not exist.\n", params->cfile); + + return params; +} + + +/*************************************************************************/ +/*! This is the entry point of the program */ +/**************************************************************************/ +int main(int argc, char *argv[]) +{ + params_t *params; + gk_csr_t *qmat, *cmat; + int rc = EXIT_SUCCESS; + + params = parse_cmdline(argc, argv); + + qmat = gk_csr_Read(params->qfile, GK_CSR_FMT_CSR, 1, 0); + cmat = gk_csr_Read(params->cfile, GK_CSR_FMT_CSR, 1, 0); + + + printf("********************************************************************************\n"); + printf("sd (%d.%d.%d) Copyright 2014, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR); + printf(" simtype=%s, nnbrs=%d, minsim=%.2f\n", + simtypenames[params->simtype], params->nnbrs, params->minsim); + printf(" qfile=%s, nrows=%d, ncols=%d, nnz=%zd\n", + params->qfile, qmat->nrows, qmat->ncols, qmat->rowptr[qmat->nrows]); + printf(" cfile=%s, nrows=%d, ncols=%d, nnz=%zd\n", + params->cfile, cmat->nrows, cmat->ncols, cmat->rowptr[cmat->nrows]); + + gk_clearwctimer(params->timer_global); + gk_clearwctimer(params->timer_1); + gk_clearwctimer(params->timer_2); + gk_clearwctimer(params->timer_3); + gk_clearwctimer(params->timer_4); + + gk_startwctimer(params->timer_global); + + FindNeighbors(params, qmat, cmat); + + gk_stopwctimer(params->timer_global); + + printf(" wclock: %.2lfs\n", gk_getwctimer(params->timer_global)); + printf(" timer1: %.2lfs\n", gk_getwctimer(params->timer_1)); + printf(" timer2: %.2lfs\n", gk_getwctimer(params->timer_2)); + printf(" timer3: %.2lfs\n", gk_getwctimer(params->timer_3)); + printf(" timer4: %.2lfs\n", gk_getwctimer(params->timer_4)); + 
printf("********************************************************************************\n"); + + gk_csr_Free(&qmat); + gk_csr_Free(&cmat); + + exit(rc); +} + + +/*************************************************************************/ +/*! Reads and computes the neighbors of each query document against the + collection of documents */ +/**************************************************************************/ +void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat) +{ + int iQ, iH, nhits; + int32_t *marker; + gk_fkv_t *hits, *cand; + FILE *fpout; + + GKASSERT(qmat->ncols <= cmat->ncols); + + /* if cosine, make rows unit length */ + if (params->simtype == GK_CSR_COS) { + gk_csr_Normalize(qmat, GK_CSR_ROW, 2); + gk_csr_Normalize(cmat, GK_CSR_ROW, 2); + } + + /* create the inverted index */ + gk_csr_CreateIndex(cmat, GK_CSR_COL); + + /* compute the row norms */ + gk_csr_ComputeSquaredNorms(cmat, GK_CSR_ROW); + + /* create the output file */ + fpout = (params->outfile ? gk_fopen(params->outfile, "w", "FindNeighbors: fpout") : NULL); + + /* allocate memory for the necessary working arrays */ + hits = gk_fkvmalloc(cmat->nrows, "FindNeighbors: hits"); + marker = gk_i32smalloc(cmat->nrows, -1, "FindNeighbors: marker"); + cand = gk_fkvmalloc(cmat->nrows, "FindNeighbors: cand"); + + + /* find the best neighbors for each query document */ + gk_startwctimer(params->timer_1); + for (iQ=0; iQ<qmat->nrows; iQ++) { + if (params->verbosity > 0) + printf("Working on query %7d\n", iQ); + + /* find the neighbors of the ith document */ + nhits = gk_csr_GetSimilarRows(cmat, + qmat->rowptr[iQ+1]-qmat->rowptr[iQ], + qmat->rowind+qmat->rowptr[iQ], + qmat->rowval+qmat->rowptr[iQ], + params->simtype, params->nnbrs, params->minsim, + hits, marker, cand); + + /* write the results in the file */ + if (fpout) { + for (iH=0; iH<nhits; iH++) + fprintf(fpout, "%8d %8zd %.3f\n", iQ, hits[iH].val, hits[iH].key); + } + } + gk_stopwctimer(params->timer_1); + + + /* cleanup and exit */ 
+ if (fpout) gk_fclose(fpout); + + gk_free((void **)&hits, &marker, &cand, LTERM); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/rw.c b/3rdParty/metis/metis-5.1.1/GKlib/test/rw.c new file mode 100644 index 000000000..1a3295ee7 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/rw.c @@ -0,0 +1,306 @@ +/*! +\file +\brief A simple (personalized) random walk program to test GKlib's routines + +\date 6/12/2008 +\author George +\version \verbatim $Id$ \endverbatim +*/ + +#include <GKlib.h> + +/*************************************************************************/ +/*! Data structures for the code */ +/*************************************************************************/ +typedef struct { + int niter; + int ntvs; + int ppr; + float eps; + float lamda; + char *infile; + char *outfile; +} params_t; + +/*************************************************************************/ +/*! Constants */ +/*************************************************************************/ +#define CMD_NITER 1 +#define CMD_EPS 2 +#define CMD_LAMDA 3 +#define CMD_PPR 4 +#define CMD_NTVS 5 +#define CMD_HELP 10 + + +/*************************************************************************/ +/*! Local variables */ +/*************************************************************************/ +static struct gk_option long_options[] = { + {"niter", 1, 0, CMD_NITER}, + {"lamda", 1, 0, CMD_LAMDA}, + {"eps", 1, 0, CMD_EPS}, + {"ppr", 1, 0, CMD_PPR}, + {"ntvs", 1, 0, CMD_NTVS}, + {"help", 0, 0, CMD_HELP}, + {0, 0, 0, 0} +}; + + +/*-------------------------------------------------------------------*/ +/* Mini help */ +/*-------------------------------------------------------------------*/ +static char helpstr[][100] = { +" ", +"Usage: rw [options] <graph-file> <out-file>", +" ", +" Required parameters", +" graph-file", +" The name of the file storing the transactions. 
The file is in ", +" Metis' graph format.", +" ", +" Optional parameters", +" -niter=int", +" Specifies the maximum number of iterations. [default: 100]", +" ", +" -lamda=float", +" Specifies the follow-the-adjacent-links probability. [default: 0.80]", +" ", +" -eps=float", +" Specifies the error tollerance. [default: 1e-10]", +" ", +" -ppr=int", +" Specifies the source of the personalized PR. [default: -1]", +" ", +" -ntvs=int", +" Specifies the number of test-vectors to compute. [default: -1]", +" ", +" -help", +" Prints this message.", +"" +}; + +static char shorthelpstr[][100] = { +" ", +" Usage: rw [options] <graph-file> <out-file>", +" use 'rw -help' for a summary of the options.", +"" +}; + + + +/*************************************************************************/ +/*! Function prototypes */ +/*************************************************************************/ +void print_init_info(params_t *params, gk_csr_t *mat); +void print_final_info(params_t *params); +params_t *parse_cmdline(int argc, char *argv[]); + + +/*************************************************************************/ +/*! 
the entry point */ +/**************************************************************************/ +int main(int argc, char *argv[]) +{ + ssize_t i, j, niter; + params_t *params; + gk_csr_t *mat; + FILE *fpout; + + /* get command-line options */ + params = parse_cmdline(argc, argv); + + /* read the data */ + mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1); + + /* display some basic stats */ + print_init_info(params, mat); + + + if (params->ntvs != -1) { + /* compute the pr for different randomly generated restart-distribution vectors */ + float **prs; + + prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs"); + + /* generate the random restart vectors */ + for (j=0; j<params->ntvs; j++) { + for (i=0; i<mat->nrows; i++) + prs[j][i] = RandomInRange(931); + gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1); + + niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]); + printf("tvs#: %zd; niters: %zd\n", j, niter); + } + + /* output the computed pr scores */ + fpout = gk_fopen(params->outfile, "w", "main: outfile"); + for (i=0; i<mat->nrows; i++) { + for (j=0; j<params->ntvs; j++) + fprintf(fpout, "%.4e ", prs[j][i]); + fprintf(fpout, "\n"); + } + gk_fclose(fpout); + + gk_fFreeMatrix(&prs, params->ntvs, mat->nrows); + } + else if (params->ppr != -1) { + /* compute the personalized pr from the specified vertex */ + float *pr; + + pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr"); + + pr[params->ppr-1] = 1.0; + + niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); + printf("ppr: %d; niters: %zd\n", params->ppr, niter); + + /* output the computed pr scores */ + fpout = gk_fopen(params->outfile, "w", "main: outfile"); + for (i=0; i<mat->nrows; i++) + fprintf(fpout, "%.4e\n", pr[i]); + gk_fclose(fpout); + + gk_free((void **)&pr, LTERM); + } + else { + /* compute the standard pr */ + int jmax; + float diff, maxdiff; + float *pr; + + pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr"); + + 
niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); + printf("pr; niters: %zd\n", niter); + + /* output the computed pr scores */ + fpout = gk_fopen(params->outfile, "w", "main: outfile"); + for (i=0; i<mat->nrows; i++) { + for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { + if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) { + maxdiff = diff; + jmax = mat->rowind[j]; + } + } + fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], + mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1); + } + gk_fclose(fpout); + + gk_free((void **)&pr, LTERM); + } + + gk_csr_Free(&mat); + + /* display some final stats */ + print_final_info(params); +} + + + +/*************************************************************************/ +/*! This function prints run parameters */ +/*************************************************************************/ +void print_init_info(params_t *params, gk_csr_t *mat) +{ + printf("*******************************************************************************\n"); + printf(" fis\n\n"); + printf("Matrix Information ---------------------------------------------------------\n"); + printf(" input file=%s, [%d, %d, %zd]\n", + params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]); + + printf("\n"); + printf("Options --------------------------------------------------------------------\n"); + printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n", + params->niter, params->ntvs, params->ppr, params->lamda, params->eps); + + printf("\n"); + printf("Performing random walks... ----------------------------------------------\n"); +} + + +/*************************************************************************/ +/*! 
This function prints final statistics */ +/*************************************************************************/ +void print_final_info(params_t *params) +{ + printf("\n"); + printf("Memory Usage Information -----------------------------------------------------\n"); + printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed()); + printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed()); + printf("********************************************************************************\n"); +} + + +/*************************************************************************/ +/*! This is the entry point of the command-line argument parser */ +/*************************************************************************/ +params_t *parse_cmdline(int argc, char *argv[]) +{ + int i; + int c, option_index; + params_t *params; + + params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params"); + + /* initialize the params data structure */ + params->niter = 100; + params->ppr = -1; + params->ntvs = -1; + params->eps = 1e-10; + params->lamda = 0.80; + params->infile = NULL; + params->outfile = NULL; + + + /* Parse the command line arguments */ + while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) { + switch (c) { + case CMD_NITER: + if (gk_optarg) params->niter = atoi(gk_optarg); + break; + case CMD_NTVS: + if (gk_optarg) params->ntvs = atoi(gk_optarg); + break; + case CMD_PPR: + if (gk_optarg) params->ppr = atoi(gk_optarg); + break; + case CMD_EPS: + if (gk_optarg) params->eps = atof(gk_optarg); + break; + case CMD_LAMDA: + if (gk_optarg) params->lamda = atof(gk_optarg); + break; + + case CMD_HELP: + for (i=0; strlen(helpstr[i]) > 0; i++) + printf("%s\n", helpstr[i]); + exit(0); + break; + case '?': + default: + printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]); + exit(0); + } + } + + if (argc-gk_optind != 2) { + printf("Unrecognized parameters."); + 
for (i=0; strlen(shorthelpstr[i]) > 0; i++) + printf("%s\n", shorthelpstr[i]); + exit(0); + } + + params->infile = gk_strdup(argv[gk_optind++]); + params->outfile = gk_strdup(argv[gk_optind++]); + + if (!gk_fexists(params->infile)) + errexit("input file %s does not exist.\n", params->infile); + + if (params->ppr != -1 && params->ntvs != -1) + errexit("Only one of the -ppr and -ntvs options can be specified.\n"); + + return params; +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/splatt2svd.c b/3rdParty/metis/metis-5.1.1/GKlib/test/splatt2svd.c new file mode 100644 index 000000000..111d31c94 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/splatt2svd.c @@ -0,0 +1,98 @@ +/*! +\file +\brief A simple program to convert a tensor in coordinate format into an unfolded + matrix + +\author George +*/ + +#include <GKlib.h> + + +int main(int argc, char *argv[]) +{ + size_t nnz, i, j, k, nI, nJ, nK, nrows, ncols; + int32_t *I, *J, *K, *rowind, *colind; + ssize_t *rowptr, *colptr; + float *V, *rowval, *colval; + + if (argc != 2) + errexit("Usage %s <infile> [%d]\n", argv[0], argc); + + if (!gk_fexists(argv[1])) + errexit("File %s does not exist.\n", argv[1]); + + gk_getfilestats(argv[1], &nnz, NULL, NULL, NULL); + I = gk_i32malloc(nnz, "I"); + J = gk_i32malloc(nnz, "J"); + K = gk_i32malloc(nnz, "K"); + V = gk_fmalloc(nnz, "V"); + + fprintf(stderr, "Input nnz: %zd\n", nnz); + + FILE *fpin = gk_fopen(argv[1], "r", "infile"); + for (i=0; i<nnz; i++) { + if (4 != fscanf(fpin, "%d %d %d %f", K+i, I+i, J+i, V+i)) + errexit("Failed to read 4 values in line %zd\n", i); + K[i]--; I[i]--; J[i]--; + } + gk_fclose(fpin); + + nI = gk_i32max(nnz, I, 1)+1; + nJ = gk_i32max(nnz, J, 1)+1; + nK = gk_i32max(nnz, K, 1)+1; + + fprintf(stderr, "nI: %zd, nJ: %zd, nK: %zd\n", nI, nJ, nK); + + nrows = nK*nI; + ncols = nJ; + rowptr = gk_zsmalloc(nrows+1, 0, "rowptr"); + for (i=0; i<nnz; i++) + rowptr[K[i]*nI+I[i]]++; + MAKECSR(i, nrows, rowptr); + + rowind = gk_i32malloc(nnz, "rowind"); 
+ rowval = gk_fmalloc(nnz, "rowval"); + for (i=0; i<nnz; i++) { + rowind[rowptr[K[i]*nI+I[i]]] = J[i]; + rowval[rowptr[K[i]*nI+I[i]]] = V[i]; + rowptr[K[i]*nI+I[i]]++; + } + SHIFTCSR(i, nrows, rowptr); + + gk_free((void **)&I, &J, &K, &V, LTERM); + + colptr = gk_zsmalloc(ncols+1, 0, "colptr"); + colind = gk_i32malloc(nnz, "colind"); + colval = gk_fmalloc(nnz, "colval"); + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) + colptr[rowind[j]]++; + } + MAKECSR(i, ncols, colptr); + for (i=0; i<nrows; i++) { + for (j=rowptr[i]; j<rowptr[i+1]; j++) { + colind[colptr[rowind[j]]] = i; + colval[colptr[rowind[j]]] = rowval[j]; + colptr[rowind[j]]++; + } + } + SHIFTCSR(i, ncols, colptr); + + /* sanity check */ + for (i=0; i<ncols; i++) { + for (j=colptr[i]+1; j<colptr[i+1]; j++) { + if (colind[j-1] == colind[j]) + fprintf(stderr, "Duplicate row indices: %d %d %d\n", (int)i, colind[j], colind[j-1]); + } + } + + printf("%zd %zd %zd\n", nrows, ncols, nnz); + for (i=0; i<ncols; i++) { + printf("%zd\n", colptr[i+1]-colptr[i]); + for (j=colptr[i]; j<colptr[i+1]; j++) + printf("%d %.3f\n", colind[j], colval[j]); + } + +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/test/strings.c b/3rdParty/metis/metis-5.1.1/GKlib/test/strings.c new file mode 100644 index 000000000..b241d3ff0 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/test/strings.c @@ -0,0 +1,82 @@ +/*! +\file strings.c +\brief Testing module for the string functions in GKlib + +\date Started 3/5/2007 +\author George +\version\verbatim $Id: strings.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + +#include <GKlib.h> + + +/*************************************************************************/ +/*! 
Testing module for gk_strstr_replace() */ +/*************************************************************************/ +void test_strstr_replace() +{ + char *new_str; + int rc; + + rc = gk_strstr_replace("This is a simple string", "s", "S", "", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + + rc = gk_strstr_replace("This is a simple string", "s", "S", "g", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + + rc = gk_strstr_replace("This is a simple SS & ss string", "s", "T", "g", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + + rc = gk_strstr_replace("This is a simple SS & ss string", "s", "T", "ig", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + rc = gk_strstr_replace("This is a simple SS & ss string", "\\b\\w(\\w+)\\w\\b", "$1", "ig", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + rc = gk_strstr_replace("This is a simple SS & ss string", "\\b\\w+\\b", "word", "ig", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + rc = gk_strstr_replace("http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4", + "(http://www\\.cs\\.umn\\.edu/)(.*)-T(\\d+)", "$1$2-P$3", "g", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + rc = gk_strstr_replace("http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4", + "(\\d+)", "number:$1", "ig", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + + rc = gk_strstr_replace("http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4", + "(http://www\\.cs\\.umn\\.edu/)", "[$1]", "g", &new_str); + printf("%d, %s.\n", rc, new_str); + gk_free((void **)&new_str, LTERM); + + + +} + + + +int main() +{ + test_strstr_replace(); + +/* + { + int i; + for (i=0; i<1000; i++) + printf("%d\n", RandomInRange(3)); + } +*/ +} + diff --git 
a/3rdParty/metis/metis-5.1.1/GKlib/timers.c b/3rdParty/metis/metis-5.1.1/GKlib/timers.c new file mode 100644 index 000000000..bb8f29620 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/timers.c @@ -0,0 +1,52 @@ +/*! +\file timers.c +\brief Various timing functions + +\date Started 4/12/2007 +\author George +\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + + + + +/************************************************************************* +* This function returns the wall-clock seconds +**************************************************************************/ +double gk_WClockSeconds(void) +{ +#ifdef __GNUC__ + struct timeval ctime; + + gettimeofday(&ctime, NULL); + + return (double)ctime.tv_sec + (double).000001*ctime.tv_usec; +#else + return (double)time(NULL); +#endif +} + + +/************************************************************************* +* This function returns the CPU seconds +**************************************************************************/ +double gk_CPUSeconds(void) +{ +//#ifdef __OPENMP__ +#ifdef __OPENMPXXXX__ + return omp_get_wtime(); +#else + #if defined(WIN32) || defined(__MINGW32__) + return((double) clock()/CLOCKS_PER_SEC); + #else + struct rusage r; + + getrusage(RUSAGE_SELF, &r); + return ((r.ru_utime.tv_sec + r.ru_stime.tv_sec) + 1.0e-6*(r.ru_utime.tv_usec + r.ru_stime.tv_usec)); + #endif +#endif +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/tokenizer.c b/3rdParty/metis/metis-5.1.1/GKlib/tokenizer.c new file mode 100644 index 000000000..5efd262db --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/tokenizer.c @@ -0,0 +1,77 @@ +/*! +\file tokenizer.c +\brief String tokenization routines + +This file contains various routines for splitting an input string into +tokens and returning them in form of a list. The goal is to mimic perl's +split function. 
+ +\date Started 11/23/04 +\author George +\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim +*/ + + +#include <GKlib.h> + + +/************************************************************************ +* This function tokenizes a string based on the user-supplied delimiters +* list. The resulting tokens are returned into an array of strings. +*************************************************************************/ +void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens) +{ + int i, ntoks, slen; + + tokens->strbuf = gk_strdup(str); + + slen = strlen(str); + str = tokens->strbuf; + + /* Scan once to determine the number of tokens */ + for (ntoks=0, i=0; i<slen;) { + /* Consume all the consecutive characters from the delimiters list */ + while (i<slen && strchr(delim, str[i])) + i++; + + if (i == slen) + break; + + ntoks++; + + /* Consume all the consecutive characters from the token */ + while (i<slen && !strchr(delim, str[i])) + i++; + } + + + tokens->ntoks = ntoks; + tokens->list = (char **)gk_malloc(ntoks*sizeof(char *), "strtokenize: tokens->list"); + + + /* Scan a second time to mark and link the tokens */ + for (ntoks=0, i=0; i<slen;) { + /* Consume all the consecutive characters from the delimiters list */ + while (i<slen && strchr(delim, str[i])) + str[i++] = '\0'; + + if (i == slen) + break; + + tokens->list[ntoks++] = str+i; + + /* Consume all the consecutive characters from the token */ + while (i<slen && !strchr(delim, str[i])) + i++; + } +} + + +/************************************************************************ +* This function frees the memory associated with a gk_Tokens_t +*************************************************************************/ +void gk_freetokenslist(gk_Tokens_t *tokens) +{ + gk_free((void *)&tokens->list, &tokens->strbuf, LTERM); +} + diff --git a/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.c b/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.c new file mode 100644 index 
000000000..546857c54 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.c @@ -0,0 +1,11 @@ +/* +\file win32/adapt.c +\brief Implementation of Win32 adaptation of libc functions +*/ + +#include "adapt.h" + +pid_t getpid(void) +{ + return GetCurrentProcessId(); +} diff --git a/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.h b/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.h new file mode 100644 index 000000000..35e60ed60 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/GKlib/win32/adapt.h @@ -0,0 +1,14 @@ +/* +\file win32/adapt.h +\brief Declaration of Win32 adaptation of POSIX functions and types +*/ +#ifndef _WIN32_ADAPT_H_ +#define _WIN32_ADAPT_H_ + +#include <windows.h> + +typedef DWORD pid_t; + +pid_t getpid(void); + +#endif /* _WIN32_ADAPT_H_ */ diff --git a/3rdParty/metis/metis-5.1.1/LICENSE b/3rdParty/metis/metis-5.1.1/LICENSE new file mode 100644 index 000000000..3a098eb28 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/LICENSE @@ -0,0 +1,19 @@ + +Copyright & License Notice +--------------------------- + +Copyright 1995-2013, Regents of the University of Minnesota + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. 
+ + \ No newline at end of file diff --git a/3rdParty/metis/metis-5.1.1/README.md b/3rdParty/metis/metis-5.1.1/README.md new file mode 100644 index 000000000..8c8873591 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/README.md @@ -0,0 +1,171 @@ +# METIS + +METIS is a set of serial programs for partitioning graphs, partitioning finite element meshes, +and producing fill reducing orderings for sparse matrices. The algorithms implemented in +METIS are based on the multilevel recursive-bisection, multilevel k-way, and multi-constraint +partitioning schemes developed in our lab. + +## Downloading METIS + +METIS uses Git submodules to manage external dependencies. Hence, please specify the `--recursive` option while cloning the repo as follows: +``` +git clone --recursive https://github.com/KarypisLab/METIS.git +``` + +## Building standalone METIS binaries and library + +To build METIS you can follow the instructions below: + +### Dependencies + +General dependencies for building METIS are: gcc, cmake, build-essential. +In Ubuntu systems these can be obtained from the apt package manager (e.g., apt-get install cmake, etc) + +``` +sudo apt-get install build-essential +sudo apt-get install cmake +``` + +### Building and installing METIS + +METIS is primarily configured by passing options to make config. For example: + +``` +make config shared=1 cc=gcc prefix=~/local +make install +``` + +will configure metis to be built as a shared library using GCC and then install the binaries, header files, and libraries at + +``` +~/local/bin +~/local/include +~/local/lib +``` + +directories, respectively. + +### Common configuration options are: + + cc=[compiler] - The C compiler to use [default is determined by CMake] + shared=1 - Build a shared library instead of a static one [off by default] + prefix=[PATH] - Set the installation prefix [~/local by default] + i64=1 - Sets to 64 bits the width of the datatype that will store information + about the vertices and their adjacency lists. 
+ r64=1 - Sets to 64 bits the width of the datatype that will store information + about floating point numbers. + +### Advanced debugging related options: + + gdb=1 - Build with support for GDB [off by default] + debug=1 - Enable debugging support [off by default] + assert=1 - Enable asserts [off by default] + assert2=1 - Enable very expensive asserts [off by default] + +### Other make commands + + make uninstall + Removes all files installed by 'make install'. + + make clean + Removes all object files but retains the configuration options. + + make distclean + Performs clean and completely removes the build directory. + + +<!--- +## Getting started + +Here are some examples to quickly try out SLIM on the sample datasets that are provided with SLIM. + +### Python interface + +```python +import pandas as pd +from SLIM import SLIM, SLIMatrix + +#read training data stored as triplets <user> <item> <rating> +traindata = pd.read_csv('../test/AutomotiveTrain.ijv', delimiter = ' ', header=None) +trainmat = SLIMatrix(traindata) + +#set up parameters to learn model, e.g., use Coordinate Descent with L1 and L2 +#regularization +params = {'algo':'cd', 'nthreads':2, 'l1r':1.0, 'l2r':1.0} + +#learn the model using training data and desired parameters +model = SLIM() +model.train(params, trainmat) + +#read test data having candidate items for users +testdata = pd.read_csv('../test/AutomotiveTest.ijv', delimiter = ' ', header=None) +#NOTE: model object is passed as an argument while generating test matrix +testmat = SLIMatrix(testdata, model) + +#generate top-10 recommendations +prediction_res = model.predict(testmat, nrcmds=10, outfile = 'output.txt') + +#dump the model to files on disk +model.save_model(modelfname='model.csr', # filename to save the model as a csr matrix + mapfname='map.csr' # filename to save the item map + ) + +#load the model from from disk +model_new = SLIM() +model_new.load_model(modelfname='model.csr', # filename of the model + mapfname='map.csr' # 
filename of the item map + ) +``` + +The users can also refer to the python notebook [UserGuide.ipynb](./python-package/UserGuide.ipynb) located at +`./python-package/UserGuide.ipynb` for more examples on using the python api. + +### Command-line programs +SLIM can be used by running the command-line programs that are located under `./build` directory. Specifically, SLIM provides the following three command-line programs: +- `slim_learn`: for estimating a model +- `slim_predict`: for applying a previously estimated model, and +- `slim_mselect`: for exploring a set of hyper-parameters in order to select the best performing model. + +Additional information about how to use these command-line programs is located in +SLIM's reference manual that is available at +[./doxygen/html/index.html](http://glaros.dtc.umn.edu/gkhome/files/fs/sw/slim/doc/html/index.html) +or +[./doxygen/latex/refman.pdf](http://glaros.dtc.umn.edu/gkhome/files/fs/sw/slim/doc/refman.pdf). + +### Library interface + +You can also use SLIM by direclty linking into your C/C++ program via its library interface. SLIM's API is described +in SLIM's reference manual (see links above). + +## Citing +If you use any part of this library in your research, please cite it using the +following BibTex entry: + +``` +@online{slim, + title = {{SLIM Library for Recommender Systems}}, + author = {Ning, Xia and Nikolakopoulos, Athanasios N. and Shui, Zeren and Sharma, Mohit and Karypis, George}, + url = {https://github.com/KarypisLab/SLIM}, + year = {2019}, +} +``` + +## References +1. [Slim: Sparse linear methods for top-n recommender systems](http://glaros.dtc.umn.edu/gkhome/node/774) +## Credits & Contact Information + +This implementation of SLIM was written by George Karypis with contributions by Xia Ning, Athanasios N. Nikolakopoulos, Zeren Shui and Mohit Sharma. + +If you encounter any problems or have any suggestions, please contact George Karypis at <a href="mailto:karypis@umn.edu">karypis@umn.edu</a>. 
+ +--> + +## Copyright & License Notice +Copyright 1998-2020, Regents of the University of Minnesota + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + diff --git a/3rdParty/metis/metis-5.1.1/include/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/include/CMakeLists.txt new file mode 100644 index 000000000..9515a51b6 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/include/CMakeLists.txt @@ -0,0 +1,3 @@ +if(METIS_INSTALL) + install(FILES metis.h DESTINATION include) +endif() diff --git a/3rdParty/metis/metis-5.1.1/include/metis.h b/3rdParty/metis/metis-5.1.1/include/metis.h new file mode 100644 index 000000000..90f5163c1 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/include/metis.h @@ -0,0 +1,358 @@ +/*! +\file metis.h +\brief This file contains function prototypes and constant definitions for METIS + * +\author George +\date Started 8/9/02 +\version\verbatim $Id$\endverbatim +*/ + +#ifndef _METIS_H_ +#define _METIS_H_ + +/**************************************************************************** +* A set of defines that can be modified by the user +*****************************************************************************/ + +/*-------------------------------------------------------------------------- + Specifies the width of the elementary data type that will hold information + about vertices and their adjacency lists. 
+ + Possible values: + 32 : Use 32 bit signed integers + 64 : Use 64 bit signed integers + + A width of 64 should be specified if the number of vertices or the total + number of edges in the graph exceeds the limits of a 32 bit signed integer + i.e., 2^31-1. + Proper use of 64 bit integers requires that the c99 standard datatypes + int32_t and int64_t are supported by the compiler. + GCC does provide these definitions in stdint.h, but it may require some + modifications on other architectures. +--------------------------------------------------------------------------*/ +//#define IDXTYPEWIDTH 32 + + +/*-------------------------------------------------------------------------- + Specifies the data type that will hold floating-point style information. + + Possible values: + 32 : single precision floating point (float) + 64 : double precision floating point (double) +--------------------------------------------------------------------------*/ +//#define REALTYPEWIDTH 32 + + + +/**************************************************************************** +* In principle, nothing needs to be changed beyond this point, unless the +* int32_t and int64_t cannot be found in the normal places. +*****************************************************************************/ + +/* Uniform definitions for various compilers */ +#if defined(_MSC_VER) + #define COMPILER_MSC +#endif +#if defined(__ICC) + #define COMPILER_ICC +#endif +#if defined(__GNUC__) + #define COMPILER_GCC +#endif + +/* Include c99 int definitions and needed constants. 
When building the library, + * these are already defined by GKlib; hence the test for _GKLIB_H_ */ +#ifndef _GKLIB_H_ +#ifdef COMPILER_MSC +#include <limits.h> + +typedef __int32 int32_t; +typedef __int64 int64_t; +#define PRId32 "I32d" +#define PRId64 "I64d" +#define SCNd32 "ld" +#define SCNd64 "I64d" +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#else +#include <inttypes.h> +#endif +#endif + + +/*------------------------------------------------------------------------ +* Setup the basic datatypes +*-------------------------------------------------------------------------*/ +#if IDXTYPEWIDTH == 32 + typedef int32_t idx_t; + + #define IDX_MAX INT32_MAX + #define IDX_MIN INT32_MIN + + #define SCIDX SCNd32 + #define PRIDX PRId32 + + #define strtoidx strtol + #define iabs abs +#elif IDXTYPEWIDTH == 64 + typedef int64_t idx_t; + + #define IDX_MAX INT64_MAX + #define IDX_MIN INT64_MIN + + #define SCIDX SCNd64 + #define PRIDX PRId64 + +#ifdef COMPILER_MSC + #define strtoidx _strtoi64 +#else + #define strtoidx strtoll +#endif + #define iabs labs +#else + #error "Incorrect user-supplied value fo IDXTYPEWIDTH" +#endif + + +#if REALTYPEWIDTH == 32 + typedef float real_t; + + #define SCREAL "f" + #define PRREAL "f" + #define REAL_MAX FLT_MAX + #define REAL_MIN FLT_MIN + #define REAL_EPSILON FLT_EPSILON + + #define rabs fabsf + #define REALEQ(x,y) ((rabs((x)-(y)) <= FLT_EPSILON)) + +#ifdef COMPILER_MSC + #define strtoreal (float)strtod +#else + #define strtoreal strtof +#endif +#elif REALTYPEWIDTH == 64 + typedef double real_t; + + #define SCREAL "lf" + #define PRREAL "lf" + #define REAL_MAX DBL_MAX + #define REAL_MIN DBL_MIN + #define REAL_EPSILON DBL_EPSILON + + #define rabs fabs + #define REALEQ(x,y) ((rabs((x)-(y)) <= DBL_EPSILON)) + + #define strtoreal strtod +#else + #error "Incorrect user-supplied value for REALTYPEWIDTH" +#endif + + 
+/*------------------------------------------------------------------------ +* Constant definitions +*-------------------------------------------------------------------------*/ +/* Metis's version number */ +#define METIS_VER_MAJOR 5 +#define METIS_VER_MINOR 1 +#define METIS_VER_SUBMINOR 0 + +/* The maximum length of the options[] array */ +#define METIS_NOPTIONS 40 + + + +/*------------------------------------------------------------------------ +* Function prototypes +*-------------------------------------------------------------------------*/ + +#ifdef _WINDLL +#define METIS_API(type) __declspec(dllexport) type __cdecl +#elif defined(__cdecl) +#define METIS_API(type) type __cdecl +#else +#define METIS_API(type) type +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + +METIS_API(int) METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part); + +METIS_API(int) METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *edgecut, idx_t *part); + +METIS_API(int) METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + +METIS_API(int) METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + +METIS_API(int) METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, + idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart); + +METIS_API(int) METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart); + +METIS_API(int) 
METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, idx_t *perm, idx_t *iperm); + +METIS_API(int) METIS_Free(void *ptr); + +METIS_API(int) METIS_SetDefaultOptions(idx_t *options); + + +/* These functions are used by ParMETIS */ + +METIS_API(int) METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, + idx_t *sizes); + +METIS_API(int) METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *options, idx_t *sepsize, idx_t *part); + +METIS_API(int) METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, + idx_t *where, idx_t *hmarker, real_t ubfactor); + + +/* These functions are used by DGL */ + +METIS_API(int) METIS_CacheFriendlyReordering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *part, idx_t *old2new); + +#ifdef __cplusplus +} +#endif + + + +/*------------------------------------------------------------------------ +* Enum type definitions +*-------------------------------------------------------------------------*/ +/*! Return codes */ +typedef enum { + METIS_OK = 1, /*!< Returned normally */ + METIS_ERROR_INPUT = -2, /*!< Returned due to erroneous inputs and/or options */ + METIS_ERROR_MEMORY = -3, /*!< Returned due to insufficient memory */ + METIS_ERROR = -4 /*!< Some other errors */ +} rstatus_et; + + +/*! Operation type codes */ +typedef enum { + METIS_OP_PMETIS, + METIS_OP_KMETIS, + METIS_OP_OMETIS +} moptype_et; + + +/*! 
Options codes (i.e., options[]) */ +typedef enum { + METIS_OPTION_PTYPE, + METIS_OPTION_OBJTYPE, + METIS_OPTION_CTYPE, + METIS_OPTION_IPTYPE, + METIS_OPTION_RTYPE, + METIS_OPTION_DBGLVL, + METIS_OPTION_NIPARTS, + METIS_OPTION_NITER, + METIS_OPTION_NCUTS, + METIS_OPTION_SEED, + METIS_OPTION_NO2HOP, + METIS_OPTION_ONDISK, + METIS_OPTION_MINCONN, + METIS_OPTION_CONTIG, + METIS_OPTION_COMPRESS, + METIS_OPTION_CCORDER, + METIS_OPTION_PFACTOR, + METIS_OPTION_NSEPS, + METIS_OPTION_UFACTOR, + METIS_OPTION_NUMBERING, + METIS_OPTION_DROPEDGES, + + /* Used for command-line parameter purposes */ + METIS_OPTION_HELP, + METIS_OPTION_TPWGTS, + METIS_OPTION_NCOMMON, + METIS_OPTION_NOOUTPUT, + METIS_OPTION_BALANCE, + METIS_OPTION_GTYPE, + METIS_OPTION_UBVEC +} moptions_et; + + +/*! Partitioning Schemes */ +typedef enum { + METIS_PTYPE_RB, + METIS_PTYPE_KWAY +} mptype_et; + +/*! Graph types for meshes */ +typedef enum { + METIS_GTYPE_DUAL, + METIS_GTYPE_NODAL +} mgtype_et; + +/*! Coarsening Schemes */ +typedef enum { + METIS_CTYPE_RM, + METIS_CTYPE_SHEM +} mctype_et; + +/*! Initial partitioning schemes */ +typedef enum { + METIS_IPTYPE_GROW, + METIS_IPTYPE_RANDOM, + METIS_IPTYPE_EDGE, + METIS_IPTYPE_NODE, + METIS_IPTYPE_METISRB +} miptype_et; + + +/*! Refinement schemes */ +typedef enum { + METIS_RTYPE_FM, + METIS_RTYPE_GREEDY, + METIS_RTYPE_SEP2SIDED, + METIS_RTYPE_SEP1SIDED +} mrtype_et; + + +/*! 
Debug Levels */ +typedef enum { + METIS_DBG_INFO = 1, /*!< Shows various diagnostic messages */ + METIS_DBG_TIME = 2, /*!< Perform timing analysis */ + METIS_DBG_COARSEN = 4, /*!< Show the coarsening progress */ + METIS_DBG_REFINE = 8, /*!< Show the refinement progress */ + METIS_DBG_IPART = 16, /*!< Show info on initial partitioning */ + METIS_DBG_MOVEINFO = 32, /*!< Show info on vertex moves during refinement */ + METIS_DBG_SEPINFO = 64, /*!< Show info on vertex moves during sep refinement */ + METIS_DBG_CONNINFO = 128, /*!< Show info on minimization of subdomain connectivity */ + METIS_DBG_CONTIGINFO = 256, /*!< Show info on elimination of connected components */ + METIS_DBG_MEMORY = 2048, /*!< Show info related to wspace allocation */ +} mdbglvl_et; + + +/* Types of objectives */ +typedef enum { + METIS_OBJTYPE_CUT, + METIS_OBJTYPE_VOL, + METIS_OBJTYPE_NODE +} mobjtype_et; + + + +#endif /* _METIS_H_ */ diff --git a/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt b/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt new file mode 100644 index 000000000..85f96b08a --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/CMakeLists.txt @@ -0,0 +1,16 @@ +# Add this directory for internal users. +#include_directories(.) +# Find sources. +file(GLOB metis_sources *.c) +# Build libmetis. +add_library(metis ${METIS_LIBRARY_TYPE} ${GKlib_sources} ${metis_sources}) +if(UNIX) + target_link_libraries(metis m) +endif() + +if(METIS_INSTALL) + install(TARGETS metis + LIBRARY DESTINATION lib + RUNTIME DESTINATION lib + ARCHIVE DESTINATION lib) +endif() diff --git a/3rdParty/metis/metis-5.1.1/libmetis/auxapi.c b/3rdParty/metis/metis-5.1.1/libmetis/auxapi.c new file mode 100644 index 000000000..8976b4ba4 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/auxapi.c @@ -0,0 +1,43 @@ +/** +\file +\brief This file contains various helper API routines for using METIS. 
+ +\date Started 5/12/2011 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: auxapi.c 10409 2011-06-25 16:58:34Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function frees memory that was allocated by METIS and returned + to the application. + + \param ptr points to the memory that was previously allocated by + METIS. +*/ +/*************************************************************************/ +int METIS_Free(void *ptr) +{ + if (ptr != NULL) free(ptr); + return METIS_OK; +} + + +/*************************************************************************/ +/*! This function sets the default values for the options. + + \param options points to an array of size at least METIS_NOPTIONS. +*/ +/*************************************************************************/ +int METIS_SetDefaultOptions(idx_t *options) +{ + iset(METIS_NOPTIONS, -1, options); + + return METIS_OK; +} + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/balance.c b/3rdParty/metis/metis-5.1.1/libmetis/balance.c new file mode 100644 index 000000000..6e88b5323 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/balance.c @@ -0,0 +1,498 @@ +/*! +\file +\brief Functions for the edge-based balancing + +\date Started 7/23/97 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: balance.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim +*/ + +#include "metislib.h" + +/************************************************************************* +* This function is the entry point of the bisection balancing algorithms. 
+**************************************************************************/ +void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) +{ + if (ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors) <= 0) + return; + + if (graph->ncon == 1) { + /* return right away if the balance is OK */ + if (rabs(ntpwgts[0]*graph->tvwgt[0]-graph->pwgts[0]) < 3*graph->tvwgt[0]/graph->nvtxs) + return; + + if (graph->nbnd > 0) + Bnd2WayBalance(ctrl, graph, ntpwgts); + else + General2WayBalance(ctrl, graph, ntpwgts); + } + else { + McGeneral2WayBalance(ctrl, graph, ntpwgts); + } +} + + +/************************************************************************* +* This function balances two partitions by moving boundary nodes +* from the domain that is overweight to the one that is underweight. +**************************************************************************/ +void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) +{ + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *perm; + rpq_t *queue; + idx_t higain, mincut, mindiff; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + /* Determine from which domain you will be moving data */ + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0] - tpwgts[0]; + mindiff = iabs(tpwgts[0]-pwgts[0]); + from = (pwgts[0] < tpwgts[0] ? 1 : 0); + to = (from+1)%2; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ICut: %6"PRIDX" [B]\n", + pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, + graph->mincut)); + + queue = rpqCreate(nvtxs); + + iset(nvtxs, -1, moved); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert the boundary nodes of the proper partition whose size is OK in the priority queue */ + nbnd = graph->nbnd; + irandArrayPermute(nbnd, perm, nbnd/5, 1); + for (ii=0; ii<nbnd; ii++) { + i = perm[ii]; + ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0); + ASSERT(bndptr[bndind[i]] != -1); + if (where[bndind[i]] == from && vwgt[bndind[i]] <= mindiff) + rpqInsert(queue, bndind[i], ed[bndind[i]]-id[bndind[i]]); + } + + mincut = graph->mincut; + for (nswaps=0; nswaps<nvtxs; nswaps++) { + if ((higain = rpqGetTop(queue)) == -1) + break; + ASSERT(bndptr[higain] != -1); + + if (pwgts[to]+vwgt[higain] > tpwgts[to]) + break; + + mincut -= (ed[higain]-id[higain]); + INC_DEC(pwgts[to], pwgts[from], vwgt[higain]); + + where[higain] = to; + moved[higain] = nswaps; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + kwgt = (to == where[k] ? 
adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + /* Update its boundary information and queue position */ + if (bndptr[k] != -1) { /* If k was a boundary vertex */ + if (ed[k] == 0) { /* Not a boundary vertex any more */ + BNDDelete(nbnd, bndind, bndptr, k); + if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) /* Remove it if in the queues */ + rpqDelete(queue, k); + } + else { /* If it has not been moved, update its position in the queue */ + if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) + rpqUpdate(queue, k, ed[k]-id[k]); + } + } + else { + if (ed[k] > 0) { /* It will now become a boundary vertex */ + BNDInsert(nbnd, bndind, bndptr, k); + if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) + rpqInsert(queue, k, ed[k]-id[k]); + } + } + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd)); + + graph->mincut = mincut; + graph->nbnd = nbnd; + + rpqDestroy(queue); + + WCOREPOP; +} + + +/************************************************************************* +* This function balances two partitions by moving the highest gain +* (including negative gain) vertices to the other domain. +* It is used only when tha unbalance is due to non contigous +* subdomains. That is, the are no boundary vertices. +* It moves vertices from the domain that is overweight to the one that +* is underweight. 
+**************************************************************************/ +void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) +{ + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *perm; + rpq_t *queue; + idx_t higain, mincut, mindiff; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + /* Determine from which domain you will be moving data */ + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0] - tpwgts[0]; + mindiff = iabs(tpwgts[0]-pwgts[0]); + from = (pwgts[0] < tpwgts[0] ? 1 : 0); + to = (from+1)%2; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ICut: %6"PRIDX" [B]\n", + pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + + queue = rpqCreate(nvtxs); + + iset(nvtxs, -1, moved); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert the nodes of the proper partition whose size is OK in the priority queue */ + irandArrayPermute(nvtxs, perm, nvtxs/5, 1); + for (ii=0; ii<nvtxs; ii++) { + i = perm[ii]; + if (where[i] == from && vwgt[i] <= mindiff) + rpqInsert(queue, i, ed[i]-id[i]); + } + + mincut = graph->mincut; + nbnd = graph->nbnd; + for (nswaps=0; nswaps<nvtxs; nswaps++) { + if ((higain = rpqGetTop(queue)) == -1) + break; + + if (pwgts[to]+vwgt[higain] > tpwgts[to]) + break; + + mincut -= (ed[higain]-id[higain]); + INC_DEC(pwgts[to], pwgts[from], vwgt[higain]); + + where[higain] = to; + moved[higain] = nswaps; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1])); + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? 
adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + /* Update the queue position */ + if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) + rpqUpdate(queue, k, ed[k]-id[k]); + + /* Update its boundary information */ + if (ed[k] == 0 && bndptr[k] != -1) + BNDDelete(nbnd, bndind, bndptr, k); + else if (ed[k] > 0 && bndptr[k] == -1) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd)); + + graph->mincut = mincut; + graph->nbnd = nbnd; + + rpqDestroy(queue); + + WCOREPOP; +} + + +/************************************************************************* +* This function performs an edge-based FM refinement +**************************************************************************/ +void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts) +{ + idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, + me, limit, tmp, cnum; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts, *id, *ed, *bndptr, *bndind; + idx_t *moved, *swaps, *perm, *qnum, *qsizes; + idx_t higain, mincut, newcut, mincutorder; + real_t *invtvwgt, *minbalv, *newbalv, minbal, newbal; + rpq_t **queues; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + invtvwgt = graph->invtvwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); + newbalv = rwspacemalloc(ctrl, ncon); + minbalv = rwspacemalloc(ctrl, ncon); + qsizes = iwspacemalloc(ctrl, 2*ncon); + + limit = gk_min(gk_max(0.01*nvtxs, 15), 100); + + /* Initialize the queues */ + queues = (rpq_t 
**)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *)); + for (i=0; i<2*ncon; i++) { + queues[i] = rpqCreate(nvtxs); + qsizes[i] = 0; + } + + for (i=0; i<nvtxs; i++) { + qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt); + qsizes[2*qnum[i]+where[i]]++; + } + + + /* for the empty queues, move into them vertices from other queues */ + for (from=0; from<2; from++) { + for (j=0; j<ncon; j++) { + if (qsizes[2*j+from] == 0) { + for (i=0; i<nvtxs; i++) { + if (where[i] != from) + continue; + + k = iargmax2_nrm(ncon, vwgt+i*ncon, invtvwgt); + if (k == j && + qsizes[2*qnum[i]+from] > qsizes[2*j+from] && + vwgt[i*ncon+qnum[i]]*invtvwgt[qnum[i]] < 1.3*vwgt[i*ncon+j]*invtvwgt[j]) { + qsizes[2*qnum[i]+from]--; + qsizes[2*j+from]++; + qnum[i] = j; + } + } + } + } + } + + + minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, minbalv); + ASSERT(minbal > 0.0); + + newcut = mincut = graph->mincut; + mincutorder = -1; + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("Parts: ["); + for (l=0; l<ncon; l++) + printf("(%6"PRIDX" %6"PRIDX" %.3"PRREAL" %.3"PRREAL") ", + pwgts[l], pwgts[ncon+l], ntpwgts[l], ntpwgts[ncon+l]); + printf("] Nv-Nb[%5"PRIDX", %5"PRIDX"]. 
ICut: %6"PRIDX", LB: %+.3"PRREAL" [B]\n", + graph->nvtxs, graph->nbnd, graph->mincut, minbal); + } + + iset(nvtxs, -1, moved); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert all nodes in the priority queues */ + nbnd = graph->nbnd; + irandArrayPermute(nvtxs, perm, nvtxs/10, 1); + for (ii=0; ii<nvtxs; ii++) { + i = perm[ii]; + rpqInsert(queues[2*qnum[i]+where[i]], i, ed[i]-id[i]); + } + + for (nswaps=0; nswaps<nvtxs; nswaps++) { + if (minbal <= 0.0) + break; + + SelectQueue(graph, ctrl->pijbm, ctrl->ubfactors, queues, &from, &cnum); + to = (from+1)%2; + + if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1) + break; + + newcut -= (ed[higain]-id[higain]); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, newbalv); + + if (newbal < minbal || (newbal == minbal && + (newcut < mincut || + (newcut == mincut && BetterBalance2Way(ncon, minbalv, newbalv))))) { + mincut = newcut; + minbal = newbal; + mincutorder = nswaps; + rcopy(ncon, newbalv, minbalv); + } + else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ + newcut += (ed[higain]-id[higain]); + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + break; + } + + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + if (ctrl->dbglvl&METIS_DBG_MOVEINFO) { + printf("Moved %6"PRIDX" from %"PRIDX"(%"PRIDX"). 
Gain: %5"PRIDX", " + "Cut: %5"PRIDX", NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); + for (l=0; l<ncon; l++) + printf("(%6"PRIDX", %6"PRIDX") ", pwgts[l], pwgts[ncon+l]); + printf(", %+.3"PRREAL" LB: %+.3"PRREAL"\n", minbal, newbal); + } + + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + /* Update the queue position */ + if (moved[k] == -1) + rpqUpdate(queues[2*qnum[k]+where[k]], k, ed[k]-id[k]); + + /* Update its boundary information */ + if (ed[k] == 0 && bndptr[k] != -1) + BNDDelete(nbnd, bndind, bndptr, k); + else if (ed[k] > 0 && bndptr[k] == -1) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + + + /**************************************************************** + * Roll back computations + *****************************************************************/ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + to = where[higain] = (where[higain]+1)%2; + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + else if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1); + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? 
adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + if (bndptr[k] != -1 && ed[k] == 0) + BNDDelete(nbnd, bndind, bndptr, k); + if (bndptr[k] == -1 && ed[k] > 0) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\tMincut: %6"PRIDX" at %5"PRIDX", NBND: %6"PRIDX", NPwgts: [", + mincut, mincutorder, nbnd); + for (l=0; l<ncon; l++) + printf("(%6"PRIDX", %6"PRIDX") ", pwgts[l], pwgts[ncon+l]); + printf("], LB: %.3"PRREAL"\n", ComputeLoadImbalance(graph, 2, ctrl->pijbm)); + } + + graph->mincut = mincut; + graph->nbnd = nbnd; + + + for (i=0; i<2*ncon; i++) + rpqDestroy(queues[i]); + + WCOREPOP; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/bucketsort.c b/3rdParty/metis/metis-5.1.1/libmetis/bucketsort.c new file mode 100644 index 000000000..e126d02a6 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/bucketsort.c @@ -0,0 +1,44 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * bucketsort.c + * + * This file contains code that implement a variety of counting sorting + * algorithms + * + * Started 7/25/97 + * George + * + */ + +#include "metislib.h" + + + +/************************************************************************* +* This function uses simple counting sort to return a permutation array +* corresponding to the sorted order. The keys are arsumed to start from +* 0 and they are positive. This sorting is used during matching. 
+**************************************************************************/ +void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys, + idx_t *tperm, idx_t *perm) +{ + idx_t i, ii; + idx_t *counts; + + WCOREPUSH; + + counts = iset(max+2, 0, iwspacemalloc(ctrl, max+2)); + + for (i=0; i<n; i++) + counts[keys[i]]++; + MAKECSR(i, max+1, counts); + + for (ii=0; ii<n; ii++) { + i = tperm[ii]; + perm[counts[keys[i]]++] = i; + } + + WCOREPOP; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/checkgraph.c b/3rdParty/metis/metis-5.1.1/libmetis/checkgraph.c new file mode 100644 index 000000000..fd4b38e76 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/checkgraph.c @@ -0,0 +1,266 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * checkgraph.c + * + * This file contains routines related to I/O + * + * Started 8/28/94 + * George + * + */ + +#include "metislib.h" + + + +/*************************************************************************/ +/*! This function checks if a graph is valid. A valid graph must satisfy + the following constraints: + - It should contain no self-edges. + - It should be undirected; i.e., (u,v) and (v,u) should be present. + - The adjacency list should not contain multiple edges to the same + other vertex. + + \param graph is the graph to be checked, whose numbering starts from 0. + \param numflag is 0 if error reporting will be done using 0 as the + numbering, or 1 if the reporting should be done using 1. + \param verbose is 1 the identified errors will be displayed, or 0, if + it should run silently. +*/ +/*************************************************************************/ +int CheckGraph(graph_t *graph, int numflag, int verbose) +{ + idx_t i, j, k, l; + idx_t nvtxs, err=0; + idx_t minedge, maxedge, minewgt, maxewgt; + idx_t *xadj, *adjncy, *adjwgt, *htable; + + numflag = (numflag == 0 ? 
0 : 1); /* make sure that numflag is 0 or 1 */ + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + htable = ismalloc(nvtxs, 0, "htable"); + + minedge = maxedge = adjncy[0]; + if (adjwgt) + minewgt = maxewgt = adjwgt[0]; + + for (i=0; i<nvtxs; i++) { + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + + minedge = (k < minedge) ? k : minedge; + maxedge = (k > maxedge) ? k : maxedge; + if (adjwgt) { + minewgt = (adjwgt[j] < minewgt) ? adjwgt[j] : minewgt; + maxewgt = (adjwgt[j] > maxewgt) ? adjwgt[j] : maxewgt; + } + + if (i == k) { + if (verbose) + printf("Vertex %"PRIDX" contains a self-loop " + "(i.e., diagonal entry in the matrix)!\n", i+numflag); + err++; + } + else { + for (l=xadj[k]; l<xadj[k+1]; l++) { + if (adjncy[l] == i) { + if (adjwgt) { + if (adjwgt[l] != adjwgt[j]) { + if (verbose) + printf("Edges (u:%"PRIDX" v:%"PRIDX" wgt:%"PRIDX") and " + "(v:%"PRIDX" u:%"PRIDX" wgt:%"PRIDX") " + "do not have the same weight!\n", + i+numflag, k+numflag, adjwgt[j], + k+numflag, i+numflag, adjwgt[l]); + err++; + } + } + break; + } + } + if (l == xadj[k+1]) { + if (verbose) + printf("Missing edge: (%"PRIDX" %"PRIDX")!\n", k+numflag, i+numflag); + err++; + } + } + + if (htable[k] == 0) { + htable[k]++; + } + else { + if (verbose) + printf("Edge %"PRIDX" from vertex %"PRIDX" is repeated %"PRIDX" times\n", + k+numflag, i+numflag, htable[k]++); + err++; + } + } + + for (j=xadj[i]; j<xadj[i+1]; j++) + htable[adjncy[j]] = 0; + } + + + if (err > 0 && verbose) { + printf("A total of %"PRIDX" errors exist in the input file. " + "Correct them, and run again!\n", err); + } + + gk_free((void **)&htable, LTERM); + + return (err == 0 ? 1 : 0); +} + + +/*************************************************************************/ +/*! 
This function performs a quick check of the weights of the graph */ +/*************************************************************************/ +int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) +{ + idx_t i; + + if (ncon <= 0) { + printf("Input Error: ncon must be >= 1.\n"); + return 0; + } + + if (vwgt) { + for (i=ncon*nvtxs; i>=0; i--) { + if (vwgt[i] < 0) { + printf("Input Error: negative vertex weight(s).\n"); + return 0; + } + } + } + if (vsize) { + for (i=nvtxs; i>=0; i--) { + if (vsize[i] < 0) { + printf("Input Error: negative vertex sizes(s).\n"); + return 0; + } + } + } + if (adjwgt) { + for (i=xadj[nvtxs]-1; i>=0; i--) { + if (adjwgt[i] < 0) { + printf("Input Error: non-positive edge weight(s).\n"); + return 0; + } + } + } + + return 1; +} + + +/*************************************************************************/ +/*! This function creates a graph whose topology is consistent with + Metis' requirements that: + - There are no self-edges. + - It is undirected; i.e., (u,v) and (v,u) should be present and of the + same weight. + - The adjacency list should not contain multiple edges to the same + other vertex. + + Any of the above errors are fixed by performing the following operations: + - Self-edges are removed. + - The undirected graph is formed by the union of edges. + - One of the duplicate edges is selected. + + The routine does not change the provided vertex weights. 
+*/ +/*************************************************************************/ +graph_t *FixGraph(graph_t *graph) +{ + idx_t i, j, k, l, nvtxs, nedges; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *nxadj, *nadjncy, *nadjwgt; + graph_t *ngraph; + uvw_t *edges; + + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + ASSERT(adjwgt != NULL); + + ngraph = CreateGraph(); + + ngraph->nvtxs = nvtxs; + + /* deal with vertex weights/sizes */ + ngraph->ncon = graph->ncon; + ngraph->vwgt = icopy(nvtxs*graph->ncon, graph->vwgt, + imalloc(nvtxs*graph->ncon, "FixGraph: vwgt")); + + ngraph->vsize = ismalloc(nvtxs, 1, "FixGraph: vsize"); + if (graph->vsize) + icopy(nvtxs, graph->vsize, ngraph->vsize); + + /* fix graph by sorting the "superset" of edges */ + edges = (uvw_t *)gk_malloc(sizeof(uvw_t)*2*xadj[nvtxs], "FixGraph: edges"); + + for (nedges=0, i=0; i<nvtxs; i++) { + for (j=xadj[i]; j<xadj[i+1]; j++) { + /* keep only the upper-triangular part of the adjacency matrix */ + if (i < adjncy[j]) { + edges[nedges].u = i; + edges[nedges].v = adjncy[j]; + edges[nedges].w = adjwgt[j]; + nedges++; + } + else if (i > adjncy[j]) { + edges[nedges].u = adjncy[j]; + edges[nedges].v = i; + edges[nedges].w = adjwgt[j]; + nedges++; + } + } + } + + uvwsorti(nedges, edges); + + + /* keep the unique subset */ + for (k=0, i=1; i<nedges; i++) { + if (edges[k].v != edges[i].v || edges[k].u != edges[i].u) { + edges[++k] = edges[i]; + } + } + nedges = (nedges > 0 ? 
k+1 : 0); /* with no surviving edge, edges[0] was never written: keep the set empty */ + + /* allocate memory for the fixed graph */ + nxadj = ngraph->xadj = ismalloc(nvtxs+1, 0, "FixGraph: nxadj"); + nadjncy = ngraph->adjncy = imalloc(2*nedges, "FixGraph: nadjncy"); + nadjwgt = ngraph->adjwgt = imalloc(2*nedges, "FixGraph: nadjwgt"); + + /* create the adjacency list of the fixed graph from the upper-triangular + part of the adjacency matrix */ + for (k=0; k<nedges; k++) { + nxadj[edges[k].u]++; + nxadj[edges[k].v]++; + } + MAKECSR(i, nvtxs, nxadj); + + for (k=0; k<nedges; k++) { + 
nadjncy[nxadj[edges[k].u]] = edges[k].v; + nadjncy[nxadj[edges[k].v]] = edges[k].u; + nadjwgt[nxadj[edges[k].u]] = edges[k].w; + nadjwgt[nxadj[edges[k].v]] = edges[k].w; + nxadj[edges[k].u]++; + nxadj[edges[k].v]++; + } + SHIFTCSR(i, nvtxs, nxadj); + + gk_free((void **)&edges, LTERM); + + return ngraph; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/coarsen.c b/3rdParty/metis/metis-5.1.1/libmetis/coarsen.c new file mode 100644 index 000000000..447fc43e8 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/coarsen.c @@ -0,0 +1,1971 @@ +/*! +\file +\brief Functions for computing matchings during graph coarsening + +\date Started 7/23/97 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: coarsen.c 20398 2016-11-22 17:17:12Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + +#define UNMATCHEDFOR2HOP 0.10 /* The fraction of unmatched vertices that triggers 2-hop */ + + +/*************************************************************************/ +/*! This function takes a graph and creates a sequence of coarser graphs. + It implements the coarsening phase of the multilevel paradigm. 
+ */ +/*************************************************************************/ +graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, eqewgts, level=0; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr)); + + /* determine if the weights on the edges are all the same */ + for (eqewgts=1, i=1; i<graph->nedges; i++) { + if (graph->adjwgt[0] != graph->adjwgt[i]) { + eqewgts = 0; + break; + } + } + + /* set the maximum allowed coarsest vertex weight */ + for (i=0; i<graph->ncon; i++) + ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo; + + do { + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + + /* allocate memory for cmap, if it has not already been done due to + multiple cuts */ + if (graph->cmap == NULL) + graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap"); + + /* determine which matching scheme you will use */ + switch (ctrl->ctype) { + case METIS_CTYPE_RM: + Match_RM(ctrl, graph); + break; + case METIS_CTYPE_SHEM: + if (eqewgts || graph->nedges == 0) + Match_RM(ctrl, graph); + else + Match_SHEM(ctrl, graph); + break; + default: + gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype); + } + + graph_WriteToDisk(ctrl, graph); + + graph = graph->coarser; + eqewgts = 0; + level++; + + ASSERT(CheckGraph(graph, 0, 1)); + + } while (graph->nvtxs > ctrl->CoarsenTo && + graph->nvtxs < COARSEN_FRACTION*graph->finer->nvtxs && + graph->nedges > graph->nvtxs/2); + + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr)); + + return graph; +} + + +/*************************************************************************/ +/*! This function takes a graph and creates a sequence of nlevels coarser + graphs, where nlevels is an input parameter. 
+ */ +/*************************************************************************/ +graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels) +{ + idx_t i, eqewgts, level; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr)); + + /* determine if the weights on the edges are all the same */ + for (eqewgts=1, i=1; i<graph->nedges; i++) { + if (graph->adjwgt[0] != graph->adjwgt[i]) { + eqewgts = 0; + break; + } + } + + /* set the maximum allowed coarsest vertex weight */ + for (i=0; i<graph->ncon; i++) + ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo; + + for (level=0; level<nlevels; level++) { + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + + /* allocate memory for cmap, if it has not already been done due to + multiple cuts */ + if (graph->cmap == NULL) + graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap"); + + /* determine which matching scheme you will use */ + switch (ctrl->ctype) { + case METIS_CTYPE_RM: + Match_RM(ctrl, graph); + break; + case METIS_CTYPE_SHEM: + if (eqewgts || graph->nedges == 0) + Match_RM(ctrl, graph); + else + Match_SHEM(ctrl, graph); + break; + default: + gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype); + } + + graph_WriteToDisk(ctrl, graph); + + graph = graph->coarser; + eqewgts = 0; + + ASSERT(CheckGraph(graph, 0, 1)); + + if (graph->nvtxs < ctrl->CoarsenTo || + graph->nvtxs > COARSEN_FRACTION*graph->finer->nvtxs || + graph->nedges < graph->nvtxs/2) + break; + } + + IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr)); + + return graph; +} + + +/*************************************************************************/ +/*! This function finds a matching by randomly selecting one of the + unmatched adjacent vertices. 
+ */
+/**************************************************************************/
+/* Fills a workspace match[] array and graph->cmap, then calls
+   CreateCoarseGraph() to build the coarser graph from them.
+   Returns the number of vertices of that coarser graph. */
+idx_t Match_RM(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx,
+        last_unmatched, avgdegree, bnum;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;
+  idx_t *match, *cmap, *degrees, *perm, *tperm;
+  size_t nunmatched=0;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap   = graph->cmap;
+
+  maxvwgt  = ctrl->maxvwgt;
+
+  /* every vertex starts out unmatched */
+  match   = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));
+  perm    = iwspacemalloc(ctrl, nvtxs);
+  tperm   = iwspacemalloc(ctrl, nvtxs);
+  degrees = iwspacemalloc(ctrl, nvtxs);
+
+  /* Determine a "random" traversal order that is biased towards
+     low-degree vertices */
+  irandArrayPermute(nvtxs, tperm, nvtxs/8, 1);
+
+  /* the sort key of a vertex is sqrt(1+degree), capped at avgdegree
+     NOTE(review): xadj[nvtxs]/nvtxs divides by zero when nvtxs == 0;
+     callers presumably never pass an empty graph -- confirm */
+  avgdegree = 4.0*(xadj[nvtxs]/nvtxs);
+  for (i=0; i<nvtxs; i++) {
+    bnum = sqrt(1+xadj[i+1]-xadj[i]);
+    degrees[i] = (bnum > avgdegree ? avgdegree : bnum);
+  }
+  BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm);
+
+
+  /* Traverse the vertices and compute the matching */
+  for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+
+    if (match[i] == UNMATCHED) {  /* Unmatched */
+      maxidx = i;  /* default: i ends up matched with itself */
+
+      /* only try to pair i when its own weight is still below the limit */
+      if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {
+        /* Deal with island vertices. Find a non-island and match it with.
+           The matching ignores ctrl->maxvwgt requirements */
+        if (xadj[i] == xadj[i+1]) {
+          /* scan forward in perm[]; last_unmatched persists across islands
+             so positions already examined are not revisited */
+          last_unmatched = gk_max(pi, last_unmatched)+1;
+          for (; last_unmatched<nvtxs; last_unmatched++) {
+            j = perm[last_unmatched];
+            if (match[j] == UNMATCHED) {
+              maxidx = j;
+              break;
+            }
+          }
+        }
+        else {
+          /* Find a random matching, subject to maxvwgt constraints */
+          if (ncon == 1) {
+            /* single constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED && vwgt[i]+vwgt[k] <= maxvwgt[0]) {
+                maxidx = k;
+                break;
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && 2*vwgt[i] < maxvwgt[0]) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+          else {
+            /* multi-constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED &&
+                  ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt)) {
+                maxidx = k;
+                break;
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+        }
+      }
+
+      /* maxidx == UNMATCHED leaves i open for the 2-hop pass below */
+      if (maxidx != UNMATCHED) {
+        cmap[i]  = cmap[maxidx] = cnvtxs++;
+        match[i] = maxidx;
+        match[maxidx] = i;
+      }
+    }
+  }
+
+  //printf("nunmatched: %zu\n", nunmatched);
+
+  /* see if a 2-hop matching is required/allowed */
+  if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs)
+    cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);
+
+
+  /* match the final unmatched vertices with themselves and reorder the vertices
+     of the coarse graph for memory-friendly contraction; cnvtxs is recounted
+     from zero here, so the values assigned to cmap above are all overwritten */
+  for (cnvtxs=0, i=0; i<nvtxs; i++) {
+    if (match[i] == UNMATCHED) {
+      match[i] = i;
+      cmap[i]  = cnvtxs++;
+    }
+    else {
+      /* number each pair once, when its lower-numbered endpoint is visited */
+      if (i <= match[i])
+        cmap[i] = cmap[match[i]] = cnvtxs++;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));
+
+  CreateCoarseGraph(ctrl, graph, cnvtxs, match);
+
+  WCOREPOP;
+
+  return cnvtxs;
+}
+
+
+/**************************************************************************/
+/*! This function finds a matching using the HEM heuristic. The vertices
+    are visited based on increasing degree to ensure that all vertices are
+    given a chance to match with something.
+
+    Among the unmatched neighbors that satisfy the maxvwgt constraint it
+    picks the one connected by the heaviest edge. It then calls
+    CreateCoarseGraph() and returns the number of coarse vertices.
+ */
+/**************************************************************************/
+idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, maxwgt,
+        last_unmatched, avgdegree, bnum;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;
+  idx_t *match, *cmap, *degrees, *perm, *tperm;
+  size_t nunmatched=0;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap   = graph->cmap;
+
+  maxvwgt  = ctrl->maxvwgt;
+
+  /* every vertex starts out unmatched */
+  match   = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));
+  perm    = iwspacemalloc(ctrl, nvtxs);
+  tperm   = iwspacemalloc(ctrl, nvtxs);
+  degrees = iwspacemalloc(ctrl, nvtxs);
+
+  /* Determine a "random" traversal order that is biased towards low-degree vertices */
+  irandArrayPermute(nvtxs, tperm, nvtxs/8, 1);
+
+  /* the sort key of a vertex is sqrt(1+degree), capped at avgdegree
+     NOTE(review): xadj[nvtxs]/nvtxs divides by zero when nvtxs == 0;
+     callers presumably never pass an empty graph -- confirm */
+  avgdegree = 4.0*(xadj[nvtxs]/nvtxs);
+  for (i=0; i<nvtxs; i++) {
+    bnum = sqrt(1+xadj[i+1]-xadj[i]);
+    degrees[i] = (bnum > avgdegree ? avgdegree : bnum);
+  }
+  BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm);
+
+
+  /* Traverse the vertices and compute the matching */
+  for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+
+    if (match[i] == UNMATCHED) {  /* Unmatched */
+      maxidx = i;   /* default: i ends up matched with itself */
+      maxwgt = -1;  /* heaviest qualifying edge seen so far */
+
+      /* only try to pair i when its own weight is still below the limit */
+      if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {
+        /* Deal with island vertices. Find a non-island and match it with.
+           The matching ignores ctrl->maxvwgt requirements */
+        if (xadj[i] == xadj[i+1]) {
+          /* scan forward in perm[]; last_unmatched persists across islands
+             so positions already examined are not revisited */
+          last_unmatched = gk_max(pi, last_unmatched)+1;
+          for (; last_unmatched<nvtxs; last_unmatched++) {
+            j = perm[last_unmatched];
+            if (match[j] == UNMATCHED) {
+              maxidx = j;
+              break;
+            }
+          }
+        }
+        else {
+          /* Find a heavy-edge matching, subject to maxvwgt constraints */
+          if (ncon == 1) {
+            /* single constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED &&
+                  maxwgt < adjwgt[j] && vwgt[i]+vwgt[k] <= maxvwgt[0]) {
+                maxidx = k;
+                maxwgt = adjwgt[j];
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && 2*vwgt[i] < maxvwgt[0]) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+          else {
+            /* multi-constraint version; ties on edge weight are decided
+               by BetterVBalance() */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED &&
+                  ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt) &&
+                  (maxwgt < adjwgt[j] ||
+                   (maxwgt == adjwgt[j] &&
+                    BetterVBalance(ncon, graph->invtvwgt, vwgt+i*ncon,
+                        vwgt+maxidx*ncon, vwgt+k*ncon)))) {
+                maxidx = k;
+                maxwgt = adjwgt[j];
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+        }
+      }
+
+      /* maxidx == UNMATCHED leaves i open for the 2-hop pass below */
+      if (maxidx != UNMATCHED) {
+        cmap[i]  = cmap[maxidx] = cnvtxs++;
+        match[i] = maxidx;
+        match[maxidx] = i;
+      }
+    }
+  }
+
+  //printf("nunmatched: %zu\n", nunmatched);
+
+  /* see if a 2-hop matching is required/allowed */
+  if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs)
+    cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);
+
+
+  /* match the final unmatched vertices with themselves and reorder the vertices
+     of the coarse graph for memory-friendly contraction; cnvtxs is recounted
+     from zero here, so the values assigned to cmap above are all overwritten */
+  for (cnvtxs=0, i=0; i<nvtxs; i++) {
+    if (match[i] == UNMATCHED) {
+      match[i] = i;
+      cmap[i]  = cnvtxs++;
+    }
+    else {
+      /* number each pair once, when its lower-numbered endpoint is visited */
+      if (i <= match[i])
+        cmap[i] = cmap[match[i]] = cnvtxs++;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));
+
+  CreateCoarseGraph(ctrl, graph, cnvtxs, match);
+
+  WCOREPOP;
+
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! This function matches the unmatched vertices using a 2-hop matching
+    that involves vertices that are two hops away from each other.
+
+    It runs up to four passes, and the last two only run while nunmatched
+    stays above a multiple of UNMATCHEDFOR2HOP*nvtxs. Returns the updated
+    number of coarse vertices. */
+/**************************************************************************/
+idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,
+          idx_t cnvtxs, size_t nunmatched)
+{
+
+  /* pass 1: any shared neighbor, vertices of degree < 2 */
+  cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 2);
+  /* pass 2: identical adjacency lists, degree < 64 */
+  cnvtxs = Match_2HopAll(ctrl, graph, perm, match, cnvtxs, &nunmatched, 64);
+  /* pass 3: any shared neighbor, degree < 3 */
+  if (nunmatched > 1.5*UNMATCHEDFOR2HOP*graph->nvtxs)
+    cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 3);
+  /* pass 4: any shared neighbor, degree < nvtxs */
+  if (nunmatched > 2.0*UNMATCHEDFOR2HOP*graph->nvtxs)
+    cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, graph->nvtxs);
+
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! 
This function matches the unmatched vertices whose degree is less than + maxdegree using a 2-hop matching that involves vertices that are two + hops away from each other. + The requirement of the 2-hop matching is a simple non-empty overlap + between the adjancency lists of the vertices. */ +/**************************************************************************/ +idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree) +{ + idx_t i, pi, ii, j, jj, k, nvtxs; + idx_t *xadj, *adjncy, *colptr, *rowind; + idx_t *cmap; + size_t nunmatched; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + cmap = graph->cmap; + + nunmatched = *r_nunmatched; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */ + + /* create the inverted index */ + WCOREPUSH; + colptr = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs+1)); + for (i=0; i<nvtxs; i++) { + if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) { + for (j=xadj[i]; j<xadj[i+1]; j++) + colptr[adjncy[j]]++; + } + } + MAKECSR(i, nvtxs, colptr); + + rowind = iwspacemalloc(ctrl, colptr[nvtxs]); + for (pi=0; pi<nvtxs; pi++) { + i = perm[pi]; + if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) { + for (j=xadj[i]; j<xadj[i+1]; j++) + rowind[colptr[adjncy[j]]++] = i; + } + } + SHIFTCSR(i, nvtxs, colptr); + + /* compute matchings by going down the inverted index */ + for (pi=0; pi<nvtxs; pi++) { + i = perm[pi]; + if (colptr[i+1]-colptr[i] < 2) + continue; + + for (jj=colptr[i+1], j=colptr[i]; j<jj; j++) { + if (match[rowind[j]] == UNMATCHED) { + for (jj--; jj>j; jj--) { + if (match[rowind[jj]] == UNMATCHED) { + cmap[rowind[j]] = cmap[rowind[jj]] = cnvtxs++; + match[rowind[j]] = rowind[jj]; + match[rowind[jj]] = rowind[j]; + nunmatched -= 2; + break; + } + } + } + } + } + WCOREPOP; + + /*IFSET(ctrl->dbglvl, 
METIS_DBG_COARSEN, printf("OUT: nunmatched: %zu\n", nunmatched)); */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr)); + + *r_nunmatched = nunmatched; + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function matches the unmatched vertices whose degree is less than + maxdegree using a 2-hop matching that involves vertices that are two + hops away from each other. + The requirement of the 2-hop matching is that of identical adjacency + lists. + */ +/**************************************************************************/ +idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree) +{ + idx_t i, pi, pk, ii, j, jj, k, nvtxs, mask, idegree; + idx_t *xadj, *adjncy; + idx_t *cmap, *mark; + ikv_t *keys; + size_t nunmatched, ncand; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + cmap = graph->cmap; + + nunmatched = *r_nunmatched; + mask = IDX_MAX/maxdegree; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */ + + WCOREPUSH; + + /* collapse vertices with identical adjancency lists */ + keys = ikvwspacemalloc(ctrl, nunmatched); + for (ncand=0, pi=0; pi<nvtxs; pi++) { + i = perm[pi]; + idegree = xadj[i+1]-xadj[i]; + if (match[i] == UNMATCHED && idegree > 1 && idegree < maxdegree) { + for (k=0, j=xadj[i]; j<xadj[i+1]; j++) + k += adjncy[j]%mask; + keys[ncand].val = i; + keys[ncand].key = (k%mask)*maxdegree + idegree; + ncand++; + } + } + ikvsorti(ncand, keys); + + mark = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + for (pi=0; pi<ncand; pi++) { + i = keys[pi].val; + if (match[i] != UNMATCHED) + continue; + + for (j=xadj[i]; j<xadj[i+1]; j++) + mark[adjncy[j]] = i; + + for (pk=pi+1; pk<ncand; pk++) { + k = keys[pk].val; + if (match[k] != UNMATCHED) + continue; + + if 
(keys[pi].key != keys[pk].key) + break; + if (xadj[i+1]-xadj[i] != xadj[k+1]-xadj[k]) + break; + + for (jj=xadj[k]; jj<xadj[k+1]; jj++) { + if (mark[adjncy[jj]] != i) + break; + } + if (jj == xadj[k+1]) { + cmap[i] = cmap[k] = cnvtxs++; + match[i] = k; + match[k] = i; + nunmatched -= 2; + break; + } + } + } + WCOREPOP; + + /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: ncand: %zu, nunmatched: %zu\n", ncand, nunmatched)); */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr)); + + *r_nunmatched = nunmatched; + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function finds a matching by selecting an adjacent vertex based + on the Jaccard coefficient of the adjaceny lists. + */ +/**************************************************************************/ +idx_t Match_JC(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, pi, ii, iii, j, jj, jjj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, + last_unmatched, avgdegree, bnum; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt; + idx_t *match, *cmap, *degrees, *perm, *tperm, *vec, *marker; + idx_t mytwgt, xtwgt, ctwgt; + float bscore, score; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + maxvwgt = ctrl->maxvwgt; + + match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs)); + perm = iwspacemalloc(ctrl, nvtxs); + tperm = iwspacemalloc(ctrl, nvtxs); + degrees = iwspacemalloc(ctrl, nvtxs); + + irandArrayPermute(nvtxs, tperm, nvtxs/8, 1); + + avgdegree = 4.0*(xadj[nvtxs]/nvtxs); + for (i=0; i<nvtxs; i++) { + bnum = sqrt(1+xadj[i+1]-xadj[i]); + degrees[i] = (bnum > avgdegree ? 
avgdegree : bnum); + } + BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm); + + /* point to the wspace vectors that are not needed any more */ + vec = tperm; + marker = degrees; + iset(nvtxs, -1, vec); + iset(nvtxs, -1, marker); + + for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) { + i = perm[pi]; + + if (match[i] == UNMATCHED) { /* Unmatched */ + maxidx = i; + + if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) { + /* Deal with island vertices. Find a non-island and match it with. + The matching ignores ctrl->maxvwgt requirements */ + if (xadj[i] == xadj[i+1]) { + last_unmatched = gk_max(pi, last_unmatched)+1; + for (; last_unmatched<nvtxs; last_unmatched++) { + j = perm[last_unmatched]; + if (match[j] == UNMATCHED) { + maxidx = j; + break; + } + } + } + else { + if (ncon == 1) { + /* Find a max JC pair, subject to maxvwgt constraints */ + if (xadj[i+1]-xadj[i] < avgdegree) { + marker[i] = i; + bscore = 0.0; + mytwgt = 0; + for (j=xadj[i]; j<xadj[i+1]; j++) { + mytwgt += 1;//adjwgt[j]; + vec[adjncy[j]] = 1;//adjwgt[j]; + } + + /* single constraint pairing */ +#ifdef XXX + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + if (marker[ii] == i || match[ii] != UNMATCHED || vwgt[i]+vwgt[ii] > maxvwgt[0]) + continue; + + ctwgt = xtwgt = 0; + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + xtwgt += adjwgt[jj]; + if (vec[adjncy[jj]] > 0) + ctwgt += vec[adjncy[jj]] + adjwgt[jj]; + else if (adjncy[jj] == i) { + ctwgt += adjwgt[jj]; + xtwgt -= adjwgt[jj]; + } + } + + score = 1.0*ctwgt/(mytwgt+xtwgt-ctwgt); + if (score > bscore) { + bscore = score; + maxidx = ii; + } + marker[ii] = i; + } +#endif + + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + iii = adjncy[jj]; + + if (marker[iii] == i || match[iii] != UNMATCHED || vwgt[i]+vwgt[iii] > maxvwgt[0]) + continue; + + ctwgt = xtwgt = 0; + for (jjj=xadj[iii]; jjj<xadj[iii+1]; jjj++) { + xtwgt += 1;//adjwgt[jjj]; + if 
(vec[adjncy[jjj]] > 0) + ctwgt += 2;//vec[adjncy[jjj]] + adjwgt[jjj]; + else if (adjncy[jjj] == i) + ctwgt += 10*adjwgt[jjj]; + } + + score = 1.0*ctwgt/(mytwgt+xtwgt); + //printf("%"PRIDX" %"PRIDX" %"PRIDX" %.4f\n", mytwgt, xtwgt, ctwgt, score); + if (score > bscore) { + bscore = score; + maxidx = iii; + } + marker[iii] = i; + } + } + + /* reset vec array */ + for (j=xadj[i]; j<xadj[i+1]; j++) + vec[adjncy[j]] = -1; + } + } + else { + /* multi-constraint version */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (match[k] == UNMATCHED && + ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt)) { + maxidx = k; + break; + } + } + } + } + } + + if (maxidx != UNMATCHED) { + cmap[i] = cmap[maxidx] = cnvtxs++; + match[i] = maxidx; + match[maxidx] = i; + } + } + } + + + /* match the final unmatched vertices with themselves and reorder the vertices + of the coarse graph for memory-friendly contraction */ + for (cnvtxs=0, i=0; i<nvtxs; i++) { + if (match[i] == UNMATCHED) { + match[i] = i; + cmap[i] = cnvtxs++; + } + else { + if (i <= match[i]) + cmap[i] = cmap[match[i]] = cnvtxs++; + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr)); + + CreateCoarseGraph(ctrl, graph, cnvtxs, match); + + WCOREPOP; + + return cnvtxs; +} + + +/*************************************************************************/ +/*! This function prints various stats for each graph during coarsening + */ +/*************************************************************************/ +void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i; + + printf("%10"PRIDX" %10"PRIDX" %10"PRIDX" [%"PRIDX"] [", + graph->nvtxs, graph->nedges, isum(graph->nedges, graph->adjwgt, 1), ctrl->CoarsenTo); + + for (i=0; i<graph->ncon; i++) + printf(" %8"PRIDX":%8"PRIDX, ctrl->maxvwgt[i], graph->tvwgt[i]); + printf(" ]\n"); +} + + +/*************************************************************************/ +/*! This function creates the coarser graph. 
It uses a simple hash-table
+    for identifying the adjacent vertices that get collapsed to the same
+    node. The hash-table can have conflicts, which are handled via a
+    linear scan.
+
+    It hands the work to CreateCoarseGraphNoMask() when the coarse graph is
+    small, the average degree is high, or any vertex has more than mask>>3
+    neighbors.
+ */
+/*************************************************************************/
+void CreateCoarseGraph0(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,
+         idx_t *match)
+{
+  idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+  int mask, dovsize, dropedges;
+  idx_t cv, nkeep, droppedewgt;
+  idx_t *keys=NULL, *medianewgts=NULL, *noise=NULL;
+
+  dovsize   = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+  dropedges = ctrl->dropedges;
+
+  /* Check if the mask-version of the code is a good choice */
+  mask = HTLENGTH;
+  if (cnvtxs < 2*mask || graph->nedges/graph->nvtxs > mask/20) {
+    CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);
+    return;
+  }
+
+  nvtxs = graph->nvtxs;
+  xadj  = graph->xadj;
+  for (v=0; v<nvtxs; v++) {
+    if (xadj[v+1]-xadj[v] > (mask>>3)) {
+      CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);
+      return;
+    }
+  }
+
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  ncon   = graph->ncon;
+  vwgt   = graph->vwgt;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap   = graph->cmap;
+
+  /* Setup structures for dropedges */
+  if (dropedges) {
+    /* nkeep = largest degree in the fine graph; it sizes the keys buffer */
+    for (nkeep=-1, v=0; v<nvtxs; v++)
+      nkeep = gk_max(nkeep, xadj[v+1]-xadj[v]);
+
+    medianewgts = iwspacemalloc(ctrl, cnvtxs);
+    noise       = iwspacemalloc(ctrl, cnvtxs);
+    keys        = iwspacemalloc(ctrl, 2*(nkeep+1));
+
+    for (v=0; v<cnvtxs; v++)
+      noise[v] = irandInRange(128);
+  }
+
+  /* Initialize the coarser graph */
+  cgraph   = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj    = cgraph->xadj;
+  cvwgt    = cgraph->vwgt;
+  cvsize   = cgraph->vsize;
+  cadjncy  = cgraph->adjncy;
+  cadjwgt  = cgraph->adjwgt;
+
+  htable = iset(gk_min(cnvtxs+1, mask+1), -1, iwspacemalloc(ctrl, mask+1));
+
+  /* cnvtxs is recounted from zero; it doubles as the id of the coarse
+     vertex being built */
+  cxadj[0] = cnvtxs = cnedges = 0;
+  for (v=0; v<nvtxs; v++) {
+    /* each pair is processed once, from its lower-numbered endpoint */
+    if ((u = match[v]) < v)
+      continue;
+
+    ASSERT(cmap[v] == cnvtxs);
+    ASSERT(cmap[match[v]] == cnvtxs);
+
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;
+
+    /* merge the adjacency list of v; htable[k&mask] caches the position of
+       one coarse neighbor, and a colliding neighbor is found by scanning */
+    istart = xadj[v];
+    iend   = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k  = cmap[adjncy[j]];
+      kk = k&mask;
+      if ((m = htable[kk]) == -1) {
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[kk]      = nedges++;
+      }
+      else if (cadjncy[m] == k) {
+        cadjwgt[m] += adjwgt[j];
+      }
+      else {
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == k) {
+            cadjwgt[jj] += adjwgt[j];
+            break;
+          }
+        }
+        if (jj == nedges) {
+          cadjncy[nedges]   = k;
+          cadjwgt[nedges++] = adjwgt[j];
+        }
+      }
+    }
+
+    if (v != u) {
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      /* merge the adjacency list of the partner u in the same way */
+      istart = xadj[u];
+      iend   = xadj[u+1];
+      for (j=istart; j<iend; j++) {
+        k  = cmap[adjncy[j]];
+        kk = k&mask;
+        if ((m = htable[kk]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[kk]      = nedges++;
+        }
+        else if (cadjncy[m] == k) {
+          cadjwgt[m] += adjwgt[j];
+        }
+        else {
+          for (jj=0; jj<nedges; jj++) {
+            if (cadjncy[jj] == k) {
+              cadjwgt[jj] += adjwgt[j];
+              break;
+            }
+          }
+          if (jj == nedges) {
+            cadjncy[nedges]   = k;
+            cadjwgt[nedges++] = adjwgt[j];
+          }
+        }
+      }
+
+      /* Remove the contracted adjacency weight */
+      jj = htable[cnvtxs&mask];
+      if (jj >= 0 && cadjncy[jj] != cnvtxs) {
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == cnvtxs)
+            break;
+        }
+      }
+      /* This 2nd check is needed for non-adjacent matchings */
+      if (jj >= 0 && jj < nedges && cadjncy[jj] == cnvtxs) {
+        cadjncy[jj] = cadjncy[--nedges];
+        cadjwgt[jj] = cadjwgt[nedges];
+      }
+    }
+
+    /* Zero out the htable */
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]&mask] = -1;
+    htable[cnvtxs&mask] = -1;
+
+    /* Determine the median weight of the incident edges, which will be used
+       to keep an edge (u, v) iff wgt(u, v) >= min(medianewgts[u], medianewgts[v]) */
+    if (dropedges) {
+      for (j=0; j<nedges; j++)
+        keys[j] = (cadjwgt[j]<<8) + noise[cnvtxs] + noise[cadjncy[j]];
+      isortd(nedges, keys);
+      /* NOTE(review): the index is half the combined fine degree and can be
+         >= nedges, in which case an entry not written above is read --
+         confirm this is intended */
+      medianewgts[cnvtxs] = keys[((xadj[v+1]-xadj[v] + xadj[u+1]-xadj[u])>>1)];
+    }
+
+    /* advance the write window past the edges of this coarse vertex */
+    cadjncy         += nedges;
+    cadjwgt         += nedges;
+    cnedges         += nedges;
+    cxadj[++cnvtxs]  = cnedges;
+  }
+
+  /* compact the adjacency structure of the coarser graph to keep only +ve edges */
+  if (dropedges) {
+    droppedewgt = 0;
+
+    cadjncy  = cgraph->adjncy;
+    cadjwgt  = cgraph->adjwgt;
+
+    cnedges = 0;
+    for (u=0; u<cnvtxs; u++) {
+      istart = cxadj[u];
+      iend   = cxadj[u+1];
+      for (j=istart; j<iend; j++) {
+        v = cadjncy[j];
+        if ((cadjwgt[j]<<8) + noise[u] + noise[v] >= gk_min(medianewgts[u], medianewgts[v])) {
+          cadjncy[cnedges]   = cadjncy[j];
+          cadjwgt[cnedges++] = cadjwgt[j];
+        }
+        else
+          droppedewgt += cadjwgt[j];
+      }
+      cxadj[u] = cnedges;
+    }
+    SHIFTCSR(j, cnvtxs, cxadj);
+
+    //printf("droppedewgt: %d\n", (int)droppedewgt);
+
+    cgraph->droppedewgt = droppedewgt;
+  }
+
+  cgraph->nedges = cnedges;
+
+  /* total weight per constraint, and its inverse guarded against zero */
+  for (j=0; j<ncon; j++) {
+    cgraph->tvwgt[j]    = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);
+    cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);
+  }
+
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph. It uses a simple hash-table
+    for identifying the adjacent vertices that get collapsed to the same
+    node. The hash-table can have conflicts, which are handled via a
+    linear scan.
+ */ +/*************************************************************************/ +void CreateCoarseGraph1(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match) +{ + idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, + cnedges, v, u, mask; + idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt; + idx_t *cmap, *htable, *table; + idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt; + graph_t *cgraph; + int dovsize, dropedges, usemask; + idx_t cv, nkeep, droppedewgt; + idx_t *keys=NULL, *medianewgts=NULL, *noise=NULL; + + WCOREPUSH; + + dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0); + dropedges = ctrl->dropedges; + + mask = HTLENGTH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + vsize = graph->vsize; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + /* Setup structures for dropedges */ + if (dropedges) { + for (nkeep=-1, v=0; v<nvtxs; v++) + nkeep = gk_max(nkeep, xadj[v+1]-xadj[v]); + + medianewgts = iwspacemalloc(ctrl, cnvtxs); + noise = iwspacemalloc(ctrl, cnvtxs); + keys = iwspacemalloc(ctrl, 2*(nkeep+1)); + + for (v=0; v<cnvtxs; v++) + noise[v] = irandInRange(128); + } + + /* Initialize the coarser graph */ + cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize); + cxadj = cgraph->xadj; + cvwgt = cgraph->vwgt; + cvsize = cgraph->vsize; + cadjncy = cgraph->adjncy; + cadjwgt = cgraph->adjwgt; + + htable = iset(gk_min(cnvtxs+1, mask+1), -1, iwspacemalloc(ctrl, mask+1)); + table = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs)); + + cxadj[0] = cnvtxs = cnedges = 0; + for (v=0; v<nvtxs; v++) { + if ((u = match[v]) < v) + continue; + + ASSERT(cmap[v] == cnvtxs); + ASSERT(cmap[match[v]] == cnvtxs); + + /* take care of the vertices */ + if (ncon == 1) + cvwgt[cnvtxs] = vwgt[v]; + else + icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon); + + if (dovsize) + cvsize[cnvtxs] = vsize[v]; + + if (v != u) { + if (ncon == 1) + 
cvwgt[cnvtxs] += vwgt[u]; + else + iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1); + + if (dovsize) + cvsize[cnvtxs] += vsize[u]; + } + + + /* take care of the edges */ + usemask = ((xadj[v+1]-xadj[v] + xadj[u+1]-xadj[u]) > (mask>>3) ? 0 : 1); + nedges = 0; + + + if (usemask) { + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + kk = k&mask; + if ((m = htable[kk]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + htable[kk] = nedges++; + } + else if (cadjncy[m] == k) { + cadjwgt[m] += adjwgt[j]; + } + else { + for (jj=0; jj<nedges; jj++) { + if (cadjncy[jj] == k) { + cadjwgt[jj] += adjwgt[j]; + break; + } + } + if (jj == nedges) { + cadjncy[nedges] = k; + cadjwgt[nedges++] = adjwgt[j]; + } + } + } + + if (v != u) { + istart = xadj[u]; + iend = xadj[u+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + kk = k&mask; + if ((m = htable[kk]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + htable[kk] = nedges++; + } + else if (cadjncy[m] == k) { + cadjwgt[m] += adjwgt[j]; + } + else { + for (jj=0; jj<nedges; jj++) { + if (cadjncy[jj] == k) { + cadjwgt[jj] += adjwgt[j]; + break; + } + } + if (jj == nedges) { + cadjncy[nedges] = k; + cadjwgt[nedges++] = adjwgt[j]; + } + } + } + + /* Remove the contracted adjacency weight */ + jj = htable[cnvtxs&mask]; + if (jj >= 0 && cadjncy[jj] != cnvtxs) { + for (jj=0; jj<nedges; jj++) { + if (cadjncy[jj] == cnvtxs) + break; + } + } + /* This 2nd check is needed for non-adjacent matchings */ + if (jj >= 0 && jj < nedges && cadjncy[jj] == cnvtxs) { + cadjncy[jj] = cadjncy[--nedges]; + cadjwgt[jj] = cadjwgt[nedges]; + } + } + + /* Zero out the htable */ + for (j=0; j<nedges; j++) + htable[cadjncy[j]&mask] = -1; + htable[cnvtxs&mask] = -1; + } + else { + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + if ((m = table[k]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + table[k] = 
nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + + if (v != u) { + istart = xadj[u]; + iend = xadj[u+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + if ((m = table[k]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + table[k] = nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + + /* Remove the contracted adjacency weight */ + if ((j = table[cnvtxs]) != -1) { + ASSERT(cadjncy[j] == cnvtxs); + cadjncy[j] = cadjncy[--nedges]; + cadjwgt[j] = cadjwgt[nedges]; + table[cnvtxs] = -1; + } + } + + /* Zero out the htable */ + for (j=0; j<nedges; j++) + table[cadjncy[j]] = -1; + } + + + /* Determine the median weight of the incident edges, which will be used + to keep an edge (u, v) iff wgt(u, v) >= min(medianewgts[u], medianewgts[v]) */ + if (dropedges) { + for (j=0; j<nedges; j++) + keys[j] = (cadjwgt[j]<<8) + noise[cnvtxs] + noise[cadjncy[j]]; + isortd(nedges, keys); + medianewgts[cnvtxs] = keys[((xadj[v+1]-xadj[v] + xadj[u+1]-xadj[u])>>1)]; + } + + cadjncy += nedges; + cadjwgt += nedges; + cnedges += nedges; + cxadj[++cnvtxs] = cnedges; + } + + /* compact the adjacency structure of the coarser graph to keep only +ve edges */ + if (dropedges) { + droppedewgt = 0; + + cadjncy = cgraph->adjncy; + cadjwgt = cgraph->adjwgt; + + cnedges = 0; + for (u=0; u<cnvtxs; u++) { + istart = cxadj[u]; + iend = cxadj[u+1]; + for (j=istart; j<iend; j++) { + v = cadjncy[j]; + if ((cadjwgt[j]<<8) + noise[u] + noise[v] >= gk_min(medianewgts[u], medianewgts[v])) { + cadjncy[cnedges] = cadjncy[j]; + cadjwgt[cnedges++] = cadjwgt[j]; + } + else + droppedewgt += cadjwgt[j]; + } + cxadj[u] = cnedges; + } + SHIFTCSR(j, cnvtxs, cxadj); + + //printf("droppedewgt: %d\n", (int)droppedewgt); + + cgraph->droppedewgt = droppedewgt; + } + + cgraph->nedges = cnedges; + + for (j=0; j<ncon; j++) { + cgraph->tvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon); + cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? 
cgraph->tvwgt[j] : 1); + } + + + ReAdjustMemory(ctrl, graph, cgraph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr)); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function creates the coarser graph. Depending on the size of the + candidate adjancency lists it either uses a hash table or an array + to do duplicate detection. + */ +/*************************************************************************/ +void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match) +{ + idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, + cnedges, v, u, mask; + idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt; + idx_t *cmap, *htable, *table; + idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt; + graph_t *cgraph; + int dovsize, dropedges; + idx_t cv, nkeep, droppedewgt; + idx_t *keys=NULL, *medianewgts=NULL, *noise=NULL; + + WCOREPUSH; + + dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 
1 : 0); + dropedges = ctrl->dropedges; + + mask = HTLENGTH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr)); + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + vsize = graph->vsize; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + cmap = graph->cmap; + + /* Setup structures for dropedges */ + if (dropedges) { + for (nkeep=-1, v=0; v<nvtxs; v++) + nkeep = gk_max(nkeep, xadj[v+1]-xadj[v]); + + medianewgts = iwspacemalloc(ctrl, cnvtxs); + noise = iwspacemalloc(ctrl, cnvtxs); + keys = iwspacemalloc(ctrl, 2*(nkeep+1)); + + for (v=0; v<cnvtxs; v++) + noise[v] = irandInRange(128); + } + + /* Initialize the coarser graph */ + cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize); + cxadj = cgraph->xadj; + cvwgt = cgraph->vwgt; + cvsize = cgraph->vsize; + cadjncy = cgraph->adjncy; + cadjwgt = cgraph->adjwgt; + + htable = iset(gk_min(cnvtxs+1, mask+1), -1, iwspacemalloc(ctrl, mask+1)); + table = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs)); + + cxadj[0] = cnvtxs = cnedges = 0; + for (v=0; v<nvtxs; v++) { + if ((u = match[v]) < v) + continue; + + ASSERT(cmap[v] == cnvtxs); + ASSERT(cmap[match[v]] == cnvtxs); + + /* take care of the vertices */ + if (ncon == 1) + cvwgt[cnvtxs] = vwgt[v]; + else + icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon); + + if (dovsize) + cvsize[cnvtxs] = vsize[v]; + + if (v != u) { + if (ncon == 1) + cvwgt[cnvtxs] += vwgt[u]; + else + iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1); + + if (dovsize) + cvsize[cnvtxs] += vsize[u]; + } + + + /* take care of the edges */ + if ((xadj[v+1]-xadj[v] + xadj[u+1]-xadj[u]) < (mask>>2)) { /* use mask */ + /* put the ID of the contracted node itself at the start, so that it can be + * removed easily */ + htable[cnvtxs&mask] = 0; + cadjncy[0] = cnvtxs; + nedges = 1; + + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + for (kk=k&mask; htable[kk]!=-1 && cadjncy[htable[kk]]!=k; kk=((kk+1)%mask)); + 
if ((m = htable[kk]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + htable[kk] = nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + + if (v != u) { + istart = xadj[u]; + iend = xadj[u+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + for (kk=k&mask; htable[kk]!=-1 && cadjncy[htable[kk]]!=k; kk=((kk+1)%mask)); + if ((m = htable[kk]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + htable[kk] = nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + } + + /* zero out the htable */ + for (j=0; j<nedges; j++) { + k = cadjncy[j]; + for (kk=k&mask; cadjncy[htable[kk]]!=k; kk=((kk+1)%mask)); + htable[kk] = -1; + } + + /* remove the contracted vertex from the list */ + cadjncy[0] = cadjncy[--nedges]; + cadjwgt[0] = cadjwgt[nedges]; + } + else { + nedges = 0; + istart = xadj[v]; + iend = xadj[v+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + if ((m = table[k]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + table[k] = nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + + if (v != u) { + istart = xadj[u]; + iend = xadj[u+1]; + for (j=istart; j<iend; j++) { + k = cmap[adjncy[j]]; + if ((m = table[k]) == -1) { + cadjncy[nedges] = k; + cadjwgt[nedges] = adjwgt[j]; + table[k] = nedges++; + } + else { + cadjwgt[m] += adjwgt[j]; + } + } + + /* Remove the contracted adjacency weight */ + if ((j = table[cnvtxs]) != -1) { + ASSERT(cadjncy[j] == cnvtxs); + cadjncy[j] = cadjncy[--nedges]; + cadjwgt[j] = cadjwgt[nedges]; + table[cnvtxs] = -1; + } + } + + /* Zero out the htable */ + for (j=0; j<nedges; j++) + table[cadjncy[j]] = -1; + } + + + /* Determine the median weight of the incident edges, which will be used + to keep an edge (u, v) iff wgt(u, v) >= min(medianewgts[u], medianewgts[v]) */ + if (dropedges) { + for (j=0; j<nedges; j++) + keys[j] = (cadjwgt[j]<<8) + noise[cnvtxs] + noise[cadjncy[j]]; + isortd(nedges, keys); + medianewgts[cnvtxs] = keys[((xadj[v+1]-xadj[v] + 
xadj[u+1]-xadj[u])>>1)];
+    }
+
+    cadjncy += nedges;
+    cadjwgt += nedges;
+    cnedges += nedges;
+    cxadj[++cnvtxs] = cnedges;
+  }
+
+  /* compact the adjacency structure of the coarser graph to keep only +ve edges */
+  if (dropedges) {
+    droppedewgt = 0;
+
+    cadjncy = cgraph->adjncy;
+    cadjwgt = cgraph->adjwgt;
+
+    cnedges = 0;
+    for (u=0; u<cnvtxs; u++) {
+      istart = cxadj[u];
+      iend = cxadj[u+1];
+      for (j=istart; j<iend; j++) {
+        v = cadjncy[j];
+        if ((cadjwgt[j]<<8) + noise[u] + noise[v] >= gk_min(medianewgts[u], medianewgts[v])) {
+          cadjncy[cnedges] = cadjncy[j];
+          cadjwgt[cnedges++] = cadjwgt[j];
+        }
+        else
+          droppedewgt += cadjwgt[j];
+      }
+      cxadj[u] = cnedges;
+    }
+    SHIFTCSR(j, cnvtxs, cxadj);
+
+    //printf("droppedewgt: %d\n", (int)droppedewgt);
+
+    cgraph->droppedewgt = droppedewgt;
+  }
+
+  cgraph->nedges = cnedges;
+
+  for (j=0; j<ncon; j++) {
+    cgraph->tvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);
+    cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);
+  }
+
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph. It uses a full-size array
+    (htable) for identifying the adjacent vertices that get collapsed to
+    the same node.
+
+    \param ctrl is the control structure; its objtype, dropedges and
+           dbglvl fields are read here.
+    \param graph is the finer graph. graph->cmap[] is read to translate
+           fine vertex IDs into coarse vertex IDs.
+    \param cnvtxs is the number of vertices of the coarser graph.
+    \param match is the matching vector; match[v] == v is treated as an
+           unmatched vertex.
+
+    The coarser graph is created by SetupCoarseGraph() and filled in
+    here; nothing is returned.
+ */
+/*************************************************************************/
+void CreateCoarseGraphNoMask(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,
+         idx_t *match)
+{
+  idx_t j, k, m, istart, iend, v, u, nvtxs, nedges, ncon, cnedges;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+  int dovsize, dropedges;
+  idx_t cv, nkeep, droppedewgt;
+  idx_t *keys=NULL, *medianewgts=NULL, *noise=NULL;
+
+  WCOREPUSH;
+
+  dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+  dropedges = ctrl->dropedges;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  nvtxs = graph->nvtxs;
+  ncon = graph->ncon;
+  xadj = graph->xadj;
+  vwgt = graph->vwgt;
+  vsize = graph->vsize;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap = graph->cmap;
+
+  /* Setup structures for dropedges */
+  if (dropedges) {
+    /* nkeep ends up as the largest degree of the finer graph; keys[] is
+       sized for the merged list of two matched vertices */
+    for (nkeep=-1, v=0; v<nvtxs; v++)
+      nkeep = gk_max(nkeep, xadj[v+1]-xadj[v]);
+
+    medianewgts = iwspacemalloc(ctrl, cnvtxs);
+    noise = iwspacemalloc(ctrl, cnvtxs);
+    keys = iwspacemalloc(ctrl, 2*(nkeep+1));
+
+    /* one noise value per coarse vertex; it is added below the <<8
+       shifted edge weights when keys are formed */
+    for (v=0; v<cnvtxs; v++)
+      noise[v] = irandInRange(128);
+  }
+
+  /* Initialize the coarser graph */
+  cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj = cgraph->xadj;
+  cvwgt = cgraph->vwgt;
+  cvsize = cgraph->vsize;
+  cadjncy = cgraph->adjncy;
+  cadjwgt = cgraph->adjwgt;
+
+  /* htable[c] is the slot of coarse vertex c within the adjacency list
+     that is currently being built, or -1 if c is not in that list */
+  htable = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs));
+
+  /* from here on cnvtxs is reused as the running coarse-vertex counter */
+  cxadj[0] = cnvtxs = cnedges = 0;
+  for (v=0; v<nvtxs; v++) {
+    /* a matched pair is processed once, when its lower-numbered vertex
+       is visited */
+    if ((u = match[v]) < v)
+      continue;
+
+    ASSERT(cmap[v] == cnvtxs);
+    ASSERT(cmap[match[v]] == cnvtxs);
+
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;
+
+    /* merge the adjacency list of v; edges that map to the same coarse
+       vertex have their weights accumulated */
+    istart = xadj[v];
+    iend = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k = cmap[adjncy[j]];
+      if ((m = htable[k]) == -1) {
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[k] = nedges++;
+      }
+      else {
+        cadjwgt[m] += adjwgt[j];
+      }
+    }
+
+    if (v != u) {
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      /* merge the adjacency list of the matched vertex u */
+      istart = xadj[u];
+      iend = xadj[u+1];
+      for (j=istart; j<iend; j++) {
+        k = cmap[adjncy[j]];
+        if ((m = htable[k]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[k] = nedges++;
+        }
+        else {
+          cadjwgt[m] += adjwgt[j];
+        }
+      }
+
+      /* Remove the contracted adjacency weight */
+      /* (the entry for cnvtxs itself is overwritten by the last entry of
+         the list; the stale htable slot of the moved entry is harmless
+         because the whole table is reset right below) */
+      if ((j = htable[cnvtxs]) != -1) {
+        ASSERT(cadjncy[j] == cnvtxs);
+        cadjncy[j] = cadjncy[--nedges];
+        cadjwgt[j] = cadjwgt[nedges];
+        htable[cnvtxs] = -1;
+      }
+    }
+
+    /* Zero out the htable */
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]] = -1;
+
+
+    /* Determine the median weight of the incident edges, which will be used
+       to keep an edge (u, v) iff wgt(u, v) >= min(medianewgts[u], medianewgts[v]) */
+    /* NOTE(review): the index used below is half the summed fine-graph
+       degree of v and u, not nedges>>1; only keys[0..nedges-1] are
+       written in this iteration, so the entry that is read may come
+       from an earlier iteration or be unset -- confirm this is intended. */
+    if (dropedges) {
+      for (j=0; j<nedges; j++)
+        keys[j] = (cadjwgt[j]<<8) + noise[cnvtxs] + noise[cadjncy[j]];
+      isortd(nedges, keys);
+      medianewgts[cnvtxs] = keys[((xadj[v+1]-xadj[v] + xadj[u+1]-xadj[u])>>1)];
+    }
+
+    /* Record Advance the cadjXXX pointers */
+    cadjncy += nedges;
+    cadjwgt += nedges;
+    cnedges += nedges;
+    cxadj[++cnvtxs] = cnedges;
+  }
+
+
+  /* compact the adjacency structure of the coarser graph to keep only +ve edges */
+  if (dropedges) {
+    droppedewgt = 0;
+
+    /* rewind to the start of the arrays; they were advanced above */
+    cadjncy = cgraph->adjncy;
+    cadjwgt = cgraph->adjwgt;
+
+    cnedges = 0;
+    for (u=0; u<cnvtxs; u++) {
+      istart = cxadj[u];
+      iend = cxadj[u+1];
+      for (j=istart; j<iend; j++) {
+        v = cadjncy[j];
+        if ((cadjwgt[j]<<8) + noise[u] + noise[v] >= gk_min(medianewgts[u], medianewgts[v])) {
+          cadjncy[cnedges] = cadjncy[j];
+          cadjwgt[cnedges++] = cadjwgt[j];
+        }
+        else
+          droppedewgt += cadjwgt[j];
+      }
+      cxadj[u] = cnedges; /* end offset of u; SHIFTCSR turns these into start offsets */
+    }
+    SHIFTCSR(j, cnvtxs, cxadj);
+
+    //printf("droppedewgt: %d\n", (int)droppedewgt);
+
+    cgraph->droppedewgt = droppedewgt;
+  }
+
+  cgraph->nedges = cnedges;
+
+  /* per-constraint total vertex weight and its inverse (guarded against
+     a zero total) */
+  for (j=0; j<ncon; j++) {
+    cgraph->tvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);
+    cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);
+  }
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph.
It uses a simple hash-table
+    for identifying the adjacent vertices that get collapsed to the same
+    node. The hash-table can have conflicts, which are handled via a
+    linear scan. It relies on the perm[] array to visit the vertices in
+    increasing cnvtxs order.
+
+    \param ctrl is the control structure; its objtype and dbglvl fields
+           are read here.
+    \param graph is the finer graph. graph->cmap[] is read to translate
+           fine vertex IDs into coarse vertex IDs.
+    \param cnvtxs is the number of vertices of the coarser graph.
+    \param match is the matching vector; match[v] == v is treated as an
+           unmatched vertex.
+    \param perm is the order in which the fine vertices are visited.
+ */
+/*************************************************************************/
+void CreateCoarseGraphPerm(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,
+         idx_t *match, idx_t *perm)
+{
+  idx_t i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges,
+        v, u, mask, dovsize;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+
+  mask = HTLENGTH;
+
+  nvtxs = graph->nvtxs;
+  ncon = graph->ncon;
+  xadj = graph->xadj;
+  vwgt = graph->vwgt;
+  vsize = graph->vsize;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap = graph->cmap;
+
+  /* Initialize the coarser graph */
+  cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj = cgraph->xadj;
+  cvwgt = cgraph->vwgt;
+  cvsize = cgraph->vsize;
+  cadjncy = cgraph->adjncy;
+  cadjwgt = cgraph->adjwgt;
+
+  /* htable[k&mask] is the slot, within the adjacency list currently
+     being built, of the first coarse neighbor that hashed there (-1 if
+     none did) */
+  htable = iset(mask+1, -1, iwspacemalloc(ctrl, mask+1));
+
+  /* from here on cnvtxs is reused as the running coarse-vertex counter */
+  cxadj[0] = cnvtxs = cnedges = 0;
+  for (i=0; i<nvtxs; i++) {
+    v = perm[i];
+    /* only a vertex that maps to the next coarse ID starts a new coarse
+       vertex; every other vertex is skipped */
+    if (cmap[v] != cnvtxs)
+      continue;
+
+    u = match[v];
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;
+
+    /* merge the adjacency list of v */
+    istart = xadj[v];
+    iend = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k = cmap[adjncy[j]];
+      kk = k&mask;
+      if ((m = htable[kk]) == -1) { /* free bucket: new neighbor claims it */
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[kk] = nedges++;
+      }
+      else if (cadjncy[m] == k) { /* bucket holds k: accumulate the weight */
+        cadjwgt[m] += adjwgt[j];
+      }
+      else { /* bucket holds another neighbor: scan the list built so far */
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == k) {
+            cadjwgt[jj] += adjwgt[j];
+            break;
+          }
+        }
+        if (jj == nedges) { /* not found: append without touching htable */
+          cadjncy[nedges] = k;
+          cadjwgt[nedges++] = adjwgt[j];
+        }
+      }
+    }
+
+    if (v != u) {
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      /* merge the adjacency list of the matched vertex u (same scheme) */
+      istart = xadj[u];
+      iend = xadj[u+1];
+      for (j=istart; j<iend; j++) {
+        k = cmap[adjncy[j]];
+        kk = k&mask;
+        if ((m = htable[kk]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[kk] = nedges++;
+        }
+        else if (cadjncy[m] == k) {
+          cadjwgt[m] += adjwgt[j];
+        }
+        else {
+          for (jj=0; jj<nedges; jj++) {
+            if (cadjncy[jj] == k) {
+              cadjwgt[jj] += adjwgt[j];
+              break;
+            }
+          }
+          if (jj == nedges) {
+            cadjncy[nedges] = k;
+            cadjwgt[nedges++] = adjwgt[j];
+          }
+        }
+      }
+
+      /* Remove the contracted adjacency weight */
+      /* NOTE(review): when the scan below does not find cnvtxs, jj ends
+         up equal to nedges and the following test reads cadjncy[nedges],
+         i.e. one entry past the list built for this vertex -- confirm
+         that this cannot match by accident. */
+      jj = htable[cnvtxs&mask];
+      if (jj >= 0 && cadjncy[jj] != cnvtxs) {
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == cnvtxs)
+            break;
+        }
+      }
+      if (jj >= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */
+        cadjncy[jj] = cadjncy[--nedges];
+        cadjwgt[jj] = cadjwgt[nedges];
+      }
+    }
+
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]&mask] = -1; /* Zero out the htable */
+    htable[cnvtxs&mask] = -1; /* the bucket of the removed self entry is reset explicitly */
+
+    cnedges += nedges;
+    cxadj[++cnvtxs] = cnedges;
+    cadjncy += nedges;
+    cadjwgt += nedges;
+  }
+
+  cgraph->nedges = cnedges;
+
+  /* per-constraint total vertex weight and its inverse (guarded against
+     a zero total) */
+  for (i=0; i<ncon; i++) {
+    cgraph->tvwgt[i] = isum(cgraph->nvtxs, cgraph->vwgt+i, ncon);
+    cgraph->invtvwgt[i] = 1.0/(cgraph->tvwgt[i] > 0 ? cgraph->tvwgt[i] : 1);
+  }
+
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*!
Setup the various arrays for the coarse graph + */ +/*************************************************************************/ +graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, int dovsize) +{ + graph_t *cgraph; + + cgraph = CreateGraph(); + + cgraph->nvtxs = cnvtxs; + cgraph->ncon = graph->ncon; + + cgraph->finer = graph; + graph->coarser = cgraph; + + /* Allocate memory for the coarser graph */ + cgraph->xadj = imalloc(cnvtxs+1, "SetupCoarseGraph: xadj"); + cgraph->adjncy = imalloc(graph->nedges, "SetupCoarseGraph: adjncy"); + cgraph->adjwgt = imalloc(graph->nedges, "SetupCoarseGraph: adjwgt"); + cgraph->vwgt = imalloc(cgraph->ncon*cnvtxs, "SetupCoarseGraph: vwgt"); + cgraph->tvwgt = imalloc(cgraph->ncon, "SetupCoarseGraph: tvwgt"); + cgraph->invtvwgt = rmalloc(cgraph->ncon, "SetupCoarseGraph: invtvwgt"); + + if (dovsize) + cgraph->vsize = imalloc(cnvtxs, "SetupCoarseGraph: vsize"); + + return cgraph; +} + + +/*************************************************************************/ +/*! 
This function re-adjusts the amount of memory that was allocated if + it will lead to significant savings + */ +/*************************************************************************/ +void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph) +{ + if (cgraph->nedges > 10000 && cgraph->nedges < 0.9*graph->nedges) { + cgraph->adjncy = irealloc(cgraph->adjncy, cgraph->nedges, "ReAdjustMemory: adjncy"); + cgraph->adjwgt = irealloc(cgraph->adjwgt, cgraph->nedges, "ReAdjustMemory: adjwgt"); + } +} diff --git a/3rdParty/metis/metis-5.1.1/libmetis/compress.c b/3rdParty/metis/metis-5.1.1/libmetis/compress.c new file mode 100644 index 000000000..d72472b25 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/compress.c @@ -0,0 +1,229 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * compress.c + * + * This file contains code for compressing nodes with identical adjacency + * structure and for prunning dense columns + * + * Started 9/17/97 + * George + */ + +#include "metislib.h" + +/*************************************************************************/ +/*! This function compresses a graph by merging identical vertices + The compression should lead to at least 10% reduction. + + The compressed graph that is generated has its adjwgts set to 1. + + \returns 1 if compression was performed, otherwise it returns 0. 
+
+    \param ctrl is the control structure; only dbglvl is read here.
+    \param nvtxs, xadj, adjncy describe the input graph in CSR form.
+    \param vwgt holds the vertex weights; NULL means unit weights.
+    \param cptr, cind are filled with the CSR list of the original
+           vertices that make up each compressed vertex. They are
+           written even when no compressed graph is created.
+
+    \note The result is a graph_t pointer; it is NULL when the number of
+          compressed vertices is not below COMPRESSION_FRACTION*nvtxs.
+*/
+/**************************************************************************/
+graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy,
+             idx_t *vwgt, idx_t *cptr, idx_t *cind)
+{
+  idx_t i, ii, iii, j, jj, k, l, cnvtxs, cnedges;
+  idx_t *cxadj, *cadjncy, *cvwgt, *mark, *map;
+  ikv_t *keys;
+  graph_t *graph=NULL;
+
+  mark = ismalloc(nvtxs, -1, "CompressGraph: mark");
+  map = ismalloc(nvtxs, -1, "CompressGraph: map"); /* map[v]: compressed ID of v, -1 while unassigned */
+  keys = ikvmalloc(nvtxs, "CompressGraph: keys");
+
+  /* Compute a key for each adjacency list */
+  /* (the key is the sum of the neighbor IDs plus the vertex's own ID, so
+     vertices with the same closed neighborhood get the same key) */
+  for (i=0; i<nvtxs; i++) {
+    k = 0;
+    for (j=xadj[i]; j<xadj[i+1]; j++)
+      k += adjncy[j];
+    keys[i].key = k+i; /* Add the diagonal entry as well */
+    keys[i].val = i;
+  }
+
+  /* presumably sorts by increasing key, which places equal keys next to
+     each other -- the scan below relies on that adjacency */
+  ikvsorti(nvtxs, keys);
+
+  l = cptr[0] = 0;
+  for (cnvtxs=i=0; i<nvtxs; i++) {
+    ii = keys[i].val;
+    if (map[ii] == -1) {
+      /* ii starts a new compressed vertex; stamp its closed neighborhood
+         with i so that candidates can be checked against it */
+      mark[ii] = i; /* Add the diagonal entry */
+      for (j=xadj[ii]; j<xadj[ii+1]; j++)
+        mark[adjncy[j]] = i;
+
+      map[ii] = cnvtxs;
+      cind[l++] = ii;
+
+      for (j=i+1; j<nvtxs; j++) {
+        iii = keys[j].val;
+
+        if (keys[i].key != keys[j].key || xadj[ii+1]-xadj[ii] != xadj[iii+1]-xadj[iii])
+          break; /* Break if keys or degrees are different */
+
+        if (map[iii] == -1) { /* Do a comparison if iii has not been mapped */
+          for (jj=xadj[iii]; jj<xadj[iii+1]; jj++) {
+            if (mark[adjncy[jj]] != i)
+              break;
+          }
+
+          if (jj == xadj[iii+1]) { /* Identical adjacency structure */
+            map[iii] = cnvtxs;
+            cind[l++] = iii;
+          }
+        }
+      }
+
+      cptr[++cnvtxs] = l;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO,
+      printf(" Compression: reduction in # of vertices: %"PRIDX".\n", nvtxs-cnvtxs));
+
+
+  if (cnvtxs < COMPRESSION_FRACTION*nvtxs) {
+    /* Sufficient compression is possible, so go ahead and create the
+       compressed graph */
+
+    graph = CreateGraph();
+
+    /* upper bound on the number of compressed edges: the degree of the
+       first original vertex of every compressed vertex */
+    cnedges = 0;
+    for (i=0; i<cnvtxs; i++) {
+      ii = cind[cptr[i]];
+      cnedges += xadj[ii+1]-xadj[ii];
+    }
+
+    /* Allocate memory for the compressed graph */
+    cxadj = graph->xadj = imalloc(cnvtxs+1, "CompressGraph: xadj");
+    cvwgt = graph->vwgt = ismalloc(cnvtxs, 0, "CompressGraph: vwgt");
+    cadjncy = graph->adjncy = imalloc(cnedges, "CompressGraph: adjncy");
+    graph->adjwgt = ismalloc(cnedges, 1, "CompressGraph: adjwgt");
+
+    /* Now go and compress the graph */
+    /* (mark[] is reused: mark[c] == i means that compressed vertex c is
+       already in the adjacency list of compressed vertex i) */
+    iset(nvtxs, -1, mark);
+    l = cxadj[0] = 0;
+    for (i=0; i<cnvtxs; i++) {
+      mark[i] = i; /* Remove any diagonal entries in the compressed graph */
+      for (j=cptr[i]; j<cptr[i+1]; j++) {
+        ii = cind[j];
+
+        /* accumulate the vertex weights of the constituent vertices */
+        cvwgt[i] += (vwgt == NULL ? 1 : vwgt[ii]);
+
+        /* generate the combined adjacency list */
+        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+          k = map[adjncy[jj]];
+          if (mark[k] != i) {
+            mark[k] = i;
+            cadjncy[l++] = k;
+          }
+        }
+      }
+      cxadj[i+1] = l;
+    }
+
+    graph->nvtxs = cnvtxs;
+    graph->nedges = l;
+    graph->ncon = 1;
+
+    SetupGraph_tvwgt(graph);
+    SetupGraph_label(graph);
+  }
+
+  gk_free((void **)&keys, &map, &mark, LTERM);
+
+  return graph;
+
+}
+
+
+
+/*************************************************************************/
+/*! This function prunes all the vertices in a graph with degree greater
+    than factor*average.
+
+    \returns the pruned graph, or NULL if no vertex or every vertex would be pruned.
+*/ +/*************************************************************************/ +graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *iperm, real_t factor) +{ + idx_t i, j, k, l, nlarge, pnvtxs, pnedges; + idx_t *pxadj, *padjncy, *padjwgt, *pvwgt; + idx_t *perm; + graph_t *graph=NULL; + + perm = imalloc(nvtxs, "PruneGraph: perm"); + + factor = factor*xadj[nvtxs]/nvtxs; + + pnvtxs = pnedges = nlarge = 0; + for (i=0; i<nvtxs; i++) { + if (xadj[i+1]-xadj[i] < factor) { + perm[i] = pnvtxs; + iperm[pnvtxs++] = i; + pnedges += xadj[i+1]-xadj[i]; + } + else { + perm[i] = nvtxs - ++nlarge; + iperm[nvtxs-nlarge] = i; + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_INFO, + printf(" Pruned %"PRIDX" of %"PRIDX" vertices.\n", nlarge, nvtxs)); + + + if (nlarge > 0 && nlarge < nvtxs) { + /* Prunning is possible, so go ahead and create the prunned graph */ + graph = CreateGraph(); + + /* Allocate memory for the prunned graph*/ + pxadj = graph->xadj = imalloc(pnvtxs+1, "PruneGraph: xadj"); + pvwgt = graph->vwgt = imalloc(pnvtxs, "PruneGraph: vwgt"); + padjncy = graph->adjncy = imalloc(pnedges, "PruneGraph: adjncy"); + graph->adjwgt = ismalloc(pnedges, 1, "PruneGraph: adjwgt"); + + pxadj[0] = pnedges = l = 0; + for (i=0; i<nvtxs; i++) { + if (xadj[i+1]-xadj[i] < factor) { + pvwgt[l] = (vwgt == NULL ? 
1 : vwgt[i]); + + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = perm[adjncy[j]]; + if (k < pnvtxs) + padjncy[pnedges++] = k; + } + pxadj[++l] = pnedges; + } + } + + graph->nvtxs = pnvtxs; + graph->nedges = pnedges; + graph->ncon = 1; + + SetupGraph_tvwgt(graph); + SetupGraph_label(graph); + } + else if (nlarge > 0 && nlarge == nvtxs) { + IFSET(ctrl->dbglvl, METIS_DBG_INFO, + printf(" Pruning is ignored as it removes all vertices.\n")); + nlarge = 0; + } + + + gk_free((void **)&perm, LTERM); + + return graph; +} + + + + + + + + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/contig.c b/3rdParty/metis/metis-5.1.1/libmetis/contig.c new file mode 100644 index 000000000..3f45902db --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/contig.c @@ -0,0 +1,699 @@ +/*! +\file +\brief Functions that deal with eliminating disconnected partitions + +\date Started 7/15/98 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: contig.c 10513 2011-07-07 22:06:03Z karypis $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function finds the connected components induced by the + partitioning vector. + + \param graph is the graph structure + \param where is the partitioning vector. If this is NULL, then the + entire graph is treated to belong into a single partition. + \param cptr is the ptr structure of the CSR representation of the + components. The length of this vector must be graph->nvtxs+1. + \param cind is the indices structure of the CSR representation of + the components. The length of this vector must be graph->nvtxs. + + \returns the number of components that it found. + + \note The cptr and cind parameters can be NULL, in which case only the + number of connected components is returned. 
+*/ +/*************************************************************************/ +idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, + idx_t *cptr, idx_t *cind) +{ + idx_t i, ii, j, jj, k, me=0, nvtxs, first, last, nleft, ncmps; + idx_t *xadj, *adjncy; + idx_t *touched, *perm, *todo; + idx_t mustfree_ccsr=0, mustfree_where=0; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Deal with NULL supplied cptr/cind vectors */ + if (cptr == NULL) { + cptr = imalloc(nvtxs+1, "FindPartitionInducedComponents: cptr"); + cind = imalloc(nvtxs, "FindPartitionInducedComponents: cind"); + mustfree_ccsr = 1; + } + + /* Deal with NULL supplied where vector */ + if (where == NULL) { + where = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: where"); + mustfree_where = 1; + } + + /* Allocate memory required for the BFS traversal */ + perm = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: perm")); + todo = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: todo")); + touched = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: touched"); + + + /* Find the connected componends induced by the partition */ + ncmps = -1; + first = last = 0; + nleft = nvtxs; + while (nleft > 0) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; + ASSERT(touched[todo[0]] == 0); + i = todo[0]; + cind[last++] = i; + touched[i] = 1; + me = where[i]; + } + + i = cind[first++]; + k = perm[i]; + j = todo[k] = todo[--nleft]; + perm[j] = k; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (where[k] == me && !touched[k]) { + cind[last++] = k; + touched[k] = 1; + } + } + } + cptr[++ncmps] = first; + + if (mustfree_ccsr) + gk_free((void **)&cptr, &cind, LTERM); + if (mustfree_where) + gk_free((void **)&where, LTERM); + + gk_free((void **)&perm, &todo, &touched, LTERM); + + return ncmps; +} + + +/*************************************************************************/ +/*! 
This function computes a permutation of the vertices based on a
+    breadth-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param ctrl is the control structure
+    \param graph is the graph structure
+    \param bfsperm is the array that upon completion, bfsperm[i] will store
+           the ID of the vertex that corresponds to the ith vertex in the
+           re-ordered graph.
+*/
+/*************************************************************************/
+void ComputeBFSOrdering(ctrl_t *ctrl, graph_t *graph, idx_t *bfsperm)
+{
+  idx_t i, j, k, nvtxs, first, last;
+  idx_t *xadj, *adjncy, *perm;
+
+  WCOREPUSH;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Allocate memory required for the BFS traversal */
+  /* (perm[v] is the position of v within bfsperm[]; presumably iincset
+     fills both arrays with 0,1,2,... -- confirm against GKlib) */
+  perm = iincset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+
+  iincset(nvtxs, 0, bfsperm); /* this array will also store the vertices
+                                 still to be processed */
+
+  /* Traverse the graph breadth-first, one connected component at a time;
+     bfsperm[first..last-1] is the queue and bfsperm[last..] holds the
+     vertices that have not been reached yet */
+  first = last = 0;
+  while (first < nvtxs) {
+    if (first == last) { /* Find another starting vertex */
+      k = bfsperm[last];
+      ASSERT(perm[k] != -1);
+      perm[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = bfsperm[first++];
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      /* if a node has been already been visited, its perm[] will be -1 */
+      if (perm[k] != -1) {
+        /* perm[k] is the location within bfsperm of where k resides;
+           put in that location bfsperm[last] that we are about to
+           overwrite and update perm[bfsperm[last]] to reflect that. */
+        bfsperm[perm[k]] = bfsperm[last];
+        perm[bfsperm[last]] = perm[k];
+
+        bfsperm[last++] = k; /* put node at the end of the "queue" */
+        perm[k] = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function checks whether a graph is contiguous or not.
+ */ +/**************************************************************************/ +idx_t IsConnected(graph_t *graph, idx_t report) +{ + idx_t ncmps; + + ncmps = FindPartitionInducedComponents(graph, NULL, NULL, NULL); + + if (ncmps != 1 && report) + printf("The graph is not connected. It has %"PRIDX" connected components.\n", ncmps); + + return (ncmps == 1); +} + + +/*************************************************************************/ +/*! This function checks whether or not partition pid is contigous + */ +/*************************************************************************/ +idx_t IsConnectedSubdomain(ctrl_t *ctrl, graph_t *graph, idx_t pid, idx_t report) +{ + idx_t i, j, k, nvtxs, first, last, nleft, ncmps, wgt; + idx_t *xadj, *adjncy, *where, *touched, *queue; + idx_t *cptr; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + + touched = ismalloc(nvtxs, 0, "IsConnected: touched"); + queue = imalloc(nvtxs, "IsConnected: queue"); + cptr = imalloc(nvtxs+1, "IsConnected: cptr"); + + nleft = 0; + for (i=0; i<nvtxs; i++) { + if (where[i] == pid) + nleft++; + } + + for (i=0; i<nvtxs; i++) { + if (where[i] == pid) + break; + } + + touched[i] = 1; + queue[0] = i; + first = 0; last = 1; + + cptr[0] = 0; /* This actually points to queue */ + ncmps = 0; + while (first != nleft) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; + for (i=0; i<nvtxs; i++) { + if (where[i] == pid && !touched[i]) + break; + } + queue[last++] = i; + touched[i] = 1; + } + + i = queue[first++]; + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (where[k] == pid && !touched[k]) { + queue[last++] = k; + touched[k] = 1; + } + } + } + cptr[++ncmps] = first; + + if (ncmps > 1 && report) { + printf("The graph has %"PRIDX" connected components in partition %"PRIDX":\t", ncmps, pid); + for (i=0; i<ncmps; i++) { + wgt = 0; + for (j=cptr[i]; j<cptr[i+1]; j++) + wgt += graph->vwgt[queue[j]]; + 
printf("[%5"PRIDX" %5"PRIDX"] ", cptr[i+1]-cptr[i], wgt); + /* + if (cptr[i+1]-cptr[i] == 1) + printf("[%"PRIDX" %"PRIDX"] ", queue[cptr[i]], xadj[queue[cptr[i]]+1]-xadj[queue[cptr[i]]]); + */ + } + printf("\n"); + } + + gk_free((void **)&touched, &queue, &cptr, LTERM); + + return (ncmps == 1 ? 1 : 0); +} + + +/*************************************************************************/ +/*! This function identifies the number of connected components in a graph + that result after removing the vertices that belong to the vertex + separator (i.e., graph->where[i] == 2). + The connected component memberships are returned in the CSR-style + pair of arrays cptr, cind. +*/ +/**************************************************************************/ +idx_t FindSepInducedComponents(ctrl_t *ctrl, graph_t *graph, idx_t *cptr, + idx_t *cind) +{ + idx_t i, j, k, nvtxs, first, last, nleft, ncmps, wgt; + idx_t *xadj, *adjncy, *where, *touched, *queue; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + + touched = ismalloc(nvtxs, 0, "IsConnected: queue"); + + for (i=0; i<graph->nbnd; i++) + touched[graph->bndind[i]] = 1; + + queue = cind; + + nleft = 0; + for (i=0; i<nvtxs; i++) { + if (where[i] != 2) + nleft++; + } + + for (i=0; i<nvtxs; i++) { + if (where[i] != 2) + break; + } + + touched[i] = 1; + queue[0] = i; + first = 0; + last = 1; + cptr[0] = 0; /* This actually points to queue */ + ncmps = 0; + + while (first != nleft) { + if (first == last) { /* Find another starting vertex */ + cptr[++ncmps] = first; + for (i=0; i<nvtxs; i++) { + if (!touched[i]) + break; + } + queue[last++] = i; + touched[i] = 1; + } + + i = queue[first++]; + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (!touched[k]) { + queue[last++] = k; + touched[k] = 1; + } + } + } + cptr[++ncmps] = first; + + gk_free((void **)&touched, LTERM); + + return ncmps; +} + + +/*************************************************************************/ +/*! 
This function finds all the connected components induced by the + partitioning vector in graph->where and tries to push them around to + remove some of them. */ +/*************************************************************************/ +void EliminateComponents(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, jj, k, me, nparts, nvtxs, ncon, ncmps, other, + ncand, target; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts; + idx_t *cptr, *cind, *cpvec, *pcptr, *pcind, *cwhere; + idx_t cid, bestcid, *cwgt, *bestcwgt; + idx_t ntodo, oldntodo, *todo; + rkv_t *cand; + real_t *tpwgts; + idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL; /* volume specific work arrays */ + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt); + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + cptr = iwspacemalloc(ctrl, nvtxs+1); + cind = iwspacemalloc(ctrl, nvtxs); + + ncmps = FindPartitionInducedComponents(graph, where, cptr, cind); + + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, + printf("I found %"PRIDX" components, for this %"PRIDX"-way partition\n", + ncmps, nparts)); + + /* There are more components than partitions */ + if (ncmps > nparts) { + cwgt = iwspacemalloc(ctrl, ncon); + bestcwgt = iwspacemalloc(ctrl, ncon); + cpvec = iwspacemalloc(ctrl, nparts); + pcptr = iset(nparts+1, 0, iwspacemalloc(ctrl, nparts+1)); + pcind = iwspacemalloc(ctrl, ncmps); + cwhere = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + todo = iwspacemalloc(ctrl, ncmps); + cand = (rkv_t *)wspacemalloc(ctrl, nparts*sizeof(rkv_t)); + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + } + + + /* Get a CSR 
representation of the components-2-partitions mapping */ + for (i=0; i<ncmps; i++) + pcptr[where[cind[cptr[i]]]]++; + MAKECSR(i, nparts, pcptr); + for (i=0; i<ncmps; i++) + pcind[pcptr[where[cind[cptr[i]]]]++] = i; + SHIFTCSR(i, nparts, pcptr); + + /* Assign the heaviest component of each partition to its original partition */ + for (ntodo=0, i=0; i<nparts; i++) { + if (pcptr[i+1]-pcptr[i] == 1) + bestcid = pcind[pcptr[i]]; + else { + for (bestcid=-1, j=pcptr[i]; j<pcptr[i+1]; j++) { + cid = pcind[j]; + iset(ncon, 0, cwgt); + for (ii=cptr[cid]; ii<cptr[cid+1]; ii++) + iaxpy(ncon, 1, vwgt+cind[ii]*ncon, 1, cwgt, 1); + if (bestcid == -1 || isum(ncon, bestcwgt, 1) < isum(ncon, cwgt, 1)) { + bestcid = cid; + icopy(ncon, cwgt, bestcwgt); + } + } + /* Keep track of those that need to be dealt with */ + for (j=pcptr[i]; j<pcptr[i+1]; j++) { + if (pcind[j] != bestcid) + todo[ntodo++] = pcind[j]; + } + } + + for (j=cptr[bestcid]; j<cptr[bestcid+1]; j++) { + ASSERT(where[cind[j]] == i); + cwhere[cind[j]] = i; + } + } + + + while (ntodo > 0) { + oldntodo = ntodo; + for (i=0; i<ntodo; i++) { + cid = todo[i]; + me = where[cind[cptr[cid]]]; /* Get the domain of this component */ + + /* Determine the weight of the block to be moved */ + iset(ncon, 0, cwgt); + for (j=cptr[cid]; j<cptr[cid+1]; j++) + iaxpy(ncon, 1, vwgt+cind[j]*ncon, 1, cwgt, 1); + + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, + printf("Trying to move %"PRIDX" [%"PRIDX"] from %"PRIDX"\n", + cid, isum(ncon, cwgt, 1), me)); + + /* Determine the connectivity */ + iset(nparts, 0, cpvec); + for (j=cptr[cid]; j<cptr[cid+1]; j++) { + ii = cind[j]; + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) + if (cwhere[adjncy[jj]] != -1) + cpvec[cwhere[adjncy[jj]]] += (adjwgt ? 
adjwgt[jj] : 1); + } + + /* Put the neighbors into a cand[] array for sorting */ + for (ncand=0, j=0; j<nparts; j++) { + if (cpvec[j] > 0) { + cand[ncand].key = cpvec[j]; + cand[ncand++].val = j; + } + } + if (ncand == 0) + continue; + + rkvsortd(ncand, cand); + + /* Limit the moves to only the top candidates, which are defined as + those with connectivity at least 50% of the best. + This applies only when ncon=1, as for multi-constraint, balancing + will be hard. */ + if (ncon == 1) { + for (j=1; j<ncand; j++) { + if (cand[j].key < .5*cand[0].key) + break; + } + ncand = j; + } + + /* Now among those, select the one with the best balance */ + target = cand[0].val; + for (j=1; j<ncand; j++) { + if (BetterBalanceKWay(ncon, cwgt, ctrl->ubfactors, + 1, pwgts+target*ncon, ctrl->pijbm+target*ncon, + 1, pwgts+cand[j].val*ncon, ctrl->pijbm+cand[j].val*ncon)) + target = cand[j].val; + } + + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, + printf("\tMoving it to %"PRIDX" [%"PRIDX"] [%"PRIDX"]\n", target, cpvec[target], ncand)); + + /* Note that as a result of a previous movement, a connected component may + now will like to stay to its original partition */ + if (target != me) { + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + MoveGroupContigForCut(ctrl, graph, target, cid, cptr, cind); + break; + + case METIS_OBJTYPE_VOL: + MoveGroupContigForVol(ctrl, graph, target, cid, cptr, cind, + vmarker, pmarker, modind); + break; + + default: + gk_errexit(SIGERR, "Unknown objtype %d\n", ctrl->objtype); + } + } + + /* Update the cwhere vector */ + for (j=cptr[cid]; j<cptr[cid+1]; j++) + cwhere[cind[j]] = target; + + todo[i] = todo[--ntodo]; + } + if (oldntodo == ntodo) { + IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, printf("Stopped at ntodo: %"PRIDX"\n", ntodo)); + break; + } + } + + for (i=0; i<nvtxs; i++) + ASSERT(where[i] == cwhere[i]); + + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function moves a collection of vertices and updates their rinfo
+
+    \param ctrl is the control structure; its cnbrpool is used.
+    \param graph is the graph structure; mincut, pwgts, nbnd and the
+           ckrinfo entries are updated.
+    \param to is the partition that the vertices are moved to.
+    \param gid is the ID of the group to move.
+    \param ptr, ind are the CSR arrays of the groups; the vertices of
+           group gid are ind[ptr[gid]] ... ind[ptr[gid+1]-1].
+ */
+/*************************************************************************/
+void MoveGroupContigForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid,
+         idx_t *ptr, idx_t *ind)
+{
+  idx_t i, ii, iii, j, jj, k, l, nvtxs, nbnd, from, me;
+  idx_t *xadj, *adjncy, *adjwgt, *where, *bndptr, *bndind;
+  ckrinfo_t *myrinfo;
+  cnbr_t *mynbrs;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  /* nbnd is kept in a local and stored back into the graph at the end */
+  nbnd = graph->nbnd;
+
+  for (iii=ptr[gid]; iii<ptr[gid+1]; iii++) {
+    i = ind[iii];
+    from = where[i];
+
+    /* make sure that i has a neighbor list; room for degree+1 entries */
+    myrinfo = graph->ckrinfo+i;
+    if (myrinfo->inbr == -1) {
+      myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+      myrinfo->nnbrs = 0;
+    }
+    mynbrs = ctrl->cnbrpool + myrinfo->inbr;
+
+    /* find the location of 'to' in myrinfo or create it if it is not there */
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].pid == to)
+        break;
+    }
+    if (k == myrinfo->nnbrs) {
+      mynbrs[k].pid = to;
+      mynbrs[k].ed = 0;
+      myrinfo->nnbrs++;
+    }
+
+    /* the cut shrinks by the weight towards 'to' and grows by the
+       internal weight of i */
+    graph->mincut -= mynbrs[k].ed-myrinfo->id;
+
+    /* Update ID/ED and BND related information for the moved vertex */
+    iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1);
+    iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1);
+    /* NOTE(review): where[i] is not assigned in this function; presumably
+       the macro below does it, as it is handed where, from and to --
+       confirm against its definition. */
+    UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd,
+        bndptr, bndind, BNDTYPE_REFINE);
+
+    /* Update the degrees of adjacent vertices */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      ii = adjncy[j];
+      me = where[ii];
+      myrinfo = graph->ckrinfo+ii;
+
+      UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me,
+          from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE);
+    }
+
+    ASSERT(CheckRInfo(ctrl, graph->ckrinfo+i));
+  }
+
+  graph->nbnd = nbnd;
+}
+
+
+/*************************************************************************/ +/*! This function moves a collection of vertices and updates their rinfo + */ +/*************************************************************************/ +void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker, + idx_t *modind) +{ + idx_t i, ii, iii, j, jj, k, l, nvtxs, from, me, other, xgain; + idx_t *xadj, *vsize, *adjncy, *where; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + + for (iii=ptr[gid]; iii<ptr[gid+1]; iii++) { + i = ind[iii]; + from = where[i]; + + myrinfo = graph->vkrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0); + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + if (k == myrinfo->nnbrs) { + if (myrinfo->nid > 0) + xgain -= vsize[i]; + + /* determine the volume gain resulting from that move */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = graph->vkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + ASSERT(other != to) + + if (from == other) { + /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. 
*/ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + } + else { + /* Remote vertex: increase if 'to' is a new subdomain */ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + + /* Remote vertex: decrease if i is the only connection to 'from' */ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == from && onbrs[l].ned == 1) { + xgain += vsize[ii]; + break; + } + } + } + } + graph->minvol -= xgain; + graph->mincut -= -myrinfo->nid; + } + else { + graph->minvol -= (xgain + mynbrs[k].gv); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + } + + + /* Update where and pwgts */ + where[i] = to; + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + + /* Update the id/ed/gains/bnd of potentially affected nodes */ + KWayVolUpdate(ctrl, graph, i, from, to, NULL, NULL, NULL, NULL, + NULL, BNDTYPE_REFINE, vmarker, pmarker, modind); + + /*CheckKWayVolPartitionParams(ctrl, graph);*/ + } + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERTP(ComputeVolume(graph, where) == graph->minvol, + ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol)); + +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/debug.c b/3rdParty/metis/metis-5.1.1/libmetis/debug.c new file mode 100644 index 000000000..47aba6165 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/debug.c @@ -0,0 +1,461 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * debug.c + * + * This file contains code that performs self debuging + * + * Started 7/24/97 + * George + * + */ + +#include "metislib.h" + + + +/*************************************************************************/ +/*! 
 This function computes the total edgecut of the partitioning in where[]: every adjacency entry whose two endpoints lie in different partitions is counted (weighted by adjwgt when it is not NULL) and the sum is halved before it is returned + */ +/*************************************************************************/ +idx_t ComputeCut(graph_t *graph, idx_t *where) +{ + idx_t i, j, cut; + + if (graph->adjwgt == NULL) { + for (cut=0, i=0; i<graph->nvtxs; i++) { + for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cut++; + } + } + else { + for (cut=0, i=0; i<graph->nvtxs; i++) { + for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cut += graph->adjwgt[j]; + } + } + + return cut/2; +} + + +/*************************************************************************/ +/*! This function computes the total volume: each vertex contributes vsize[i] (1 when vsize is NULL) once for every distinct partition, other than its own, that appears among its neighbors + */ +/*************************************************************************/ +idx_t ComputeVolume(graph_t *graph, idx_t *where) +{ + idx_t i, j, k, me, nvtxs, nparts, totalv; + idx_t *xadj, *adjncy, *vsize, *marker; + + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vsize = graph->vsize; + + nparts = where[iargmax(nvtxs, where,1)]+1; + marker = ismalloc(nparts, -1, "ComputeVolume: marker"); + + totalv = 0; + + for (i=0; i<nvtxs; i++) { + marker[where[i]] = i; + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = where[adjncy[j]]; + if (marker[k] != i) { + marker[k] = i; + totalv += (vsize ? vsize[i] : 1); + } + } + } + + gk_free((void **)&marker, LTERM); + + return totalv; +} + + +/*************************************************************************/ +/*! 
 This function accumulates, for every partition, the cut seen from its own vertices (weighted by adjwgt when it is not NULL), prints the largest value together with its partition index, and returns that largest per-partition cut + */ +/*************************************************************************/ +idx_t ComputeMaxCut(graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i, j, maxcut; + idx_t *cuts; + + cuts = ismalloc(nparts, 0, "ComputeMaxCut: cuts"); + + if (graph->adjwgt == NULL) { + for (i=0; i<graph->nvtxs; i++) { + for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cuts[where[i]]++; + } + } + else { + for (i=0; i<graph->nvtxs; i++) { + for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) + if (where[i] != where[graph->adjncy[j]]) + cuts[where[i]] += graph->adjwgt[j]; + } + } + + maxcut = cuts[iargmax(nparts, cuts,1)]; + + printf("%zu => %"PRIDX"\n", iargmax(nparts, cuts,1), maxcut); + + gk_free((void **)&cuts, LTERM); + + return maxcut; +} + + +/*************************************************************************/ +/*! This function checks whether or not the boundary information is correct: every vertex with a neighbor in another partition must be registered in bndptr/bndind, and the recount (islands included) must equal graph->nbnd + */ +/*************************************************************************/ +idx_t CheckBnd(graph_t *graph) +{ + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + for (nbnd=0, i=0; i<nvtxs; i++) { + if (xadj[i+1]-xadj[i] == 0) + nbnd++; /* Islands are considered to be boundary vertices */ + + for (j=xadj[i]; j<xadj[i+1]; j++) { + if (where[i] != where[adjncy[j]]) { + nbnd++; + ASSERT(bndptr[i] != -1); + ASSERT(bndind[bndptr[i]] == i); + break; + } + } + } + + ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd)); + + return 1; +} + + + +/*************************************************************************/ +/*! 
 This function checks the boundary information under the rule that a vertex with at least one edge is a boundary vertex when its external degree is not smaller than its internal degree (ed-id >= 0); the recount must equal graph->nbnd + */ +/*************************************************************************/ +idx_t CheckBnd2(graph_t *graph) +{ + idx_t i, j, nvtxs, nbnd, id, ed; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + for (nbnd=0, i=0; i<nvtxs; i++) { + id = ed = 0; + for (j=xadj[i]; j<xadj[i+1]; j++) { + if (where[i] != where[adjncy[j]]) + ed += graph->adjwgt[j]; + else + id += graph->adjwgt[j]; + } + if (ed - id >= 0 && xadj[i] < xadj[i+1]) { + nbnd++; + ASSERTP(bndptr[i] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", i, id, ed)); + ASSERT(bndind[bndptr[i]] == i); + } + } + + ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd)); + + return 1; +} + + +/*************************************************************************/ +/*! This function checks the boundary information of a node separator: the number of vertices with where[i] == 2 must equal onbnd, and bndptr must be set for exactly those vertices + */ +/*************************************************************************/ +idx_t CheckNodeBnd(graph_t *graph, idx_t onbnd) +{ + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *where, *bndptr, *bndind; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + for (nbnd=0, i=0; i<nvtxs; i++) { + if (where[i] == 2) + nbnd++; + } + + ASSERTP(nbnd == onbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, onbnd)); + + for (i=0; i<nvtxs; i++) { + if (where[i] != 2) { + ASSERTP(bndptr[i] == -1, ("%"PRIDX" %"PRIDX"\n", i, bndptr[i])); + } + else { + ASSERTP(bndptr[i] != -1, ("%"PRIDX" %"PRIDX"\n", i, bndptr[i])); + } + } + + return 1; +} + + + +/*************************************************************************/ +/*! 
This function checks whether or not the rinfo of a vertex is consistent + */ +/*************************************************************************/ +idx_t CheckRInfo(ctrl_t *ctrl, ckrinfo_t *rinfo) +{ + idx_t i, j; + cnbr_t *nbrs; + + nbrs = ctrl->cnbrpool + rinfo->inbr; + + for (i=0; i<rinfo->nnbrs; i++) { + for (j=i+1; j<rinfo->nnbrs; j++) + ASSERTP(nbrs[i].pid != nbrs[j].pid, + ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, j, nbrs[i].pid, nbrs[j].pid)); + } + + return 1; +} + + + +/*************************************************************************/ +/*! This function checks the correctness of the NodeFM data structures + */ +/*************************************************************************/ +idx_t CheckNodePartitionParams(graph_t *graph) +{ + idx_t i, j, k, l, nvtxs, me, other; + idx_t *xadj, *adjncy, *adjwgt, *vwgt, *where; + idx_t edegrees[2], pwgts[3]; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + + /*------------------------------------------------------------ + / Compute now the separator external degrees + /------------------------------------------------------------*/ + pwgts[0] = pwgts[1] = pwgts[2] = 0; + for (i=0; i<nvtxs; i++) { + me = where[i]; + pwgts[me] += vwgt[i]; + + if (me == 2) { /* If it is on the separator do some computations */ + edegrees[0] = edegrees[1] = 0; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + other = where[adjncy[j]]; + if (other != 2) + edegrees[other] += vwgt[adjncy[j]]; + } + if (edegrees[0] != graph->nrinfo[i].edegrees[0] || + edegrees[1] != graph->nrinfo[i].edegrees[1]) { + printf("Something wrong with edegrees: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, edegrees[0], edegrees[1], + graph->nrinfo[i].edegrees[0], graph->nrinfo[i].edegrees[1]); + return 0; + } + } + } + + if (pwgts[0] != graph->pwgts[0] || + pwgts[1] != graph->pwgts[1] || + pwgts[2] != graph->pwgts[2]) { + printf("Something 
wrong with part-weights: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", pwgts[0], pwgts[1], pwgts[2], graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]); + return 0; + } + + return 1; +} + + +/*************************************************************************/ +/*! This function checks if the separator is indeed a separator + */ +/*************************************************************************/ +idx_t IsSeparable(graph_t *graph) +{ + idx_t i, j, nvtxs, other; + idx_t *xadj, *adjncy, *where; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + where = graph->where; + + for (i=0; i<nvtxs; i++) { + if (where[i] == 2) + continue; + other = (where[i]+1)%2; + for (j=xadj[i]; j<xadj[i+1]; j++) { + ASSERTP(where[adjncy[j]] != other, + ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, where[i], adjncy[j], where[adjncy[j]], xadj[i+1]-xadj[i], + xadj[adjncy[j]+1]-xadj[adjncy[j]])); + } + } + + return 1; +} + + +/*************************************************************************/ +/*! 
This function recomputes the vrinfo fields and checks them against + those in the graph->vrinfo structure */ +/*************************************************************************/ +void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, k, kk, l, nvtxs, nbnd, mincut, minvol, me, other, pid; + idx_t *xadj, *vsize, *adjncy, *pwgts, *where, *bndind, *bndptr; + vkrinfo_t *rinfo, *myrinfo, *orinfo, tmprinfo; + vnbr_t *mynbrs, *onbrs, *tmpnbrs; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + rinfo = graph->vkrinfo; + + tmpnbrs = (vnbr_t *)wspacemalloc(ctrl, ctrl->nparts*sizeof(vnbr_t)); + + /*------------------------------------------------------------ + / Compute now the iv/ev degrees + /------------------------------------------------------------*/ + for (i=0; i<nvtxs; i++) { + me = where[i]; + + myrinfo = rinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + for (k=0; k<myrinfo->nnbrs; k++) + tmpnbrs[k] = mynbrs[k]; + + tmprinfo.nnbrs = myrinfo->nnbrs; + tmprinfo.nid = myrinfo->nid; + tmprinfo.ned = myrinfo->ned; + + myrinfo = &tmprinfo; + mynbrs = tmpnbrs; + + for (k=0; k<myrinfo->nnbrs; k++) + mynbrs[k].gv = 0; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = rinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + if (me == other) { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; k<myrinfo->nnbrs; k++) { + pid = mynbrs[k].pid; + for (kk=0; kk<orinfo->nnbrs; kk++) { + if (onbrs[kk].pid == pid) + break; + } + if (kk == orinfo->nnbrs) + mynbrs[k].gv -= vsize[ii]; + } + } + else { + /* Find the orinfo[me].ed and see if I'm the only connection */ + for (k=0; k<orinfo->nnbrs; k++) { + if (onbrs[k].pid == me) + break; + } + + if (onbrs[k].ned == 1) { /* I'm the only connection of 'ii' in 'me' */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == other) { 
+ mynbrs[k].gv += vsize[ii]; + break; + } + } + + /* Increase the gains for all the common domains between 'i' and 'ii' */ + for (k=0; k<myrinfo->nnbrs; k++) { + if ((pid = mynbrs[k].pid) == other) + continue; + for (kk=0; kk<orinfo->nnbrs; kk++) { + if (onbrs[kk].pid == pid) { + mynbrs[k].gv += vsize[ii]; + break; + } + } + } + + } + else { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; k<myrinfo->nnbrs; k++) { + if ((pid = mynbrs[k].pid) == other) + continue; + for (kk=0; kk<orinfo->nnbrs; kk++) { + if (onbrs[kk].pid == pid) + break; + } + if (kk == orinfo->nnbrs) + mynbrs[k].gv -= vsize[ii]; + } + } + } + } + + myrinfo = rinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + for (k=0; k<myrinfo->nnbrs; k++) { + pid = mynbrs[k].pid; + for (kk=0; kk<tmprinfo.nnbrs; kk++) { + if (tmpnbrs[kk].pid == pid) { + if (tmpnbrs[kk].gv != mynbrs[k].gv) + printf("[%8"PRIDX" %8"PRIDX" %8"PRIDX" %+8"PRIDX" %+8"PRIDX"]\n", + i, where[i], pid, mynbrs[k].gv, tmpnbrs[kk].gv); + break; + } + } + } + + } + + WCOREPOP; +} + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/defs.h b/3rdParty/metis/metis-5.1.1/libmetis/defs.h new file mode 100644 index 000000000..7d837e265 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/defs.h @@ -0,0 +1,60 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * defs.h + * + * This file contains constant definitions + * + * Started 8/27/94 + * George + * + * $Id: defs.h 20398 2016-11-22 17:17:12Z karypis $ + * + */ + +#ifndef _LIBMETIS_DEFS_H_ +#define _LIBMETIS_DEFS_H_ + +#define METISTITLE "METIS 5.2 Copyright 1998-16, Regents of the University of Minnesota\n" +#define MAXLINE 1280000 + +#define LTERM (void **) 0 /* List terminator for gk_free() */ + +#define HTLENGTH ((1<<13)-1) + +#define INIT_MAXNAD 200 /* Initial number of maximum number of + adjacent domains. This number will be + adjusted as required. 
 */ + +/* Types of boundaries */ +#define BNDTYPE_REFINE 1 /* Used for k-way refinement purposes */ +#define BNDTYPE_BALANCE 2 /* Used for k-way balancing purposes */ + +/* Mode of optimization */ +#define OMODE_REFINE 1 /* Optimize the objective function */ +#define OMODE_BALANCE 2 /* Balance the subdomains */ + +/* Types of vertex statuses in the priority queue */ +#define VPQSTATUS_PRESENT 1 /* The vertex is in the queue */ +#define VPQSTATUS_EXTRACTED 2 /* The vertex has been extracted from the queue */ +#define VPQSTATUS_NOTPRESENT 3 /* The vertex is not present in the queue and + has not been extracted before */ + +#define UNMATCHED -1 + +#define LARGENIPARTS 7 /* Number of random initial partitions */ +#define SMALLNIPARTS 5 /* Number of random initial partitions */ + +#define COARSEN_FRACTION 0.85 /* Node reduction between successive coarsening levels */ + +#define COMPRESSION_FRACTION 0.85 + +#define MMDSWITCH 120 + +/* Default ufactors for the various operational modes */ +#define PMETIS_DEFAULT_UFACTOR 1 +#define MCPMETIS_DEFAULT_UFACTOR 10 +#define KMETIS_DEFAULT_UFACTOR 30 +#define OMETIS_DEFAULT_UFACTOR 200 + +#endif diff --git a/3rdParty/metis/metis-5.1.1/libmetis/fm.c b/3rdParty/metis/metis-5.1.1/libmetis/fm.c new file mode 100644 index 000000000..7f5ea6b01 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/fm.c @@ -0,0 +1,543 @@ +/*! 
+\file +\brief Functions for the edge-based FM refinement + +\date Started 7/23/97 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: fm.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/************************************************************************* +* This function performs an edge-based FM refinement +**************************************************************************/ +void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) +{ + if (graph->ncon == 1) + FM_2WayCutRefine(ctrl, graph, ntpwgts, niter); + else + FM_Mc2WayCutRefine(ctrl, graph, ntpwgts, niter); +} + + +/*************************************************************************/ +/*! This function performs a cut-focused FM refinement */ +/*************************************************************************/ +void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) +{ + idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; + idx_t *moved, *swaps, *perm; + rpq_t *queues[2]; + idx_t higain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt; + idx_t tpwgts[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + tpwgts[0] = graph->tvwgt[0]*ntpwgts[0]; + tpwgts[1] = graph->tvwgt[0]-tpwgts[0]; + + limit = gk_min(gk_max(0.01*nvtxs, 15), 100); + avgvwgt = gk_min((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs); + + queues[0] = rpqCreate(nvtxs); + queues[1] = rpqCreate(nvtxs); + + 
IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, 0, -2)); + + origdiff = iabs(tpwgts[0]-pwgts[0]); + iset(nvtxs, -1, moved); + for (pass=0; pass<niter; pass++) { /* Do a number of passes */ + rpqReset(queues[0]); + rpqReset(queues[1]); + + mincutorder = -1; + newcut = mincut = initcut = graph->mincut; + mindiff = iabs(tpwgts[0]-pwgts[0]); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert boundary nodes in the priority queues */ + nbnd = graph->nbnd; + irandArrayPermute(nbnd, perm, nbnd, 1); + for (ii=0; ii<nbnd; ii++) { + i = perm[ii]; + ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0); + ASSERT(bndptr[bndind[i]] != -1); + rpqInsert(queues[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]); + } + + for (nswaps=0; nswaps<nvtxs; nswaps++) { + from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1); + to = (from+1)%2; + + if ((higain = rpqGetTop(queues[from])) == -1) + break; + ASSERT(bndptr[higain] != -1); + + newcut -= (ed[higain]-id[higain]); + INC_DEC(pwgts[to], pwgts[from], vwgt[higain]); + + if ((newcut < mincut && iabs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) || + (newcut == mincut && iabs(tpwgts[0]-pwgts[0]) < mindiff)) { + mincut = newcut; + mindiff = iabs(tpwgts[0]-pwgts[0]); + mincutorder = nswaps; + } + else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ + newcut += (ed[higain]-id[higain]); + INC_DEC(pwgts[from], pwgts[to], vwgt[higain]); + break; + } + + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" from %"PRIDX". 
[%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1])); + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + /* Update its boundary information and queue position */ + if (bndptr[k] != -1) { /* If k was a boundary vertex */ + if (ed[k] == 0) { /* Not a boundary vertex any more */ + BNDDelete(nbnd, bndind, bndptr, k); + if (moved[k] == -1) /* Remove it if in the queues */ + rpqDelete(queues[where[k]], k); + } + else { /* If it has not been moved, update its position in the queue */ + if (moved[k] == -1) + rpqUpdate(queues[where[k]], k, ed[k]-id[k]); + } + } + else { + if (ed[k] > 0) { /* It will now become a boundary vertex */ + BNDInsert(nbnd, bndind, bndptr, k); + if (moved[k] == -1) + rpqInsert(queues[where[k]], k, ed[k]-id[k]); + } + } + } + + } + + + /**************************************************************** + * Roll back computations + *****************************************************************/ + for (i=0; i<nswaps; i++) + moved[swaps[i]] = -1; /* reset moved array */ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + to = where[higain] = (where[higain]+1)%2; + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + else if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]); + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = 
adjncy[j]; + + kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + if (bndptr[k] != -1 && ed[k] == 0) + BNDDelete(nbnd, bndind, bndptr, k); + if (bndptr[k] == -1 && ed[k] > 0) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + graph->mincut = mincut; + graph->nbnd = nbnd; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, 0, mincutorder)); + + if (mincutorder <= 0 || mincut == initcut) + break; + } + + rpqDestroy(queues[0]); + rpqDestroy(queues[1]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function performs a cut-focused multi-constraint FM refinement */ +/*************************************************************************/ +void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter) +{ + idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, + me, limit, tmp, cnum; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *id, *ed, + *bndptr, *bndind; + idx_t *moved, *swaps, *perm, *qnum; + idx_t higain, mincut, initcut, newcut, mincutorder; + real_t *invtvwgt, *ubfactors, *minbalv, *newbalv; + real_t origbal, minbal, newbal, rgain, ffactor; + rpq_t **queues; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + invtvwgt = graph->invtvwgt; + where = graph->where; + id = graph->id; + ed = graph->ed; + pwgts = graph->pwgts; + bndptr = graph->bndptr; + bndind = graph->bndind; + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + qnum = iwspacemalloc(ctrl, nvtxs); + ubfactors = rwspacemalloc(ctrl, ncon); + newbalv = rwspacemalloc(ctrl, ncon); + minbalv = rwspacemalloc(ctrl, ncon); + + limit = gk_min(gk_max(0.01*nvtxs, 25), 150); + + + /* Determine a fudge factor to allow the refinement routines to get out + of tight balancing 
constraints. */ + ffactor = .5/gk_max(20, nvtxs); + + /* Initialize the queues */ + queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *)); + for (i=0; i<2*ncon; i++) + queues[i] = rpqCreate(nvtxs); + for (i=0; i<nvtxs; i++) + qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt); + + /* Determine the unbalance tolerance for each constraint. The tolerance is + equal to the maximum of the original load imbalance and the user-supplied + allowed tolerance. The rationale behind this approach is to allow the + refinement routine to improve the cut, without having to worry about fixing + load imbalance problems. The load imbalance is addressed by the balancing + routines. */ + origbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, ubfactors); + for (i=0; i<ncon; i++) + ubfactors[i] = (ubfactors[i] > 0 ? ctrl->ubfactors[i]+ubfactors[i] : ctrl->ubfactors[i]); + + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, origbal, -2)); + + iset(nvtxs, -1, moved); + for (pass=0; pass<niter; pass++) { /* Do a number of passes */ + for (i=0; i<2*ncon; i++) + rpqReset(queues[i]); + + mincutorder = -1; + newcut = mincut = initcut = graph->mincut; + + minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, minbalv); + + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERT(CheckBnd(graph)); + + /* Insert boundary nodes in the priority queues */ + nbnd = graph->nbnd; + irandArrayPermute(nbnd, perm, nbnd/5, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[perm[ii]]; + ASSERT(ed[i] > 0 || id[i] == 0); + ASSERT(bndptr[i] != -1); + //rgain = 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1); + //rgain = (ed[i]-id[i] > 0 ? 
1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1) : ed[i]-id[i]); + rgain = ed[i]-id[i]; + rpqInsert(queues[2*qnum[i]+where[i]], i, rgain); + } + + for (nswaps=0; nswaps<nvtxs; nswaps++) { + SelectQueue(graph, ctrl->pijbm, ubfactors, queues, &from, &cnum); + + to = (from+1)%2; + + if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1) + break; + ASSERT(bndptr[higain] != -1); + + newcut -= (ed[higain]-id[higain]); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, newbalv); + + if ((newcut < mincut && newbal <= ffactor) || + (newcut == mincut && (newbal < minbal || + (newbal == minbal && BetterBalance2Way(ncon, minbalv, newbalv))))) { + mincut = newcut; + minbal = newbal; + mincutorder = nswaps; + rcopy(ncon, newbalv, minbalv); + } + else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ + newcut += (ed[higain]-id[higain]); + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + break; + } + + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + if (ctrl->dbglvl&METIS_DBG_MOVEINFO) { + printf("Moved%6"PRIDX" from %"PRIDX"(%"PRIDX") Gain:%5"PRIDX", " + "Cut:%5"PRIDX", NPwgts:", higain, from, cnum, ed[higain]-id[higain], newcut); + for (l=0; l<ncon; l++) + printf("(%.3"PRREAL" %.3"PRREAL")", pwgts[l]*invtvwgt[l], pwgts[ncon+l]*invtvwgt[l]); + printf(" %+.3"PRREAL" LB: %.3"PRREAL"(%+.3"PRREAL")\n", + minbal, ComputeLoadImbalance(graph, 2, ctrl->pijbm), newbal); + } + + + /************************************************************** + * Update the id[i]/ed[i] values of the affected nodes + ***************************************************************/ + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + + for (j=xadj[higain]; 
j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + /* Update its boundary information and queue position */ + if (bndptr[k] != -1) { /* If k was a boundary vertex */ + if (ed[k] == 0) { /* Not a boundary vertex any more */ + BNDDelete(nbnd, bndind, bndptr, k); + if (moved[k] == -1) /* Remove it if in the queues */ + rpqDelete(queues[2*qnum[k]+where[k]], k); + } + else { /* If it has not been moved, update its position in the queue */ + if (moved[k] == -1) { + //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1); + //rgain = (ed[k]-id[k] > 0 ? + // 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]); + rgain = ed[k]-id[k]; + rpqUpdate(queues[2*qnum[k]+where[k]], k, rgain); + } + } + } + else { + if (ed[k] > 0) { /* It will now become a boundary vertex */ + BNDInsert(nbnd, bndind, bndptr, k); + if (moved[k] == -1) { + //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1); + //rgain = (ed[k]-id[k] > 0 ? 
+ // 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]); + rgain = ed[k]-id[k]; + rpqInsert(queues[2*qnum[k]+where[k]], k, rgain); + } + } + } + } + + } + + + /**************************************************************** + * Roll back computations + *****************************************************************/ + for (i=0; i<nswaps; i++) + moved[swaps[i]] = -1; /* reset moved array */ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + to = where[higain] = (where[higain]+1)%2; + SWAP(id[higain], ed[higain], tmp); + if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) + BNDDelete(nbnd, bndind, bndptr, higain); + else if (ed[higain] > 0 && bndptr[higain] == -1) + BNDInsert(nbnd, bndind, bndptr, higain); + + iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1); + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + + kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); + INC_DEC(id[k], ed[k], kwgt); + + if (bndptr[k] != -1 && ed[k] == 0) + BNDDelete(nbnd, bndind, bndptr, k); + if (bndptr[k] == -1 && ed[k] > 0) + BNDInsert(nbnd, bndind, bndptr, k); + } + } + + graph->mincut = mincut; + graph->nbnd = nbnd; + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + Print2WayRefineStats(ctrl, graph, ntpwgts, minbal, mincutorder)); + + if (mincutorder <= 0 || mincut == initcut) + break; + } + + for (i=0; i<2*ncon; i++) + rpqDestroy(queues[i]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function selects the partition number and the queue from which + we will move vertices out. 
 */ +/*************************************************************************/ +void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, + rpq_t **queues, idx_t *from, idx_t *cnum) +{ + idx_t ncon, i, part; + real_t max, tmp; + + ncon = graph->ncon; + + *from = -1; + *cnum = -1; + + /* First determine the side and the queue, irrespective of the presence of nodes. + The side & queue are determined based on the most violated balancing constraint. */ + for (max=0.0, part=0; part<2; part++) { + for (i=0; i<ncon; i++) { + tmp = graph->pwgts[part*ncon+i]*pijbm[part*ncon+i] - ubfactors[i]; + /* the '=' in the test below is to ensure that under tight constraints + the partition that is at the max is selected */ + if (tmp >= max) { + max = tmp; + *from = part; + *cnum = i; + } + } + } + + + if (*from != -1) { + /* in case the desired queue is empty, select the non-empty queue of the same side whose constraint has the largest pwgts*pijbm - ubfactors value */ + if (rpqLength(queues[2*(*cnum)+(*from)]) == 0) { + for (i=0; i<ncon; i++) { + if (rpqLength(queues[2*i+(*from)]) > 0) { + max = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i]; + *cnum = i; + break; + } + } + + for (i++; i<ncon; i++) { + tmp = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i]; + if (tmp > max && rpqLength(queues[2*i+(*from)]) > 0) { + max = tmp; + *cnum = i; + } + } + } + + /* + printf("Selected1 %"PRIDX"(%"PRIDX") -> %"PRIDX" [%5"PRREAL"]\n", + *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); + */ + } + else { + /* the partitioning does not violate balancing constraints, in which case select + a queue based on cut criteria */ + for (part=0; part<2; part++) { + for (i=0; i<ncon; i++) { + if (rpqLength(queues[2*i+part]) > 0 && + (*from == -1 || rpqSeeTopKey(queues[2*i+part]) > max)) { + max = rpqSeeTopKey(queues[2*i+part]); + *from = part; + *cnum = i; + } + } + } + /* + printf("Selected2 %"PRIDX"(%"PRIDX") -> %"PRIDX"\n", + *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); + */ + } +} + + 
+/*************************************************************************/ +/*! Prints statistics about the refinement. When mincutorder is -2 it prints the Parts: line (vertex and boundary counts, graph->mincut labelled ICut, and for each constraint the two partition weights scaled by invtvwgt next to the ntpwgts targets); otherwise it prints the tab-indented Mincut: line with mincutorder, the boundary count and the scaled partition weights. Both forms end with the value of ComputeLoadImbalance() and deltabal. */ +/*************************************************************************/ +void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + real_t deltabal, idx_t mincutorder) +{ + int i; + + if (mincutorder == -2) { + printf("Parts: "); + printf("Nv-Nb[%5"PRIDX" %5"PRIDX"] ICut: %6"PRIDX, + graph->nvtxs, graph->nbnd, graph->mincut); + printf(" ["); + for (i=0; i<graph->ncon; i++) + printf("(%.3"PRREAL" %.3"PRREAL" T:%.3"PRREAL" %.3"PRREAL")", + graph->pwgts[i]*graph->invtvwgt[i], + graph->pwgts[graph->ncon+i]*graph->invtvwgt[i], + ntpwgts[i], ntpwgts[graph->ncon+i]); + printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", + ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal); + } + else { + printf("\tMincut: %6"PRIDX" at %5"PRIDX" NBND %6"PRIDX" NPwgts: [", + graph->mincut, mincutorder, graph->nbnd); + for (i=0; i<graph->ncon; i++) + printf("(%.3"PRREAL" %.3"PRREAL")", + graph->pwgts[i]*graph->invtvwgt[i], graph->pwgts[graph->ncon+i]*graph->invtvwgt[i]); + printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", + ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal); + } +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/fortran.c b/3rdParty/metis/metis-5.1.1/libmetis/fortran.c new file mode 100644 index 000000000..5c3ed9029 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/fortran.c @@ -0,0 +1,142 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * fortran.c + * + * This file contains code for the fortran to C interface + * + * Started 8/19/97 + * George + * + */ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function changes the numbering to start from 0 instead of 1 */ +/*************************************************************************/ +void Change2CNumbering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy) +{ + idx_t i; + + for (i=0; i<=nvtxs; i++) + xadj[i]--; + + for (i=0; i<xadj[nvtxs]; i++) /* xadj was shifted above, so xadj[nvtxs] is already the 0-based loop bound for adjncy */ + adjncy[i]--; +} + + +/*************************************************************************/ +/*! This function changes the numbering to start from 1 instead of 0 for xadj (nvtxs+1 entries), adjncy and the nvtxs-long vector */ +/*************************************************************************/ +void Change2FNumbering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vector) +{ + idx_t i; + + for (i=0; i<nvtxs; i++) + vector[i]++; + + for (i=0; i<xadj[nvtxs]; i++) /* runs before xadj is shifted, while xadj[nvtxs] is still the 0-based loop bound */ + adjncy[i]++; + + for (i=0; i<=nvtxs; i++) + xadj[i]++; +} + +/*************************************************************************/ +/*! This function changes the numbering to start from 1 instead of 0 for xadj (nvtxs+1 entries) and adjncy only */ +/*************************************************************************/ +void Change2FNumbering2(idx_t nvtxs, idx_t *xadj, idx_t *adjncy) +{ + idx_t i, nedges; + + nedges = xadj[nvtxs]; + for (i=0; i<nedges; i++) + adjncy[i]++; + + for (i=0; i<=nvtxs; i++) + xadj[i]++; +} + + + +/*************************************************************************/ +/*! This function changes the numbering to start from 1 instead of 0 for xadj (nvtxs+1 entries), adjncy and the two nvtxs-long arrays v1 and v2 */ +/*************************************************************************/ +void Change2FNumberingOrder(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *v1, idx_t *v2) +{ + idx_t i, nedges; + + for (i=0; i<nvtxs; i++) { + v1[i]++; + v2[i]++; + } + + nedges = xadj[nvtxs]; + for (i=0; i<nedges; i++) + adjncy[i]++; + + for (i=0; i<=nvtxs; i++) + xadj[i]++; + +} + + + +/*************************************************************************/ +/*! 
This function changes the numbering to start from 0 instead of 1 */ +/*************************************************************************/ +void ChangeMesh2CNumbering(idx_t n, idx_t *ptr, idx_t *ind) +{ + idx_t i; + + for (i=0; i<=n; i++) + ptr[i]--; + for (i=0; i<ptr[n]; i++) /* ptr was shifted above, so ptr[n] is already the 0-based loop bound for ind */ + ind[i]--; +} + + +/*************************************************************************/ +/*! This function changes the numbering to start from 1 instead of 0 for both the mesh arrays (ptr with n+1 entries, ind) and the graph arrays (xadj with nvtxs+1 entries, adjncy) */ +/*************************************************************************/ +void ChangeMesh2FNumbering(idx_t n, idx_t *ptr, idx_t *ind, idx_t nvtxs, + idx_t *xadj, idx_t *adjncy) +{ + idx_t i; + + for (i=0; i<ptr[n]; i++) /* ind and adjncy are shifted before their index arrays, while ptr[n] and xadj[nvtxs] are still 0-based */ + ind[i]++; + for (i=0; i<=n; i++) + ptr[i]++; + + for (i=0; i<xadj[nvtxs]; i++) + adjncy[i]++; + for (i=0; i<=nvtxs; i++) + xadj[i]++; +} + + +/*************************************************************************/ +/*! This function changes the numbering to start from 1 instead of 0 for the mesh arrays (ptr with ne+1 entries, ind), the ne-long epart and the nn-long npart */ +/*************************************************************************/ +void ChangeMesh2FNumbering2(idx_t ne, idx_t nn, idx_t *ptr, idx_t *ind, + idx_t *epart, idx_t *npart) +{ + idx_t i; + + for (i=0; i<ptr[ne]; i++) + ind[i]++; + for (i=0; i<=ne; i++) + ptr[i]++; + + for (i=0; i<ne; i++) + epart[i]++; + + for (i=0; i<nn; i++) + npart[i]++; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/frename.c b/3rdParty/metis/metis-5.1.1/libmetis/frename.c new file mode 100644 index 000000000..3d43c3ade --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/frename.c @@ -0,0 +1,136 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * Frename.c + * + * This file contains some renaming routines to deal with different Fortran compilers + * + * Started 9/15/97 + * George + * + */ + + +#include "metislib.h" + +#define FRENAME(name, dargs, cargs, name1, name2, name3, name4) \ + int name1 dargs { return name cargs; } \ + int name2 dargs { return name cargs; } \ + int name3 dargs { return name 
cargs; } \ + int name4 dargs { return name cargs; } + + +FRENAME( + METIS_PartGraphRecursive, + (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part), + (nvtxs, ncon, xadj, adjncy, vwgt, + vsize, adjwgt, nparts, tpwgts, + ubvec, options, edgecut, part), + METIS_PARTGRAPHRECURSIVE, + metis_partgraphrecursive, + metis_partgraphrecursive_, + metis_partgraphrecursive__ +) + + +FRENAME( + METIS_PartGraphKway, + (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part), + (nvtxs, ncon, xadj, adjncy, vwgt, + vsize, adjwgt, nparts, tpwgts, + ubvec, options, edgecut, part), + METIS_PARTGRAPHKWAY, + metis_partgraphkway, + metis_partgraphkway_, + metis_partgraphkway__ +) + +FRENAME( + METIS_MeshToDual, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *ncommon, idx_t *numflag, + idx_t **r_xadj, idx_t **r_adjncy), + (ne, nn, eptr, eind, ncommon, numflag, r_xadj, r_adjncy), + METIS_MESHTODUAL, + metis_meshtodual, + metis_meshtodual_, + metis_meshtodual__ +) + + +FRENAME( + METIS_MeshToNodal, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *numflag, idx_t **r_xadj, + idx_t **r_adjncy), + (ne, nn, eptr, eind, numflag, r_xadj, r_adjncy), + METIS_MESHTONODAL, + metis_meshtonodal, + metis_meshtonodal_, + metis_meshtonodal__ +) + + +FRENAME( + METIS_PartMeshNodal, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *vwgt, idx_t *vsize, + idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart), + (ne, nn, eptr, eind, vwgt, vsize, nparts, tpwgts, options, objval, epart, npart), + METIS_PARTMESHNODAL, + metis_partmeshnodal, + metis_partmeshnodal_, + metis_partmeshnodal__ +) + + +FRENAME( + METIS_PartMeshDual, + (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
idx_t *vwgt, idx_t *vsize, + idx_t *ncommon, idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, + idx_t *epart, idx_t *npart), + (ne, nn, eptr, eind, vwgt, vsize, ncommon, nparts, tpwgts, options, objval, epart, npart), + METIS_PARTMESHDUAL, + metis_partmeshdual, + metis_partmeshdual_, + metis_partmeshdual__ +) + + +FRENAME( + METIS_NodeND, + (idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *options, idx_t *perm, + idx_t *iperm), + (nvtxs, xadj, adjncy, vwgt, options, perm, iperm), + METIS_NODEND, + metis_nodend, + metis_nodend_, + metis_nodend__ +) + + +FRENAME( + METIS_Free, + (void *ptr), + (ptr), + METIS_FREE, + metis_free, + metis_free_, + metis_free__ +) + + +FRENAME( + METIS_SetDefaultOptions, + (idx_t *options), + (options), + METIS_SETDEFAULTOPTIONS, + metis_setdefaultoptions, + metis_setdefaultoptions_, + metis_setdefaultoptions__ +) + + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/gklib.c b/3rdParty/metis/metis-5.1.1/libmetis/gklib.c new file mode 100644 index 000000000..4e17eac42 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/gklib.c @@ -0,0 +1,120 @@ +/*! +\file gklib.c +\brief Various helper routines generated using GKlib's templates + +\date Started 4/12/2007 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: gklib.c 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + + +/*************************************************************************/ +/*! BLAS routines */ +/*************************************************************************/ +GK_MKBLAS(i, idx_t, idx_t) +GK_MKBLAS(r, real_t, real_t) + +/*************************************************************************/ +/*! 
Memory allocation routines */ +/*************************************************************************/ +GK_MKALLOC(i, idx_t) +GK_MKALLOC(r, real_t) +GK_MKALLOC(ikv, ikv_t) +GK_MKALLOC(rkv, rkv_t) + +/*************************************************************************/ +/*! Priority queues routines */ +/*************************************************************************/ +#define key_gt(a, b) ((a) > (b)) +GK_MKPQUEUE(ipq, ipq_t, ikv_t, idx_t, idx_t, ikvmalloc, IDX_MAX, key_gt) +GK_MKPQUEUE(rpq, rpq_t, rkv_t, real_t, idx_t, rkvmalloc, REAL_MAX, key_gt) +#undef key_gt + +/*************************************************************************/ +/*! Random number generation routines */ +/*************************************************************************/ +GK_MKRANDOM(i, idx_t, idx_t) + +/*************************************************************************/ +/*! Utility routines */ +/*************************************************************************/ +GK_MKARRAY2CSR(i, idx_t) + +/*************************************************************************/ +/*! 
Sorting routines */ +/*************************************************************************/ +void isorti(size_t n, idx_t *base) +{ /* sorts the n entries of base with a less-than comparator; presumably the trailing i/d in these names means increasing/decreasing order -- confirm against GK_MKQSORT */ +#define i_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(idx_t, base, n, i_lt); +#undef i_lt +} + +void isortd(size_t n, idx_t *base) +{ +#define i_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(idx_t, base, n, i_gt); +#undef i_gt +} + +void rsorti(size_t n, real_t *base) +{ +#define r_lt(a, b) ((*a) < (*b)) + GK_MKQSORT(real_t, base, n, r_lt); +#undef r_lt +} + +void rsortd(size_t n, real_t *base) +{ +#define r_gt(a, b) ((*a) > (*b)) + GK_MKQSORT(real_t, base, n, r_gt); +#undef r_gt +} + +void ikvsorti(size_t n, ikv_t *base) +{ /* compares the key field only; val is not looked at */ +#define ikey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_lt); +#undef ikey_lt +} + +/* Sorts based both on key and val: entries are compared on key first and, for equal keys, on val (both with less-than) */ +void ikvsortii(size_t n, ikv_t *base) +{ +#define ikeyval_lt(a, b) ((a)->key < (b)->key || ((a)->key == (b)->key && (a)->val < (b)->val)) + GK_MKQSORT(ikv_t, base, n, ikeyval_lt); +#undef ikeyval_lt +} + +void ikvsortd(size_t n, ikv_t *base) +{ +#define ikey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(ikv_t, base, n, ikey_gt); +#undef ikey_gt +} + +void rkvsorti(size_t n, rkv_t *base) +{ +#define rkey_lt(a, b) ((a)->key < (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_lt); +#undef rkey_lt +} + +void rkvsortd(size_t n, rkv_t *base) +{ +#define rkey_gt(a, b) ((a)->key > (b)->key) + GK_MKQSORT(rkv_t, base, n, rkey_gt); +#undef rkey_gt +} + +void uvwsorti(size_t n, uvw_t *base) +{ /* compares edges on u first and, for equal u, on v; the weight w is not part of the comparison */ +#define uvwkey_lt(a, b) ((a)->u < (b)->u || ((a)->u == (b)->u && (a)->v < (b)->v)) + GK_MKQSORT(uvw_t, base, n, uvwkey_lt); +#undef uvwkey_lt +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/gklib_defs.h b/3rdParty/metis/metis-5.1.1/libmetis/gklib_defs.h new file mode 100644 index 000000000..dfac5ca67 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/gklib_defs.h @@ -0,0 +1,53 @@ +/*! 
+\file +\brief Data structures and prototypes for GKlib integration + +\date Started 12/23/2008 +\author George +\version\verbatim $Id: gklib_defs.h 10395 2011-06-23 23:28:06Z karypis $ \endverbatim +*/ + +#ifndef _LIBMETIS_GKLIB_H_ +#define _LIBMETIS_GKLIB_H_ + +#include "gklib_rename.h" + +/*************************************************************************/ +/*! Stores a weighted edge */ +/*************************************************************************/ +typedef struct { + idx_t u, v, w; /*!< Edge (u,v) with weight w */ +} uvw_t; + +/************************************************************************* +* Define various data structure using GKlib's templates. +**************************************************************************/ +GK_MKKEYVALUE_T(ikv_t, idx_t, idx_t) +GK_MKKEYVALUE_T(rkv_t, real_t, idx_t) +GK_MKPQUEUE_T(ipq_t, ikv_t) +GK_MKPQUEUE_T(rpq_t, rkv_t) + + +/* gklib.c */ +GK_MKBLAS_PROTO(i, idx_t, idx_t) +GK_MKBLAS_PROTO(r, real_t, real_t) +GK_MKALLOC_PROTO(i, idx_t) +GK_MKALLOC_PROTO(r, real_t) +GK_MKALLOC_PROTO(ikv, ikv_t) +GK_MKALLOC_PROTO(rkv, rkv_t) +GK_MKPQUEUE_PROTO(ipq, ipq_t, idx_t, idx_t) +GK_MKPQUEUE_PROTO(rpq, rpq_t, real_t, idx_t) +GK_MKRANDOM_PROTO(i, idx_t, idx_t) +GK_MKARRAY2CSR_PROTO(i, idx_t) +void isorti(size_t n, idx_t *base); +void isortd(size_t n, idx_t *base); +void rsorti(size_t n, real_t *base); +void rsortd(size_t n, real_t *base); +void ikvsorti(size_t n, ikv_t *base); +void ikvsortii(size_t n, ikv_t *base); +void ikvsortd(size_t n, ikv_t *base); +void rkvsorti(size_t n, rkv_t *base); +void rkvsortd(size_t n, rkv_t *base); +void uvwsorti(size_t n, uvw_t *base); + +#endif diff --git a/3rdParty/metis/metis-5.1.1/libmetis/gklib_rename.h b/3rdParty/metis/metis-5.1.1/libmetis/gklib_rename.h new file mode 100644 index 000000000..78dc8b39e --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/gklib_rename.h @@ -0,0 +1,122 @@ +/*! 
+\file + + * Copyright 1997, Regents of the University of Minnesota + * + * This file contains header files + * + * Started 10/2/97 + * George + * + * $Id: gklib_rename.h 10395 2011-06-23 23:28:06Z karypis $ + * + */ + + +#ifndef _LIBMETIS_GKLIB_RENAME_H_ +#define _LIBMETIS_GKLIB_RENAME_H_ + +/* gklib.c - generated from the .o files using the ./utils/listundescapedsumbols.csh */ +#define iAllocMatrix libmetis__iAllocMatrix +#define iFreeMatrix libmetis__iFreeMatrix +#define iSetMatrix libmetis__iSetMatrix +#define iargmax libmetis__iargmax +#define iargmax_n libmetis__iargmax_n +#define iargmin libmetis__iargmin +#define iarray2csr libmetis__iarray2csr +#define iaxpy libmetis__iaxpy +#define icopy libmetis__icopy +#define idot libmetis__idot +#define iincset libmetis__iincset +#define ikvAllocMatrix libmetis__ikvAllocMatrix +#define ikvFreeMatrix libmetis__ikvFreeMatrix +#define ikvSetMatrix libmetis__ikvSetMatrix +#define ikvcopy libmetis__ikvcopy +#define ikvmalloc libmetis__ikvmalloc +#define ikvrealloc libmetis__ikvrealloc +#define ikvset libmetis__ikvset +#define ikvsmalloc libmetis__ikvsmalloc +#define ikvsortd libmetis__ikvsortd +#define ikvsorti libmetis__ikvsorti +#define ikvsortii libmetis__ikvsortii +#define imalloc libmetis__imalloc +#define imax libmetis__imax +#define imin libmetis__imin +#define inorm2 libmetis__inorm2 +#define ipqCheckHeap libmetis__ipqCheckHeap +#define ipqCreate libmetis__ipqCreate +#define ipqDelete libmetis__ipqDelete +#define ipqDestroy libmetis__ipqDestroy +#define ipqFree libmetis__ipqFree +#define ipqGetTop libmetis__ipqGetTop +#define ipqInit libmetis__ipqInit +#define ipqInsert libmetis__ipqInsert +#define ipqLength libmetis__ipqLength +#define ipqReset libmetis__ipqReset +#define ipqSeeKey libmetis__ipqSeeKey +#define ipqSeeTopKey libmetis__ipqSeeTopKey +#define ipqSeeTopVal libmetis__ipqSeeTopVal +#define ipqUpdate libmetis__ipqUpdate +#define isrand libmetis__isrand +#define irand libmetis__irand +#define 
irandArrayPermute libmetis__irandArrayPermute +#define irandArrayPermuteFine libmetis__irandArrayPermuteFine +#define irandInRange libmetis__irandInRange +#define irealloc libmetis__irealloc +#define iscale libmetis__iscale +#define iset libmetis__iset +#define ismalloc libmetis__ismalloc +#define isortd libmetis__isortd +#define isorti libmetis__isorti +#define isrand libmetis__isrand +#define isum libmetis__isum +#define rAllocMatrix libmetis__rAllocMatrix +#define rFreeMatrix libmetis__rFreeMatrix +#define rSetMatrix libmetis__rSetMatrix +#define rargmax libmetis__rargmax +#define rargmax_n libmetis__rargmax_n +#define rargmin libmetis__rargmin +#define raxpy libmetis__raxpy +#define rcopy libmetis__rcopy +#define rdot libmetis__rdot +#define rincset libmetis__rincset +#define rkvAllocMatrix libmetis__rkvAllocMatrix +#define rkvFreeMatrix libmetis__rkvFreeMatrix +#define rkvSetMatrix libmetis__rkvSetMatrix +#define rkvcopy libmetis__rkvcopy +#define rkvmalloc libmetis__rkvmalloc +#define rkvrealloc libmetis__rkvrealloc +#define rkvset libmetis__rkvset +#define rkvsmalloc libmetis__rkvsmalloc +#define rkvsortd libmetis__rkvsortd +#define rkvsorti libmetis__rkvsorti +#define rmalloc libmetis__rmalloc +#define rmax libmetis__rmax +#define rmin libmetis__rmin +#define rnorm2 libmetis__rnorm2 +#define rpqCheckHeap libmetis__rpqCheckHeap +#define rpqCreate libmetis__rpqCreate +#define rpqDelete libmetis__rpqDelete +#define rpqDestroy libmetis__rpqDestroy +#define rpqFree libmetis__rpqFree +#define rpqGetTop libmetis__rpqGetTop +#define rpqInit libmetis__rpqInit +#define rpqInsert libmetis__rpqInsert +#define rpqLength libmetis__rpqLength +#define rpqReset libmetis__rpqReset +#define rpqSeeKey libmetis__rpqSeeKey +#define rpqSeeTopKey libmetis__rpqSeeTopKey +#define rpqSeeTopVal libmetis__rpqSeeTopVal +#define rpqUpdate libmetis__rpqUpdate +#define rrealloc libmetis__rrealloc +#define rscale libmetis__rscale +#define rset libmetis__rset +#define rsmalloc 
libmetis__rsmalloc +#define rsortd libmetis__rsortd +#define rsorti libmetis__rsorti +#define rsum libmetis__rsum +#define uvwsorti libmetis__uvwsorti + +#endif + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/graph.c b/3rdParty/metis/metis-5.1.1/libmetis/graph.c new file mode 100644 index 000000000..5a2427649 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/graph.c @@ -0,0 +1,423 @@ +/** +\file +\brief Functions that deal with setting up the graphs for METIS. + +\date Started 7/25/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: graph.c 15817 2013-11-25 14:58:41Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function sets up the graph from the user input */ +/*************************************************************************/ +graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) +{ + idx_t i, j, k, sum; + real_t *nvwgt; + graph_t *graph; + + /* allocate the graph and fill in the fields */ + graph = CreateGraph(); + + graph->nvtxs = nvtxs; + graph->nedges = xadj[nvtxs]; + graph->ncon = ncon; + + graph->xadj = xadj; + graph->free_xadj = 0; + + graph->adjncy = adjncy; + graph->free_adjncy = 0; + + graph->droppedewgt = 0; + + /* setup the vertex weights */ + if (vwgt) { + graph->vwgt = vwgt; + graph->free_vwgt = 0; + } + else { + vwgt = graph->vwgt = ismalloc(ncon*nvtxs, 1, "SetupGraph: vwgt"); + } + + graph->tvwgt = imalloc(ncon, "SetupGraph: tvwgts"); + graph->invtvwgt = rmalloc(ncon, "SetupGraph: invtvwgts"); + for (i=0; i<ncon; i++) { + graph->tvwgt[i] = isum(nvtxs, vwgt+i, ncon); + graph->invtvwgt[i] = 1.0/(graph->tvwgt[i] > 0 ? 
graph->tvwgt[i] : 1); + } + + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + /* Setup the vsize: use the caller's array without taking ownership, or allocate one filled with 1 */ + if (vsize) { + graph->vsize = vsize; + graph->free_vsize = 0; + } + else { + vsize = graph->vsize = ismalloc(nvtxs, 1, "SetupGraph: vsize"); + } + + /* Allocate memory for edge weights and initialize them to 1 plus the sum of the vsize of the two endpoints; an adjwgt array passed by the caller is not used in this mode */ + adjwgt = graph->adjwgt = imalloc(graph->nedges, "SetupGraph: adjwgt"); + for (i=0; i<nvtxs; i++) { + for (j=xadj[i]; j<xadj[i+1]; j++) + adjwgt[j] = 1+vsize[i]+vsize[adjncy[j]]; + } + } + else { /* For edgecut minimization */ + /* setup the edge weights: use the caller's array without taking ownership, or allocate one filled with 1 */ + if (adjwgt) { + graph->adjwgt = adjwgt; + graph->free_adjwgt = 0; + } + else { + adjwgt = graph->adjwgt = ismalloc(graph->nedges, 1, "SetupGraph: adjwgt"); + } + } + + + /* setup various derived info; note that tvwgt/invtvwgt were already allocated and filled earlier in this function, so this call only recomputes the same values */ + SetupGraph_tvwgt(graph); + + if (ctrl->optype == METIS_OP_PMETIS || ctrl->optype == METIS_OP_OMETIS) + SetupGraph_label(graph); + + ASSERT(CheckGraph(graph, ctrl->numflag, 1)); + + return graph; +} + + +/*************************************************************************/ +/*! Sets up the tvwgt/invtvwgt info. Each array is allocated (ncon entries) only if it is still NULL; then, for every constraint i, tvwgt[i] is set from isum() over the vertex weights (presumably the strided total of constraint i -- confirm against isum) and invtvwgt[i] to its reciprocal, using 1 as the divisor when the total is not positive. */ +/*************************************************************************/ +void SetupGraph_tvwgt(graph_t *graph) +{ + idx_t i; + + if (graph->tvwgt == NULL) + graph->tvwgt = imalloc(graph->ncon, "SetupGraph_tvwgt: tvwgt"); + if (graph->invtvwgt == NULL) + graph->invtvwgt = rmalloc(graph->ncon, "SetupGraph_tvwgt: invtvwgt"); + + for (i=0; i<graph->ncon; i++) { + graph->tvwgt[i] = isum(graph->nvtxs, graph->vwgt+i, graph->ncon); + graph->invtvwgt[i] = 1.0/(graph->tvwgt[i] > 0 ? graph->tvwgt[i] : 1); + } +} + + +/*************************************************************************/ +/*! 
Set's up the label info */ +/*************************************************************************/ +void SetupGraph_label(graph_t *graph) +{ /* allocates label (nvtxs entries) if it is still NULL and sets label[i] = i */ + idx_t i; + + if (graph->label == NULL) + graph->label = imalloc(graph->nvtxs, "SetupGraph_label: label"); + + for (i=0; i<graph->nvtxs; i++) + graph->label[i] = i; +} + + +/*************************************************************************/ +/*! Sets up the various arrays for the split graph: returns a new graph with snvtxs vertices, snedges edges and the ncon of the parent graph, whose arrays are allocated but not filled in */ +/*************************************************************************/ +graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges) +{ + graph_t *sgraph; + + sgraph = CreateGraph(); + + sgraph->nvtxs = snvtxs; + sgraph->nedges = snedges; + sgraph->ncon = graph->ncon; + + /* Allocate memory for the split graph */ + sgraph->xadj = imalloc(snvtxs+1, "SetupSplitGraph: xadj"); + sgraph->vwgt = imalloc(sgraph->ncon*snvtxs, "SetupSplitGraph: vwgt"); + sgraph->adjncy = imalloc(snedges, "SetupSplitGraph: adjncy"); + sgraph->adjwgt = imalloc(snedges, "SetupSplitGraph: adjwgt"); + sgraph->label = imalloc(snvtxs, "SetupSplitGraph: label"); + sgraph->tvwgt = imalloc(sgraph->ncon, "SetupSplitGraph: tvwgt"); + sgraph->invtvwgt = rmalloc(sgraph->ncon, "SetupSplitGraph: invtvwgt"); + + if (graph->vsize) /* vsize is allocated only when the parent graph has one */ + sgraph->vsize = imalloc(snvtxs, "SetupSplitGraph: vsize"); + + return sgraph; +} + + +/*************************************************************************/ +/*! This function creates and initializes a graph_t data structure: it is allocated with gk_malloc and passed through InitGraph before being returned */ +/*************************************************************************/ +graph_t *CreateGraph(void) +{ + graph_t *graph; + + graph = (graph_t *)gk_malloc(sizeof(graph_t), "CreateGraph: graph"); + + InitGraph(graph); + + return graph; +} + + +/*************************************************************************/ +/*! 
This function initializes a graph_t data structure */ +/*************************************************************************/ +void InitGraph(graph_t *graph) +{ /* zero-fills *graph, then sets the size fields to -1, the array pointers to NULL and the free_* ownership flags to 1 */ + memset((void *)graph, 0, sizeof(graph_t)); + + /* graph size constants */ + graph->nvtxs = -1; + graph->nedges = -1; + graph->ncon = -1; + graph->mincut = -1; + graph->minvol = -1; + graph->nbnd = -1; + + /* memory for the graph structure */ + graph->xadj = NULL; + graph->vwgt = NULL; + graph->vsize = NULL; + graph->adjncy = NULL; + graph->adjwgt = NULL; + graph->label = NULL; + graph->cmap = NULL; + graph->tvwgt = NULL; + graph->invtvwgt = NULL; + + /* by default these are set to true, but they can be explicitly changed afterwards (FreeGraph frees an array only when its flag is set) */ + graph->free_xadj = 1; + graph->free_vwgt = 1; + graph->free_vsize = 1; + graph->free_adjncy = 1; + graph->free_adjwgt = 1; + + + /* memory for the partition/refinement structure */ + graph->where = NULL; + graph->pwgts = NULL; + graph->id = NULL; + graph->ed = NULL; + graph->bndptr = NULL; + graph->bndind = NULL; + graph->nrinfo = NULL; + graph->ckrinfo = NULL; + graph->vkrinfo = NULL; + + /* linked-list structure */ + graph->coarser = NULL; + graph->finer = NULL; + +} + + +/*************************************************************************/ +/*! This function frees the refinement/partition memory stored in a graph (where, pwgts, id, ed, bndptr, bndind, nrinfo, ckrinfo and vkrinfo) */ +/*************************************************************************/ +void FreeRData(graph_t *graph) +{ + + /* The following is for the -minconn and -contig to work properly in + the vol-refinement routines: when ckrinfo and vkrinfo hold the same pointer, ckrinfo is cleared so that the gk_free call below does not receive the same block twice */ + if ((void *)graph->ckrinfo == (void *)graph->vkrinfo) + graph->ckrinfo = NULL; + + + /* free partition/refinement structure */ + gk_free((void **)&graph->where, &graph->pwgts, &graph->id, &graph->ed, + &graph->bndptr, &graph->bndind, &graph->nrinfo, &graph->ckrinfo, + &graph->vkrinfo, LTERM); +} + + +/*************************************************************************/ +/*! 
This function deallocates any memory stored in a graph */ +/*************************************************************************/ +void FreeGraph(graph_t **r_graph) +{ /* frees the arrays whose free_* flag is set, the refinement data, the derived arrays and the graph_t itself, then sets *r_graph to NULL */ + graph_t *graph; + + graph = *r_graph; + + /* free graph structure */ + if (graph->free_xadj) + gk_free((void **)&graph->xadj, LTERM); + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + if (graph->free_adjncy) + gk_free((void **)&graph->adjncy, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); + + /* free partition/refinement structure */ + FreeRData(graph); + + gk_free((void **)&graph->tvwgt, &graph->invtvwgt, &graph->label, + &graph->cmap, &graph, LTERM); + + *r_graph = NULL; +} + + +/*************************************************************************/ +/*! This function writes the key contents of the graph on disk and frees + the associated memory. It does nothing unless ctrl->ondisk is set and the size estimate computed below reaches 128*1024*1024 bytes. Only arrays owned by the graph (free_* set) are written and freed; the file is named metis<pid>.<gID>, where gID comes from a function-static counter shared by all calls. */ +/*************************************************************************/ +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs, ncon, *xadj; + static int gID = 1; + char outfile[1024]; + FILE *fpout; + + if (ctrl->ondisk == 0) + return; + + if (sizeof(idx_t)*(graph->nvtxs*(graph->ncon+1)+2*graph->xadj[graph->nvtxs]) < 128*1024*1024) + return; + + if (graph->gID > 0) { /* a file from an earlier write of this graph is removed first */ + sprintf(outfile, "metis%d.%d", (int)ctrl->pid, graph->gID); + gk_rmpath(outfile); + } + + graph->gID = gID++; + sprintf(outfile, "metis%d.%d", (int)ctrl->pid, graph->gID); + + if ((fpout = fopen(outfile, "wb")) == NULL) /* the graph simply stays in memory when the file cannot be created */ + return; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + + if (graph->free_xadj) { + if (fwrite(graph->xadj, sizeof(idx_t), nvtxs+1, fpout) != nvtxs+1) + goto error; + } + if (graph->free_vwgt) { + if (fwrite(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpout) != nvtxs*ncon) + goto error; + } + if (graph->free_adjncy) { + if (fwrite(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto 
error; + } + if (graph->free_adjwgt) { + if (fwrite(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpout) != xadj[nvtxs]) + goto error; + } + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + if (graph->free_vsize) { + if (fwrite(graph->vsize, sizeof(idx_t), nvtxs, fpout) != nvtxs) + goto error; + } + } + + fclose(fpout); + + if (graph->free_xadj) + gk_free((void **)&graph->xadj, LTERM); + if (graph->free_vwgt) + gk_free((void **)&graph->vwgt, LTERM); + if (graph->free_vsize) + gk_free((void **)&graph->vsize, LTERM); + if (graph->free_adjncy) + gk_free((void **)&graph->adjncy, LTERM); + if (graph->free_adjwgt) + gk_free((void **)&graph->adjwgt, LTERM); + + graph->ondisk = 1; + return; + +error: + printf("Failed on writing %s\n", outfile); + fclose(fpout); + gk_rmpath(outfile); + graph->ondisk = 0; +} + + +/*************************************************************************/ +/*! This function reads the key contents of a graph from the disk */ +/*************************************************************************/ +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs, ncon, *xadj; + char infile[1024]; + FILE *fpin; + + if (graph->ondisk == 0) + return; /* this graph is not on the disk */ + + sprintf(infile, "metis%d.%d", (int)ctrl->pid, graph->gID); + + if ((fpin = fopen(infile, "rb")) == NULL) + return; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + + if (graph->free_xadj) { + graph->xadj = imalloc(nvtxs+1, "graph_ReadFromDisk: xadj"); + if (fread(graph->xadj, sizeof(idx_t), nvtxs+1, fpin) != nvtxs+1) + goto error; + } + xadj = graph->xadj; + + if (graph->free_vwgt) { + graph->vwgt = imalloc(nvtxs*ncon, "graph_ReadFromDisk: vwgt"); + if (fread(graph->vwgt, sizeof(idx_t), nvtxs*ncon, fpin) != nvtxs*ncon) + goto error; + } + + if (graph->free_adjncy) { + graph->adjncy = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjncy"); + if (fread(graph->adjncy, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; + } + + if (graph->free_adjwgt) { + 
graph->adjwgt = imalloc(xadj[nvtxs], "graph_ReadFromDisk: adjwgt"); + if (fread(graph->adjwgt, sizeof(idx_t), xadj[nvtxs], fpin) != xadj[nvtxs]) + goto error; + } + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + if (graph->free_vsize) { + graph->vsize = imalloc(nvtxs, "graph_ReadFromDisk: vsize"); + if (fread(graph->vsize, sizeof(idx_t), nvtxs, fpin) != nvtxs) + goto error; + } + } + + fclose(fpin); +// printf("ondisk: deleting %s\n", infile); + gk_rmpath(infile); + + graph->gID = 0; + graph->ondisk = 0; + return; + +error: + fclose(fpin); + gk_rmpath(infile); + graph->ondisk = 0; + gk_errexit(SIGERR, "Failed to restore graph %s from the disk.\n", infile); +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/initpart.c b/3rdParty/metis/metis-5.1.1/libmetis/initpart.c new file mode 100644 index 000000000..f2fd3c962 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/initpart.c @@ -0,0 +1,630 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * initpart.c + * + * This file contains code that performs the initial partition of the + * coarsest graph + * + * Started 7/23/97 + * George + * + */ + +#include "metislib.h" + +/*************************************************************************/ +/*! 
This function computes the initial bisection of the coarsest graph */ +/*************************************************************************/ +void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ /* dispatches on ctrl->iptype to one of the bisection routines, picking the Mc* variant whenever graph->ncon is not 1 */ + mdbglvl_et dbglvl; + + ASSERT(graph->tvwgt[0] >= 0); + + dbglvl = ctrl->dbglvl; /* saved here and restored before returning */ + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE); /* presumably clears the REFINE and MOVEINFO debug bits for the duration of the call -- confirm against IFSET */ + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); + + switch (ctrl->iptype) { + case METIS_IPTYPE_RANDOM: + if (graph->ncon == 1) + RandomBisection(ctrl, graph, ntpwgts, niparts); + else + McRandomBisection(ctrl, graph, ntpwgts, niparts); + break; + + case METIS_IPTYPE_GROW: + if (graph->nedges == 0) /* a graph without edges is bisected randomly instead of by growing; the unbraced else lines below pair with the nearest if, which matches the original indentation */ + if (graph->ncon == 1) + RandomBisection(ctrl, graph, ntpwgts, niparts); + else + McRandomBisection(ctrl, graph, ntpwgts, niparts); + else + if (graph->ncon == 1) + GrowBisection(ctrl, graph, ntpwgts, niparts); + else + McGrowBisection(ctrl, graph, ntpwgts, niparts); + break; + + default: + gk_errexit(SIGERR, "Unknown initial partition type: %d\n", ctrl->iptype); + } + + IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Cut: %"PRIDX"\n", graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); + ctrl->dbglvl = dbglvl; + +} + + +/*************************************************************************/ +/*! 
This function computes the initial separator of the coarsest graph */ +/*************************************************************************/ +void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts) +{ + real_t ntpwgts[2] = {0.5, 0.5}; + mdbglvl_et dbglvl; + + dbglvl = ctrl->dbglvl; + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE); + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); + + /* this is required for the cut-based part of the refinement */ + Setup2WayBalMultipliers(ctrl, graph, ntpwgts); + + switch (ctrl->iptype) { + case METIS_IPTYPE_EDGE: + if (graph->nedges == 0) + RandomBisection(ctrl, graph, ntpwgts, niparts); + else + GrowBisection(ctrl, graph, ntpwgts, niparts); + + Compute2WayPartitionParams(ctrl, graph); + ConstructSeparator(ctrl, graph); + break; + + case METIS_IPTYPE_NODE: + GrowBisectionNode(ctrl, graph, ntpwgts, niparts); + break; + + default: + gk_errexit(SIGERR, "Unkown iptype of %"PRIDX"\n", ctrl->iptype); + } + + IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Sep: %"PRIDX"\n", graph->mincut)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); + + ctrl->dbglvl = dbglvl; + +} + + +/*************************************************************************/ +/*! This function computes a bisection of a graph by randomly assigning + the vertices followed by a bisection refinement. + The resulting partition is returned in graph->where. 
+*/ +/*************************************************************************/ +void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, ii, j, k, nvtxs, pwgts[2], zeromaxpwgt, from, me, + bestcut=0, icut, mincut, inbfs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where; + idx_t *perm, *bestwhere; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + + zeromaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[0]; + + for (inbfs=0; inbfs<niparts; inbfs++) { + iset(nvtxs, 1, where); + + if (inbfs > 0) { + irandArrayPermute(nvtxs, perm, nvtxs/2, 1); + pwgts[1] = graph->tvwgt[0]; + pwgts[0] = 0; + + for (ii=0; ii<nvtxs; ii++) { + i = perm[ii]; + if (pwgts[0]+vwgt[i] < zeromaxpwgt) { + where[i] = 0; + pwgts[0] += vwgt[i]; + pwgts[1] -= vwgt[i]; + if (pwgts[0] > zeromaxpwgt) + break; + } + } + } + + /* Do some partition refinement */ + Compute2WayPartitionParams(ctrl, graph); + /* printf("IPART: %3"PRIDX" [%5"PRIDX" %5"PRIDX"] [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + + Balance2Way(ctrl, graph, ntpwgts); + /* printf("BPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + + FM_2WayRefine(ctrl, graph, ntpwgts, 4); + /* printf("RPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */ + + if (inbfs==0 || bestcut > graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function takes a graph and produces a bisection by using a region
    growing algorithm. The resulting bisection is refined using FM.
    The resulting partition is returned in graph->where.

    Each of the niparts trials grows side 0 with a BFS from a random
    vertex, restarting from a random untouched vertex when the queue runs
    empty, and the trial with the smallest cut is kept.
*/
/*************************************************************************/
void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts,
         idx_t niparts)
{
  idx_t v, e, nbr, skip, nvtxs, draining, nleft, head, tail,
        pwgts[2], oneminpwgt, onemaxpwgt, bestcut=0, trial;
  idx_t *xadj, *vwgt, *adjncy, *where;
  idx_t *queue, *touched, *bestwhere;

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  vwgt   = graph->vwgt;
  adjncy = graph->adjncy;

  Allocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = iwspacemalloc(ctrl, nvtxs);
  queue     = iwspacemalloc(ctrl, nvtxs);
  touched   = iwspacemalloc(ctrl, nvtxs);

  /* allowed weight window for side 1 */
  onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[1];
  oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*ntpwgts[1];

  for (trial=0; trial<niparts; trial++) {
    iset(nvtxs, 1, where);
    iset(nvtxs, 0, touched);

    pwgts[1] = graph->tvwgt[0];
    pwgts[0] = 0;

    /* seed the BFS with a random vertex */
    queue[0]          = irandInRange(nvtxs);
    touched[queue[0]] = 1;
    head     = 0;
    tail     = 1;
    nleft    = nvtxs-1;
    draining = 0;

    /* grow side 0 by BFS */
    for (;;) {
      if (head == tail) { /* queue is empty: the graph is disconnected */
        if (nleft == 0 || draining)
          break;

        /* restart from the skip-th vertex that has not been touched yet */
        skip = irandInRange(nleft);
        for (v=0; v<nvtxs; v++) {
          if (touched[v] != 0)
            continue;
          if (skip == 0)
            break;
          skip--;
        }

        queue[0]   = v;
        touched[v] = 1;
        head       = 0;
        tail       = 1;
        nleft--;
      }

      v = queue[head++];

      /* moving v would leave side 1 too light: skip it and only drain the
         queue from here on */
      if (pwgts[0] > 0 && pwgts[1]-vwgt[v] < oneminpwgt) {
        draining = 1;
        continue;
      }

      where[v] = 0;
      INC_DEC(pwgts[0], pwgts[1], vwgt[v]);
      if (pwgts[1] <= onemaxpwgt)
        break;

      draining = 0;
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];
        if (touched[nbr] == 0) {
          queue[tail++] = nbr;
          touched[nbr]  = 1;
          nleft--;
        }
      }
    }

    /* make sure neither side ended up empty */
    if (pwgts[1] == 0)
      where[irandInRange(nvtxs)] = 1;
    if (pwgts[0] == 0)
      where[irandInRange(nvtxs)] = 0;

    /* balance and refine the trial bisection */
    Compute2WayPartitionParams(ctrl, graph);
    Balance2Way(ctrl, graph, ntpwgts);
    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);

    /* remember the trial with the smallest cut; a zero cut ends the search */
    if (trial == 0 || graph->mincut < bestcut) {
      bestcut = graph->mincut;
      icopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;
    }
  }

  graph->mincut = bestcut;
  icopy(nvtxs, bestwhere, where);

  WCOREPOP;
}


/*************************************************************************/
/*! This function takes a multi-constraint graph and computes a bisection
    by randomly assigning the vertices and then refining it. The resulting
    partition is returned in graph->where.
+*/ +/**************************************************************************/ +void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, ii, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs, qnum; + idx_t *bestwhere, *where, *perm, *counts; + idx_t *vwgt; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + vwgt = graph->vwgt; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + perm = iwspacemalloc(ctrl, nvtxs); + counts = iwspacemalloc(ctrl, ncon); + + for (inbfs=0; inbfs<2*niparts; inbfs++) { + irandArrayPermute(nvtxs, perm, nvtxs/2, 1); + iset(ncon, 0, counts); + + /* partition by spliting the queues randomly */ + for (ii=0; ii<nvtxs; ii++) { + i = perm[ii]; + qnum = iargmax(ncon, vwgt+i*ncon,1); + where[i] = (counts[qnum]++)%2; + } + + Compute2WayPartitionParams(ctrl, graph); + + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + + if (inbfs == 0 || bestcut >= graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function takes a multi-constraint graph and produces a bisection + by using a region growing algorithm. The resulting partition is + returned in graph->where. 
+*/ +/*************************************************************************/ +void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs; + idx_t *bestwhere, *where; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + + Allocate2WayPartitionMemory(ctrl, graph); + where = graph->where; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + + for (inbfs=0; inbfs<2*niparts; inbfs++) { + iset(nvtxs, 1, where); + where[irandInRange(nvtxs)] = 0; + + Compute2WayPartitionParams(ctrl, graph); + + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + + if (inbfs == 0 || bestcut >= graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + if (bestcut == 0) + break; + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/* This function takes a graph and produces a tri-section into left, right, + and separator using a region growing algorithm. The resulting separator + is refined using node FM. + The resulting partition is returned in graph->where. 
+*/ +/**************************************************************************/ +void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, j, k, nvtxs, drain, nleft, first, last, pwgts[2], oneminpwgt, + onemaxpwgt, from, me, bestcut=0, icut, mincut, inbfs; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *bndind; + idx_t *queue, *touched, *gain, *bestwhere; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + bestwhere = iwspacemalloc(ctrl, nvtxs); + queue = iwspacemalloc(ctrl, nvtxs); + touched = iwspacemalloc(ctrl, nvtxs); + + onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*0.5; + oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*0.5; + + + /* Allocate refinement memory. Allocate sufficient memory for both edge and node */ + graph->pwgts = imalloc(3, "GrowBisectionNode: pwgts"); + graph->where = imalloc(nvtxs, "GrowBisectionNode: where"); + graph->bndptr = imalloc(nvtxs, "GrowBisectionNode: bndptr"); + graph->bndind = imalloc(nvtxs, "GrowBisectionNode: bndind"); + graph->id = imalloc(nvtxs, "GrowBisectionNode: id"); + graph->ed = imalloc(nvtxs, "GrowBisectionNode: ed"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode: nrinfo"); + + where = graph->where; + bndind = graph->bndind; + + for (inbfs=0; inbfs<niparts; inbfs++) { + iset(nvtxs, 1, where); + iset(nvtxs, 0, touched); + + pwgts[1] = graph->tvwgt[0]; + pwgts[0] = 0; + + queue[0] = irandInRange(nvtxs); + touched[queue[0]] = 1; + first = 0; last = 1; + nleft = nvtxs-1; + drain = 0; + + /* Start the BFS from queue to get a partition */ + for (;;) { + if (first == last) { /* Empty. Disconnected graph! 
*/ + if (nleft == 0 || drain) + break; + + k = irandInRange(nleft); + for (i=0; i<nvtxs; i++) { /* select the kth untouched vertex */ + if (touched[i] == 0) { + if (k == 0) + break; + else + k--; + } + } + + queue[0] = i; + touched[i] = 1; + first = 0; + last = 1; + nleft--; + } + + i = queue[first++]; + if (pwgts[1]-vwgt[i] < oneminpwgt) { + drain = 1; + continue; + } + + where[i] = 0; + INC_DEC(pwgts[0], pwgts[1], vwgt[i]); + if (pwgts[1] <= onemaxpwgt) + break; + + drain = 0; + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (touched[k] == 0) { + queue[last++] = k; + touched[k] = 1; + nleft--; + } + } + } + + /************************************************************* + * Do some partition refinement + **************************************************************/ + Compute2WayPartitionParams(ctrl, graph); + Balance2Way(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, 4); + + /* Construct and refine the vertex separator */ + for (i=0; i<graph->nbnd; i++) { + j = bndind[i]; + if (xadj[j+1]-xadj[j] > 0) /* ignore islands */ + where[j] = 2; + } + + Compute2WayNodePartitionParams(ctrl, graph); + FM_2WayNodeRefine2Sided(ctrl, graph, 1); + FM_2WayNodeRefine1Sided(ctrl, graph, 4); + + /* + printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", + inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); + */ + + if (inbfs == 0 || bestcut > graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + + +/*************************************************************************/ +/* This function takes a graph and produces a tri-section into left, right, + and separator using a region growing algorithm. The resulting separator + is refined using node FM. + The resulting partition is returned in graph->where. 
+*/ +/**************************************************************************/ +void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + idx_t niparts) +{ + idx_t i, j, k, nvtxs, bestcut=0, mincut, inbfs; + idx_t *xadj, *where, *bndind, *bestwhere; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + + /* Allocate refinement memory. Allocate sufficient memory for both edge and node */ + graph->pwgts = imalloc(3, "GrowBisectionNode: pwgts"); + graph->where = imalloc(nvtxs, "GrowBisectionNode: where"); + graph->bndptr = imalloc(nvtxs, "GrowBisectionNode: bndptr"); + graph->bndind = imalloc(nvtxs, "GrowBisectionNode: bndind"); + graph->id = imalloc(nvtxs, "GrowBisectionNode: id"); + graph->ed = imalloc(nvtxs, "GrowBisectionNode: ed"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode: nrinfo"); + + bestwhere = iwspacemalloc(ctrl, nvtxs); + + where = graph->where; + bndind = graph->bndind; + + for (inbfs=0; inbfs<niparts; inbfs++) { + iset(nvtxs, 1, where); + if (inbfs > 0) + where[irandInRange(nvtxs)] = 0; + + Compute2WayPartitionParams(ctrl, graph); + General2WayBalance(ctrl, graph, ntpwgts); + FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter); + + /* Construct and refine the vertex separator */ + for (i=0; i<graph->nbnd; i++) { + j = bndind[i]; + if (xadj[j+1]-xadj[j] > 0) /* ignore islands */ + where[j] = 2; + } + + Compute2WayNodePartitionParams(ctrl, graph); + FM_2WayNodeRefine2Sided(ctrl, graph, 4); + + /* + printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", + inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); + */ + + if (inbfs == 0 || bestcut > graph->mincut) { + bestcut = graph->mincut; + icopy(nvtxs, where, bestwhere); + } + } + + graph->mincut = bestcut; + icopy(nvtxs, bestwhere, where); + + WCOREPOP; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/kmetis.c b/3rdParty/metis/metis-5.1.1/libmetis/kmetis.c new file mode 100644 index 000000000..6babf1cbc --- 
/dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/kmetis.c @@ -0,0 +1,619 @@ +/*! +\file +\brief The top-level routines for multilevel k-way partitioning that minimizes + the edge cut. + +\date Started 7/28/1997 +\author George +\author Copyright 1997-2011, Regents of the University of Minnesota +\version\verbatim $Id: kmetis.c 20398 2016-11-22 17:17:12Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function is the entry point for MCKMETIS */ +/*************************************************************************/ +int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval, + idx_t *part) +{ + int sigrval=0, renumber=0; + graph_t *graph; + ctrl_t *ctrl; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + + /* set up the run parameters */ + ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; + } + + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { + Change2CNumbering(*nvtxs, xadj, adjncy); + renumber = 1; + } + + /* set up the graph */ + graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt); + + /* set up multipliers for making balance computations easier */ + SetupKWayBalMultipliers(ctrl, graph); + + /* set various run parameters that depend on the graph */ + ctrl->CoarsenTo = gk_max((*nvtxs)/(40*gk_log2(*nparts)), 30*(*nparts)); + ctrl->nIparts = (ctrl->nIparts != -1 ? ctrl->nIparts : (ctrl->CoarsenTo == 30*(*nparts) ? 
4 : 5)); + + /* take care contiguity requests for disconnected graphs */ + if (ctrl->contig && !IsConnected(graph, 0)) + gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n"); + + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); + + /* start the partitioning */ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); + + if (ctrl->dbglvl&512) { + *objval = BlockKWayPartitioning(ctrl, graph, part); + } + else { + *objval = MlevelKWayPartitioning(ctrl, graph, part); + } + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + + /* clean up */ + FreeCtrl(&ctrl); + +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) + Change2FNumbering(*nvtxs, xadj, adjncy, part); + + gk_siguntrap(); + gk_malloc_cleanup(0); + + return metis_rcode(sigrval); +} + + +/*************************************************************************/ +/*! This function computes a k-way partitioning of a graph that minimizes + the specified objective function. + + \param ctrl is the control structure + \param graph is the graph to be partitioned + \param part is the vector that on return will store the partitioning + + \returns the objective value of the partitoning. The partitioning + itself is stored in the part vector. 
+*/ +/*************************************************************************/ +idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part) +{ + idx_t i, j, objval=0, curobj=0, bestobj=0; + real_t curbal=0.0, bestbal=0.0; + graph_t *cgraph; + int status; + + + for (i=0; i<ctrl->ncuts; i++) { + cgraph = CoarsenGraph(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr)); + AllocateKWayPartitionMemory(ctrl, cgraph); + + /* Release the work space */ + FreeWorkSpace(ctrl); + + /* Compute the initial partitioning */ + InitKWayPartitioning(ctrl, cgraph); + + /* Re-allocate the work space */ + AllocateWorkSpace(ctrl, graph); + AllocateRefinementWorkSpace(ctrl, 2*cgraph->nedges); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_IPART, + printf("Initial %"PRIDX"-way partitioning cut: %"PRIDX"\n", ctrl->nparts, objval)); + + RefineKWay(ctrl, graph, cgraph); + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + curobj = graph->mincut; + break; + + case METIS_OBJTYPE_VOL: + curobj = graph->minvol; + break; + + default: + gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype); + } + + curbal = ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors); + + if (i == 0 + || (curbal <= 0.0005 && bestobj > curobj) + || (bestbal > 0.0005 && curbal < bestbal)) { + icopy(graph->nvtxs, graph->where, part); + bestobj = curobj; + bestbal = curbal; + } + + FreeRData(graph); + + if (bestobj == 0) + break; + } + + FreeGraph(&graph); + + return bestobj; +} + + +/*************************************************************************/ +/*! 
This function computes the initial k-way partitioning using PMETIS +*/ +/*************************************************************************/ +void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ntrials, options[METIS_NOPTIONS], curobj=0, bestobj=0; + idx_t *bestwhere=NULL; + real_t *ubvec=NULL; + int status; + + METIS_SetDefaultOptions(options); + //options[METIS_OPTION_NITER] = 10; + options[METIS_OPTION_NITER] = ctrl->niter; + options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; + options[METIS_OPTION_NO2HOP] = ctrl->no2hop; + options[METIS_OPTION_ONDISK] = ctrl->ondisk; + options[METIS_OPTION_DROPEDGES] = ctrl->dropedges; + + ubvec = rmalloc(graph->ncon, "InitKWayPartitioning: ubvec"); + for (i=0; i<graph->ncon; i++) + ubvec[i] = (real_t)pow(ctrl->ubfactors[i], 1.0/log(ctrl->nparts)); + + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + case METIS_OBJTYPE_VOL: + options[METIS_OPTION_NCUTS] = ctrl->nIparts; + status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, + graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, + graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, + options, &curobj, graph->where); + + if (status != METIS_OK) + gk_errexit(SIGERR, "Failed during initial partitioning\n"); + + break; + +#ifdef XXX /* This does not seem to help */ + case METIS_OBJTYPE_VOL: + bestwhere = imalloc(graph->nvtxs, "InitKWayPartitioning: bestwhere"); + options[METIS_OPTION_NCUTS] = 2; + + ntrials = (ctrl->nIparts+1)/2; + for (i=0; i<ntrials; i++) { + status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, + graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, + graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, + options, &curobj, graph->where); + if (status != METIS_OK) + gk_errexit(SIGERR, "Failed during initial partitioning\n"); + + curobj = ComputeVolume(graph, graph->where); + + if (i == 0 || bestobj > curobj) { + bestobj = curobj; + if (i < ntrials-1) + icopy(graph->nvtxs, graph->where, bestwhere); + } + + if 
(bestobj == 0) + break; + } + if (bestobj != curobj) + icopy(graph->nvtxs, bestwhere, graph->where); + + break; +#endif + + default: + gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype); + } + + gk_free((void **)&ubvec, &bestwhere, LTERM); + +} + + + +/*************************************************************************/ +/*! This function computes a k-way partitioning of a graph that minimizes + the specified objective function. + + \param ctrl is the control structure + \param graph is the graph to be partitioned + \param part is the vector that on return will store the partitioning + + \returns the objective value of the partitoning. The partitioning + itself is stored in the part vector. +*/ +/*************************************************************************/ +idx_t BlockKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part) +{ + idx_t i, ii, j, nvtxs, objval=0; + idx_t *vwgt; + idx_t nparts, mynparts; + idx_t *fpwgts, *cpwgts, *fpart, *perm; + ipq_t *queue; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + vwgt = graph->vwgt; + + nparts = ctrl->nparts; + + mynparts = gk_min(100*nparts, sqrt(nvtxs)); + + for (i=0; i<nvtxs; i++) + part[i] = i%nparts; + irandArrayPermute(nvtxs, part, 4*nvtxs, 0); + printf("Random cut: %d\n", (int)ComputeCut(graph, part)); + + + /* create the initial multi-section */ + mynparts = GrowMultisection(ctrl, graph, mynparts, part); + + /* balance using label-propagation and refine using a randomized greedy strategy */ + BalanceAndRefineLP(ctrl, graph, mynparts, part); + + + /* determine the size of the fine partitions */ + fpwgts = iset(mynparts, 0, iwspacemalloc(ctrl, mynparts)); + for (i=0; i<nvtxs; i++) + fpwgts[part[i]] += vwgt[i]; + + /* create and initialize the queue that will determine + where to put the next one */ + cpwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + queue = ipqCreate(nparts); + for (i=0; i<nparts; i++) + ipqInsert(queue, i, 0); + + /* assign the fine partitions into the coarse 
partitions */ + fpart = iwspacemalloc(ctrl, mynparts); + perm = iwspacemalloc(ctrl, mynparts); + irandArrayPermute(mynparts, perm, mynparts, 1); + for (ii=0; ii<mynparts; ii++) { + i = perm[ii]; + j = ipqSeeTopVal(queue); + fpart[i] = j; + cpwgts[j] += fpwgts[i]; + ipqUpdate(queue, j, -cpwgts[j]); + } + ipqDestroy(queue); + + for (i=0; i<nparts; i++) + printf("cpwgts[%d] = %d\n", (int)i, (int)cpwgts[i]); + + for (i=0; i<nvtxs; i++) + part[i] = fpart[part[i]]; + + WCOREPOP; + + return ComputeCut(graph, part); +} + + +/*************************************************************************/ +/*! This function takes a graph and produces a bisection by using a region + growing algorithm. The resulting bisection is refined using FM. + The resulting partition is returned in graph->where. +*/ +/*************************************************************************/ +idx_t GrowMultisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i, j, k, l, nvtxs, nleft, first, last; + idx_t *xadj, *vwgt, *adjncy; + idx_t *queue; + idx_t tvwgt, maxpwgt, *pwgts; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->xadj; + adjncy = graph->adjncy; + + queue = iwspacemalloc(ctrl, nvtxs); + + + /* Select the seeds for the nparts-way BFS */ + for (nleft=0, i=0; i<nvtxs; i++) { + if (xadj[i+1]-xadj[i] > 1) /* a seed's degree should be > 1 */ + where[nleft++] = i; + } + nparts = gk_min(nparts, nleft); + for (i=0; i<nparts; i++) { + j = irandInRange(nleft); + queue[i] = where[j]; + where[j] = --nleft; + } + + pwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + tvwgt = isum(nvtxs, vwgt, 1); + maxpwgt = (1.5*tvwgt)/nparts; + + iset(nvtxs, -1, where); + for (i=0; i<nparts; i++) { + where[queue[i]] = i; + pwgts[i] = vwgt[queue[i]]; + } + + first = 0; + last = nparts; + nleft = nvtxs-nparts; + + + /* Start the BFS from queue to get a partition */ + while (first < last) { + i = queue[first++]; + l = where[i]; + if (pwgts[l] > maxpwgt) + 
continue; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (where[k] == -1) { + if (pwgts[l]+vwgt[k] > maxpwgt) + break; + pwgts[l] += vwgt[k]; + where[k] = l; + queue[last++] = k; + nleft--; + } + } + } + + /* Assign the unassigned vertices randomly to the nparts partitions */ + if (nleft > 0) { + for (i=0; i<nvtxs; i++) { + if (where[i] == -1) + where[i] = irandInRange(nparts); + } + } + + WCOREPOP; + + return nparts; +} + + +/*************************************************************************/ +/*! This function balances the partitioning using label propagation. +*/ +/*************************************************************************/ +void BalanceAndRefineLP(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t ii, i, j, k, u, v, nvtxs, iter; + idx_t *xadj, *vwgt, *adjncy, *adjwgt; + idx_t tvwgt, *pwgts, maxpwgt, minpwgt; + idx_t *perm; + idx_t from, to, nmoves, nnbrs, *nbrids, *nbrwgts, *nbrmrks; + real_t ubfactor; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + pwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + + ubfactor = I2RUBFACTOR(ctrl->ufactor); + tvwgt = isum(nvtxs, vwgt, 1); + maxpwgt = (ubfactor*tvwgt)/nparts; + minpwgt = (1.0*tvwgt)/(ubfactor*nparts); + + for (i=0; i<nvtxs; i++) + pwgts[where[i]] += vwgt[i]; + + /* for randomly visiting the vertices */ + perm = iincset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + + /* for keeping track of adjancent partitions */ + nbrids = iwspacemalloc(ctrl, nparts); + nbrwgts = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + nbrmrks = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + /* perform a fixed number of balancing LP iterations */ + if (ctrl->dbglvl&METIS_DBG_REFINE) + printf("BLP: nparts: %"PRIDX", min-max: [%"PRIDX", %"PRIDX"], bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nparts, minpwgt, maxpwgt, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + for 
(iter=0; iter<ctrl->niter; iter++) { + if (imax(nparts, pwgts, 1)*nparts < ubfactor*tvwgt) + break; + + irandArrayPermute(nvtxs, perm, nvtxs/8, 1); + nmoves = 0; + + for (ii=0; ii<nvtxs; ii++) { + u = perm[ii]; + + from = where[u]; + if (pwgts[from] - vwgt[u] < minpwgt) + continue; + + nnbrs = 0; + for (j=xadj[u]; j<xadj[u+1]; j++) { + v = adjncy[j]; + to = where[v]; + + if (pwgts[to] + vwgt[u] > maxpwgt) + continue; /* skip if 'to' is overweight */ + + if ((k = nbrmrks[to]) == -1) { + nbrmrks[to] = k = nnbrs++; + nbrids[k] = to; + } + nbrwgts[k] += xadj[v+1]-xadj[v]; + } + if (nnbrs == 0) + continue; + + to = nbrids[iargmax(nnbrs, nbrwgts, 1)]; + if (from != to) { + where[u] = to; + INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + nmoves++; + } + + for (k=0; k<nnbrs; k++) { + nbrmrks[nbrids[k]] = -1; + nbrwgts[k] = 0; + } + + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) + printf(" nmoves: %8"PRIDX", bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nmoves, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + + if (nmoves == 0) + break; + } + + /* perform a fixed number of refinement LP iterations */ + if (ctrl->dbglvl&METIS_DBG_REFINE) + printf("RLP: nparts: %"PRIDX", min-max: [%"PRIDX", %"PRIDX"], bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nparts, minpwgt, maxpwgt, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + for (iter=0; iter<ctrl->niter; iter++) { + irandArrayPermute(nvtxs, perm, nvtxs/8, 1); + nmoves = 0; + + for (ii=0; ii<nvtxs; ii++) { + u = perm[ii]; + + from = where[u]; + if (pwgts[from] - vwgt[u] < minpwgt) + continue; + + nnbrs = 0; + for (j=xadj[u]; j<xadj[u+1]; j++) { + v = adjncy[j]; + to = where[v]; + + if (to != from && pwgts[to] + vwgt[u] > maxpwgt) + continue; /* skip if 'to' is overweight */ + + if ((k = nbrmrks[to]) == -1) { + nbrmrks[to] = k = nnbrs++; + nbrids[k] = to; + } + nbrwgts[k] += adjwgt[j]; + } + if (nnbrs == 0) + continue; + + to = nbrids[iargmax(nnbrs, nbrwgts, 1)]; + if (from != to) { + where[u] = to; + 
INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + nmoves++; + } + + for (k=0; k<nnbrs; k++) { + nbrmrks[nbrids[k]] = -1; + nbrwgts[k] = 0; + } + + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) + printf(" nmoves: %8"PRIDX", bal: %7.4"PRREAL", cut: %9"PRIDX"\n", + nmoves, 1.0*imax(nparts, pwgts, 1)*nparts/tvwgt, ComputeCut(graph, where)); + + if (nmoves == 0) + break; + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This uses Metis' routines for balancing and refining the multi-BFS + solution. +*/ +/*************************************************************************/ +void BalanceAndRefine(ctrl_t *origctrl, graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i; + idx_t options[METIS_NOPTIONS]; + ctrl_t *ctrl; + + FreeWorkSpace(origctrl); + + METIS_SetDefaultOptions(options); + options[METIS_OPTION_NITER] = origctrl->niter; + options[METIS_OPTION_DBGLVL] = origctrl->dbglvl; + options[METIS_OPTION_UFACTOR] = origctrl->ufactor; + options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; + + ctrl = SetupCtrl(METIS_OP_KMETIS, options, 1, nparts, NULL, NULL); + + AllocateWorkSpace(ctrl, graph); + AllocateRefinementWorkSpace(ctrl, 2*graph->nedges); + + AllocateKWayPartitionMemory(ctrl, graph); + icopy(graph->nvtxs, where, graph->where); + + ComputeKWayPartitionParams(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(origctrl->RefTmr)); + + SetupKWayBalMultipliers(ctrl, graph); + + if (!IsBalanced(ctrl, graph, .02)) { + ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE); + Greedy_KWayOptimize(ctrl, graph, 1, 0, OMODE_BALANCE); + ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE); + } + + Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 5.0, OMODE_REFINE); + icopy(graph->nvtxs, graph->where, where); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(origctrl->RefTmr)); + + FreeRData(graph); + FreeCtrl(&ctrl); + + AllocateWorkSpace(origctrl, graph); +} + + diff --git 
a/3rdParty/metis/metis-5.1.1/libmetis/kwayfm.c b/3rdParty/metis/metis-5.1.1/libmetis/kwayfm.c new file mode 100644 index 000000000..365dbad8b --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/kwayfm.c @@ -0,0 +1,2548 @@ +/*! +\file +\brief Routines for k-way refinement + +\date Started 7/28/97 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: kwayfm.c 17513 2014-08-05 16:20:50Z dominique $ +*/ + +#include "metislib.h" + + + +/*************************************************************************/ +/* Top-level routine for k-way partitioning refinement. This routine just + calls the appropriate refinement routine based on the objectives and + constraints. ctrl->objtype selects edge-cut or volume refinement; graph->ncon == 1 selects the plain routine and any other value its Mc* counterpart. niter, ffactor and omode are passed through unchanged, and any other objtype is reported through gk_errexit(). */ +/*************************************************************************/ +void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + if (graph->ncon == 1) + Greedy_KWayCutOptimize(ctrl, graph, niter, ffactor, omode); + else + Greedy_McKWayCutOptimize(ctrl, graph, niter, ffactor, omode); + break; + + case METIS_OBJTYPE_VOL: + if (graph->ncon == 1) + Greedy_KWayVolOptimize(ctrl, graph, niter, ffactor, omode); + else + Greedy_McKWayVolOptimize(ctrl, graph, niter, ffactor, omode); + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ \param omode is the type of optimization that will be performed among + OMODE_REFINE and OMODE_BALANCE + NOTE: ffactor is overwritten with 0.0 on entry, so the value passed by the caller is ignored. + NOTE(review): the dispatcher Greedy_KWayOptimize() calls Greedy_KWayCutOptimize(), not this routine. +*/ +/**************************************************************************/ +void Greedy_KWayCutOptimize0(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; + idx_t from, me, to, oldcut, vwgt; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *tpwgts, ubfactor; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + rpq_t *queue; + real_t rgain; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + ffactor = 0.0; /* discards the caller-supplied value */ + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts+2); /* nparts entries plus two extra slots that are zeroed after the loop */ + maxpwgts = iwspacemalloc(ctrl, nparts+2); + + if (omode == OMODE_BALANCE) + ubfactor = ctrl->ubfactors[0]; + else + ubfactor = gk_max(ctrl->ubfactors[0], ComputeLoadImbalance(graph, nparts, ctrl->pijbm)); /* REFINE: the larger of the configured factor and the ComputeLoadImbalance() result */ + + for (i=0; i<nparts; i++) { + maxpwgts[i] = tpwgts[i]*graph->tvwgt[0]*ubfactor; + minpwgts[i] = tpwgts[i]*graph->tvwgt[0]*(1.0/ubfactor); + } + maxpwgts[nparts] = maxpwgts[nparts+1] = 0; + minpwgts[nparts] = minpwgts[nparts+1] = 0; + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. 
It is used when ctrl->minconn is set to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. */ + safetos = iset(nparts+2, 2, iwspacemalloc(ctrl, nparts+2)); + safetos[nparts] = safetos[nparts+1] = 0; /* the two extra slots are never allowed as targets */ + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; /* NOTE(review): adwgts is not read again in this routine */ + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX, + (omode == OMODE_REFINE ? 
"GRC" : "GBC"), + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = rpqCreate(nvtxs); + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + ASSERT(ComputeCut(graph, where) == graph->mincut); + + if (omode == OMODE_BALANCE) { + /* Check to see if things are out of balance, given the tolerance */ + for (i=0; i<nparts+2; i++) { /* NOTE(review): also reads pwgts[nparts] and pwgts[nparts+1]; confirm graph->pwgts has nparts+2 entries */ + if (pwgts[i] > maxpwgts[i] || pwgts[i] < minpwgts[i]) + break; + } + if (i == nparts+2) /* Things are balanced. Return right away */ + break; + } + + oldcut = graph->mincut; + nbnd = graph->nbnd; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(nbnd, perm, nbnd/4, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[perm[ii]]; + rgain = (graph->ckrinfo[i].nnbrs > 0 ? 
+ 1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) + - graph->ckrinfo[i].id; + rpqInsert(queue, i, rgain); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = rpqGetTop(queue)) == -1) /* a -1 from rpqGetTop() ends this pass */ + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->ckrinfo+i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + from = where[i]; + vwgt = graph->vwgt[i]; + + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->id > 0 && pwgts[from]-vwgt < minpwgts[from]) + continue; + } + else { /* OMODE_BALANCE */ + if (pwgts[from]-vwgt < minpwgts[from]) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { /* first acceptable target, scanning the neighbor list from its last entry down */ + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].ed-myrinfo->id; + if (gain >= 0 && pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { /* look through the remaining entries for a better target than k */ + if (!safetos[to=mynbrs[j].pid]) + continue; + gain = mynbrs[j].ed-myrinfo->id; + if ((mynbrs[j].ed > mynbrs[k].ed && pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + || + (mynbrs[j].ed == mynbrs[k].ed && + tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid])) + k = j; + } + + to = mynbrs[k].pid; + + gain = mynbrs[k].ed-myrinfo->id; + if (!(gain > 0 /* skip unless gain > 0, or gain == 0 and one of the three tie-break conditions holds */ + || (gain == 0 + && (pwgts[from] >= maxpwgts[from] + || tpwgts[to]*pwgts[from] > tpwgts[from]*(pwgts[to]+vwgt) + || (iii%2 == 0 && safetos[to] == 2) + ) + ) + ) + ) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if 
(!safetos[to=mynbrs[k].pid]) + continue; + if (pwgts[to]+vwgt <= maxpwgts[to] || + tpwgts[from]*(pwgts[to]+vwgt) <= tpwgts[to]*pwgts[from]) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + k = j; + } + + to = mynbrs[k].pid; + + if (pwgts[from] < maxpwgts[from] && pwgts[to] > minpwgts[to] && + mynbrs[k].ed-myrinfo->id < 0) + continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ed-myrinfo->id; /* the same ed-id difference that the REFINE branch stores in 'gain' */ + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX"/%"PRIDX" to %3"PRIDX"/%"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". Cut: %6"PRIDX"\n", + i, from, safetos[from], to, safetos[to], pwgts[from], pwgts[to], mynbrs[k].ed-myrinfo->id, graph->mincut)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms); + + /* take care of the adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], &maxndoms); + UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], &maxndoms); + } + } + } + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt); + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; /* from here on myrinfo refers to the neighbor ii, no longer to i */ + + oldnnbrs = myrinfo->nnbrs; + + 
UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, + nupd, updptr, updind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + } + + graph->nbnd = nbnd; /* store the pass-local nbnd back into the graph */ + + /* Reset the vstatus and associated data structures */ + for (i=0; i<nupd; i++) { + ASSERT(updptr[updind[i]] != -1); + ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT); + vstatus[updind[i]] = VPQSTATUS_NOTPRESENT; + updptr[updind[i]] = -1; + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut)) /* stop when nothing moved or, in REFINE mode, when the cut is unchanged */ + break; + } + + rpqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ \param omode is the type of optimization that will be performed among + OMODE_REFINE and OMODE_BALANCE + NOTE: ffactor is overwritten with 0.0 on entry, so the value passed by the caller is ignored. + This is the routine Greedy_KWayOptimize() calls for METIS_OBJTYPE_CUT when graph->ncon == 1. +*/ +/**************************************************************************/ +void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; + idx_t from, me, to, oldcut, vwgt; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *tpwgts, ubfactor; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + rpq_t *queue; + real_t rgain; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + ffactor = 0.0; /* discards the caller-supplied value */ + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts+2); /* nparts entries plus two extra slots that are zeroed after the loop */ + maxpwgts = iwspacemalloc(ctrl, nparts+2); + + if (omode == OMODE_BALANCE) + ubfactor = ctrl->ubfactors[0]; + else + ubfactor = gk_max(ctrl->ubfactors[0], ComputeLoadImbalance(graph, nparts, ctrl->pijbm)); /* REFINE: the larger of the configured factor and the ComputeLoadImbalance() result */ + + for (i=0; i<nparts; i++) { + maxpwgts[i] = tpwgts[i]*graph->tvwgt[0]*ubfactor; + minpwgts[i] = tpwgts[i]*graph->tvwgt[0]*(1.0/ubfactor); + } + maxpwgts[nparts] = maxpwgts[nparts+1] = 0; + minpwgts[nparts] = minpwgts[nparts+1] = 0; + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. 
It is used when ctrl->minconn is set to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. */ + safetos = iset(nparts+2, 2, iwspacemalloc(ctrl, nparts+2)); + safetos[nparts] = safetos[nparts+1] = 0; /* the two extra slots are never allowed as targets */ + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; /* NOTE(review): adwgts is not read again in this routine */ + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX, + (omode == OMODE_REFINE ? 
"GRC" : "GBC"), + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = rpqCreate(nvtxs); + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + ASSERT(ComputeCut(graph, where) == graph->mincut); + + if (omode == OMODE_BALANCE) { + /* Check to see if things are out of balance, given the tolerance */ + for (i=0; i<nparts+2; i++) { /* NOTE(review): also reads pwgts[nparts] and pwgts[nparts+1]; confirm graph->pwgts has nparts+2 entries */ + if (pwgts[i] > maxpwgts[i] || pwgts[i] < minpwgts[i]) + break; + } + if (i == nparts+2) /* Things are balanced. Return right away */ + break; + } + + oldcut = graph->mincut; + nbnd = graph->nbnd; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(nbnd, perm, nbnd/4, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[perm[ii]]; + rgain = (graph->ckrinfo[i].nnbrs > 0 ? 
+ 1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) + - graph->ckrinfo[i].id; + rpqInsert(queue, i, rgain); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = rpqGetTop(queue)) == -1) /* a -1 from rpqGetTop() ends this pass */ + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->ckrinfo+i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + from = where[i]; + vwgt = graph->vwgt[i]; + +#ifdef XXX + /* Prevent moves that make 'from' domain underbalanced (this block is compiled only when XXX is defined) */ + if (omode == OMODE_REFINE) { + if (myrinfo->id > 0 && pwgts[from]-vwgt < minpwgts[from]) + continue; + } + else { /* OMODE_BALANCE */ + if (pwgts[from]-vwgt < minpwgts[from]) + continue; + } +#endif + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { /* first acceptable target, scanning the neighbor list from its last entry down */ + if (!safetos[to=mynbrs[k].pid]) + continue; + if (((mynbrs[k].ed > myrinfo->id) && + ((pwgts[from]-vwgt >= minpwgts[from]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) && + ((pwgts[to]+vwgt <= maxpwgts[to]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) || + ((mynbrs[k].ed == myrinfo->id) && + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { /* look through the remaining entries for a better target than k */ + if (!safetos[to=mynbrs[j].pid]) + continue; + if (((mynbrs[j].ed > mynbrs[k].ed) && + ((pwgts[from]-vwgt >= minpwgts[from]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) && + ((pwgts[to]+vwgt <= maxpwgts[to]) || + (tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt))) + ) || + ((mynbrs[j].ed == mynbrs[k].ed) && + (tpwgts[mynbrs[k].pid]*pwgts[to] 
< tpwgts[to]*pwgts[mynbrs[k].pid])) + ) + k = j; + } + + to = mynbrs[k].pid; + + gain = mynbrs[k].ed-myrinfo->id; /* NOTE(review): gain is not read again; the acceptance test that used it is commented out below */ + /* + if (!(gain > 0 + || (gain == 0 + && (pwgts[from] >= maxpwgts[from] + || tpwgts[to]*pwgts[from] > tpwgts[from]*(pwgts[to]+vwgt) + || (iii%2 == 0 && safetos[to] == 2) + ) + ) + ) + ) + continue; + */ + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + /* the correctness of the following test follows from the correctness + of the similar test in the subsequent loop */ + if (from >= nparts || tpwgts[from]*pwgts[to] < tpwgts[to]*(pwgts[from]-vwgt)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + k = j; + } + + to = mynbrs[k].pid; + + //if (pwgts[from] < maxpwgts[from] && pwgts[to] > minpwgts[to] && + // mynbrs[k].ed-myrinfo->id < 0) + // continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ed-myrinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX"/%"PRIDX" to %3"PRIDX"/%"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". 
Cut: %6"PRIDX"\n", + i, from, safetos[from], to, safetos[to], pwgts[from], pwgts[to], mynbrs[k].ed-myrinfo->id, graph->mincut)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms); + + /* take care of the adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], &maxndoms); + UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], &maxndoms); + } + } + } + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt); + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; /* from here on myrinfo refers to the neighbor ii, no longer to i */ + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, + nupd, updptr, updind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + } + + graph->nbnd = nbnd; /* store the pass-local nbnd back into the graph */ + + /* Reset the vstatus and associated data structures */ + for (i=0; i<nupd; i++) { + ASSERT(updptr[updind[i]] != -1); + ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT); + vstatus[updind[i]] = VPQSTATUS_NOTPRESENT; + updptr[updind[i]] = -1; + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." 
+ " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut)) /* stop when nothing moved or, in REFINE mode, when the cut is unchanged */ + break; + } + + rpqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way refinement that minimizes the communication volume. This is a + greedy routine and the vertices are visited in decreasing gv order. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. + \param omode is the type of optimization that will be performed among OMODE_REFINE and OMODE_BALANCE +*/ +/**************************************************************************/ +void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; + idx_t from, me, to, oldcut, vwgt; + idx_t *xadj, *adjncy; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? 
BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *tpwgts; + + /* Volume-specific/different variables */ + ipq_t *queue; + idx_t oldvol, xgain; + idx_t *vmarker, *pmarker, *modind; + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + bndptr = graph->bndptr; + bndind = graph->bndind; + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts); + maxpwgts = iwspacemalloc(ctrl, nparts); + + for (i=0; i<nparts; i++) { /* both modes use ctrl->ubfactors[0] directly here */ + maxpwgts[i] = ctrl->tpwgts[i]*graph->tvwgt[0]*ctrl->ubfactors[0]; + minpwgts[i] = ctrl->tpwgts[i]*graph->tvwgt[0]*(1.0/ctrl->ubfactors[0]); + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. It is used when ctrl->minconn is set to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; /* NOTE(review): adwgts is not read again in this routine */ + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL + ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX, + (omode == OMODE_REFINE ? 
"GRV" : "GBV"), + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = ipqCreate(nvtxs); + + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + ASSERT(ComputeVolume(graph, where) == graph->minvol); + + if (omode == OMODE_BALANCE) { + /* Check to see if things are out of balance, given the tolerance */ + for (i=0; i<nparts; i++) { + if (pwgts[i] > maxpwgts[i]) /* only the upper bound is tested in this routine */ + break; + } + if (i == nparts) /* Things are balanced. Return right away */ + break; + } + + oldcut = graph->mincut; + oldvol = graph->minvol; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1); + for (ii=0; ii<graph->nbnd; ii++) { + i = bndind[perm[ii]]; + ipqInsert(queue, i, graph->vkrinfo[i].gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = ipqGetTop(queue)) == -1) /* a -1 from ipqGetTop() ends this pass */ + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->vkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + from = where[i]; + vwgt = graph->vwgt[i]; + + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->nid > 0 && pwgts[from]-vwgt < minpwgts[from]) + continue; + } + else { /* OMODE_BALANCE */ + if (pwgts[from]-vwgt < minpwgts[from]) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + 
+ if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0); /* graph->vsize[i] when nid == 0 and ned > 0, otherwise 0; added to each candidate's gv below */ + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { /* first acceptable target, scanning the neighbor list from its last entry down */ + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].gv + xgain; + if (gain >= 0 && pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { /* look through the remaining entries for a better target than k */ + if (!safetos[to=mynbrs[j].pid]) + continue; + gain = mynbrs[j].gv + xgain; + if ((mynbrs[j].gv > mynbrs[k].gv && + pwgts[to]+vwgt <= maxpwgts[to]+ffactor*gain) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned > mynbrs[k].ned && + pwgts[to]+vwgt <= maxpwgts[to]) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned == mynbrs[k].ned && + tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + ) + k = j; + } + to = mynbrs[k].pid; + + ASSERT(xgain+mynbrs[k].gv >= 0); + + j = 0; /* j is reused from here on as an accept flag for the chosen target */ + if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0) + j = 1; + else if (mynbrs[k].ned-myrinfo->nid == 0) { + if ((iii%2 == 0 && safetos[to] == 2) || + pwgts[from] >= maxpwgts[from] || + tpwgts[from]*(pwgts[to]+vwgt) < tpwgts[to]*pwgts[from]) + j = 1; + } + if (j == 0) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (pwgts[to]+vwgt <= maxpwgts[to] || + tpwgts[from]*(pwgts[to]+vwgt) <= tpwgts[to]*pwgts[from]) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (tpwgts[mynbrs[k].pid]*pwgts[to] < tpwgts[to]*pwgts[mynbrs[k].pid]) + k = j; + } + to = mynbrs[k].pid; + + if (pwgts[from] < maxpwgts[from] && pwgts[to] > minpwgts[to] && + (xgain+mynbrs[k].gv < 0 || + (xgain+mynbrs[k].gv == 0 && mynbrs[k].ned-myrinfo->nid 
< 0)) + ) + continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + INC_DEC(pwgts[to], pwgts[from], vwgt); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + graph->minvol -= (xgain+mynbrs[k].gv); + where[i] = to; /* where[] is assigned directly in this routine */ + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". " + "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, + graph->mincut, graph->minvol)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms); + + /* take care of the adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -1, &maxndoms); /* a weight of 1 per edge; adjwgt is not used in this routine */ + UpdateEdgeSubDomainGraph(ctrl, to, me, 1, &maxndoms); + } + } + } + + /* Update the id/ed/gains/bnd/queue of potentially affected nodes */ + KWayVolUpdate(ctrl, graph, i, from, to, queue, vstatus, &nupd, updptr, + updind, bndtype, vmarker, pmarker, modind); + + /*CheckKWayVolPartitionParams(ctrl, graph); */ + } + + + /* Reset the vstatus and associated data structures */ + for (i=0; i<nupd; i++) { + ASSERT(updptr[updind[i]] != -1); + ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT); + vstatus[updind[i]] = VPQSTATUS_NOTPRESENT; + updptr[updind[i]] = -1; + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." 
+ " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || + (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut)) + break; /* stop when nothing moved or, in REFINE mode, when neither the volume nor the cut changed */ + } + + ipqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. + \param omode is the type of optimization that will performed among + OMODE_REFINE and OMODE_BALANCE + + +*/ +/**************************************************************************/ +void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; + idx_t from, me, to, cto, oldcut; + idx_t *xadj, *vwgt, *adjncy, *adjwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? 
BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *ubfactors, *pijbm; + real_t origbal; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + rpq_t *queue; + real_t rgain; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + pijbm = ctrl->pijbm; + + + /* Determine the ubfactors. The method used is different based on omode. + When OMODE_BALANCE, the ubfactors are those supplied by the user. + When OMODE_REFINE, the ubfactors are the max of the current partition + and the user-specified ones. */ + ubfactors = rwspacemalloc(ctrl, ncon); + ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors); + origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors); + if (omode == OMODE_BALANCE) { + rcopy(ncon, ctrl->ubfactors, ubfactors); + } + else { + for (i=0; i<ncon; i++) + ubfactors[i] = (ubfactors[i] > ctrl->ubfactors[i] ? ubfactors[i] : ctrl->ubfactors[i]); + } + + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts*ncon); + maxpwgts = iwspacemalloc(ctrl, nparts*ncon); + + for (i=0; i<nparts; i++) { + for (j=0; j<ncon; j++) { + maxpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j]; + /*minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]);*/ + minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2; + } + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL")," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX", (%"PRIDX")", + (omode == OMODE_REFINE ? 
"GRC" : "GBC"), + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), imax(nparts*ncon, maxpwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), origbal, + graph->nvtxs, graph->nbnd, graph->mincut, niter); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = rpqCreate(nvtxs); + + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + ASSERT(ComputeCut(graph, where) == graph->mincut); + + /* In balancing mode, exit as soon as balance is reached */ + if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0)) + break; + + oldcut = graph->mincut; + nbnd = graph->nbnd; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(nbnd, perm, nbnd/4, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[perm[ii]]; + rgain = (graph->ckrinfo[i].nnbrs > 0 ? 
+ 1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) + - graph->ckrinfo[i].id; + rpqInsert(queue, i, rgain); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = rpqGetTop(queue)) == -1) + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->ckrinfo+i; + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + from = where[i]; + + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->id > 0 && + !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + else { /* OMODE_BALANCE */ + if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + + if (ctrl->minconn) + SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].ed-myrinfo->id; + if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if ((mynbrs[j].ed > mynbrs[k].ed && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].ed == mynbrs[k].ed && + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon))) { + k = j; + cto = to; + } + } + to = cto; + + gain = mynbrs[k].ed-myrinfo->id; + if (!(gain > 0 + || (gain == 0 + && (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, 
pijbm+to*ncon) + || (iii%2 == 0 && safetos[to] == 2) + ) + ) + ) + ) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon)) { + k = j; + cto = to; + } + } + to = cto; + + if (mynbrs[k].ed-myrinfo->id < 0 && + !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + continue; + } + + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ed-myrinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" to %3"PRIDX". Gain: %4"PRIDX". 
Cut: %6"PRIDX"\n", + i, to, mynbrs[k].ed-myrinfo->id, graph->mincut)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms); + + /* take care of the adjancent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], &maxndoms); + UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], &maxndoms); + } + } + } + + /* Update ID/ED and BND related information for the moved vertex */ + iaxpy(ncon, 1, vwgt+i*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+i*ncon, 1, pwgts+from*ncon, 1); + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, + nbnd, bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, + nupd, updptr, updind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + } + + graph->nbnd = nbnd; + + /* Reset the vstatus and associated data structures */ + for (i=0; i<nupd; i++) { + ASSERT(updptr[updind[i]] != -1); + ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT); + vstatus[updind[i]] = VPQSTATUS_NOTPRESENT; + updptr[updind[i]] = -1; + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." 
+ " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut)) + break; + } + + rpqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! Multi-constraint K-way refinement/balancing that minimizes the + communication volume. This is a greedy routine and the vertices are + visited in decreasing gv order. + + \param ctrl is the control structure. + \param graph is the graph that is being refined. + \param niter is the maximum number of refinement passes. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. It is not referenced by the + body of this routine. + \param omode is the type of optimization that will be performed among + OMODE_REFINE and OMODE_BALANCE. + +*/ +/**************************************************************************/ +void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; + idx_t from, me, to, cto, oldcut; + idx_t *xadj, *vwgt, *adjncy; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nmoved, nupd, *vstatus, *updptr, *updind; + idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL; + idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL; + idx_t bndtype = (omode == OMODE_REFINE ? 
BNDTYPE_REFINE : BNDTYPE_BALANCE); + real_t *ubfactors, *pijbm; + real_t origbal; + + /* Volume-specific/different variables */ + ipq_t *queue; + idx_t oldvol, xgain; + idx_t *vmarker, *pmarker, *modind; + vkrinfo_t *myrinfo; + vnbr_t *mynbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + bndptr = graph->bndptr; + bndind = graph->bndind; + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + pijbm = ctrl->pijbm; + + + /* Determine the ubfactors. The method used is different based on omode. + When OMODE_BALANCE, the ubfactors are those supplied by the user. + When OMODE_REFINE, the ubfactors are the max of the current partition + and the user-specified ones. */ + ubfactors = rwspacemalloc(ctrl, ncon); + ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors); + origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors); + if (omode == OMODE_BALANCE) { + rcopy(ncon, ctrl->ubfactors, ubfactors); + } + else { + for (i=0; i<ncon; i++) + ubfactors[i] = (ubfactors[i] > ctrl->ubfactors[i] ? ubfactors[i] : ctrl->ubfactors[i]); + } + + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts*ncon); + maxpwgts = iwspacemalloc(ctrl, nparts*ncon); + + for (i=0; i<nparts; i++) { + for (j=0; j<ncon; j++) { + maxpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j]; + /*minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]); */ + minpwgts[i*ncon+j] = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2; + } + } + + perm = iwspacemalloc(ctrl, nvtxs); + + + /* This stores the valid target subdomains. It is used when ctrl->minconn to + control the subdomains to which moves are allowed to be made. + When ctrl->minconn is false, the default values of 2 allow all moves to + go through and it does not interfere with the zero-gain move selection. 
*/ + safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts)); + + if (ctrl->minconn) { + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + doms = iset(nparts, 0, ctrl->pvec1); + } + + + /* Setup updptr, updind like boundary info to keep track of the vertices whose + vstatus's need to be reset at the end of the inner iteration */ + vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs)); + updptr = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); + updind = iwspacemalloc(ctrl, nvtxs); + + if (ctrl->contig) { + /* The arrays that will be used for limited check of articulation points */ + bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + bfsind = iwspacemalloc(ctrl, nvtxs); + bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + } + + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL")," + ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX", (%"PRIDX")", + (omode == OMODE_REFINE ? 
"GRV" : "GBV"), + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), imax(nparts*ncon, maxpwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), origbal, + graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol, niter); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + queue = ipqCreate(nvtxs); + + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + ASSERT(ComputeVolume(graph, where) == graph->minvol); + + /* In balancing mode, exit as soon as balance is reached */ + if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0)) + break; + + oldcut = graph->mincut; + oldvol = graph->minvol; + nupd = 0; + + if (ctrl->minconn) + maxndoms = imax(nparts, nads,1); + + /* Insert the boundary vertices in the priority queue */ + irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1); + for (ii=0; ii<graph->nbnd; ii++) { + i = bndind[perm[ii]]; + ipqInsert(queue, i, graph->vkrinfo[i].gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(nupd, updind, updptr, i); + } + + /* Start extracting vertices from the queue and try to move them */ + for (nmoved=0, iii=0;;iii++) { + if ((i = ipqGetTop(queue)) == -1) + break; + vstatus[i] = VPQSTATUS_EXTRACTED; + + myrinfo = graph->vkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + from = where[i]; + + /* Prevent moves that make 'from' domain underbalanced */ + if (omode == OMODE_REFINE) { + if (myrinfo->nid > 0 && + !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + else { /* OMODE_BALANCE */ + if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minpwgts+from*ncon)) + continue; + } + + if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk)) + continue; + + if (ctrl->minconn) + 
SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms); + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0); + + /* Find the most promising subdomain to move to */ + if (omode == OMODE_REFINE) { + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + gain = mynbrs[k].gv + xgain; + if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + gain = mynbrs[j].gv + xgain; + if ((mynbrs[j].gv > mynbrs[k].gv && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned > mynbrs[k].ned && + ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon)) + || + (mynbrs[j].gv == mynbrs[k].gv && + mynbrs[j].ned == mynbrs[k].ned && + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon))) { + k = j; + cto = to; + } + } + to = cto; + + j = 0; + if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0) + j = 1; + else if (mynbrs[k].ned-myrinfo->nid == 0) { + if ((iii%2 == 0 && safetos[to] == 2) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + j = 1; + } + if (j == 0) + continue; + } + else { /* OMODE_BALANCE */ + for (k=myrinfo->nnbrs-1; k>=0; k--) { + if (!safetos[to=mynbrs[k].pid]) + continue; + if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxpwgts+to*ncon) || + BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + break; + } + if (k < 0) + continue; /* break out if you did not find a candidate */ + + cto = to; + for (j=k-1; j>=0; j--) { + if (!safetos[to=mynbrs[j].pid]) + continue; + if 
(BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + 1, pwgts+cto*ncon, pijbm+cto*ncon, + 1, pwgts+to*ncon, pijbm+to*ncon)) { + k = j; + cto = to; + } + } + to = cto; + + if ((xgain+mynbrs[k].gv < 0 || + (xgain+mynbrs[k].gv == 0 && mynbrs[k].ned-myrinfo->nid < 0)) + && + !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, + -1, pwgts+from*ncon, pijbm+from*ncon, + +1, pwgts+to*ncon, pijbm+to*ncon)) + continue; + } + + + /*===================================================================== + * If we got here, we can now move the vertex from 'from' to 'to' + *======================================================================*/ + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + graph->minvol -= (xgain+mynbrs[k].gv); + where[i] = to; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". " + "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, + graph->mincut, graph->minvol)); + + /* Update the subdomain connectivity information */ + if (ctrl->minconn) { + /* take care of i's move itself */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms); + + /* take care of the adjancent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -1, &maxndoms); + UpdateEdgeSubDomainGraph(ctrl, to, me, 1, &maxndoms); + } + } + } + + /* Update pwgts */ + iaxpy(ncon, 1, vwgt+i*ncon, 1, pwgts+to*ncon, 1); + iaxpy(ncon, -1, vwgt+i*ncon, 1, pwgts+from*ncon, 1); + + /* Update the id/ed/gains/bnd/queue of potentially affected nodes */ + KWayVolUpdate(ctrl, graph, i, from, to, queue, vstatus, &nupd, updptr, + updind, bndtype, vmarker, pmarker, modind); + + /*CheckKWayVolPartitionParams(ctrl, graph); */ + } + + + /* Reset the vstatus and associated data structures */ + for (i=0; i<nupd; i++) { + ASSERT(updptr[updind[i]] != -1); + 
ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT); + vstatus[updind[i]] = VPQSTATUS_NOTPRESENT; + updptr[updind[i]] = -1; + } + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX, + imin(nparts*ncon, pwgts,1), imax(nparts*ncon, pwgts,1), + ComputeLoadImbalance(graph, nparts, pijbm), + graph->nbnd, nmoved, graph->mincut, graph->minvol); + if (ctrl->minconn) + printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads,1), isum(nparts, nads,1)); + printf("\n"); + } + + if (nmoved == 0 || + (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut)) + break; + } + + ipqDestroy(queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function performs an approximate articulation vertex test. + It marks the neighbors of i that belong to i's partition and then runs a + BFS from one of them. The BFS is restricted to that partition, never + enqueues i itself, and only expands vertices whose level is below + BFSDEPTH, which is why the test is approximate. + + \param i is the vertex being tested. + \param xadj, adjncy is the adjacency structure of the graph. + \param where is the partition vector. + \param bfslvl, bfsind, bfsmrk are work arrays. The ASSERTs expect bfslvl + and bfsmrk to be 0 for i's same-partition neighbors (and bfslvl[i] + to be 0) on entry; every entry touched here is reset before + returning. + \returns 0 if i has at most one neighbor in its own partition or if the + limited BFS reached all of them, and 1 otherwise. */ +/*************************************************************************/ +idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, idx_t *where, + idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk) +{ + idx_t ii, j, k=0, head, tail, nhits, tnhits, from, BFSDEPTH=5; + + from = where[i]; + + /* Determine if the vertex is safe to move from a contiguity standpoint */ + for (tnhits=0, j=xadj[i]; j<xadj[i+1]; j++) { + if (where[adjncy[j]] == from) { + ASSERT(bfsmrk[adjncy[j]] == 0); + ASSERT(bfslvl[adjncy[j]] == 0); + bfsmrk[k=adjncy[j]] = 1; /* k is left holding the last marked neighbor */ + tnhits++; + } + } + + /* Easy cases */ + if (tnhits == 0) + return 0; + if (tnhits == 1) { + bfsmrk[k] = 0; + return 0; + } + + ASSERT(bfslvl[i] == 0); + bfslvl[i] = 1; /* a nonzero level keeps i itself from being enqueued */ + + bfsind[0] = k; /* That was the last one from the previous loop */ + bfslvl[k] = 1; + bfsmrk[k] = 0; + head = 0; + tail = 1; + + /* Do a limited BFS traversal to see if you can get to all the other nodes */ + /* nhits starts at 1 because the BFS seed bfsind[0] is itself a marked neighbor */ for (nhits=1; head<tail; ) { + ii = bfsind[head++]; + for (j=xadj[ii]; j<xadj[ii+1]; j++) { + 
if (where[k=adjncy[j]] == from) { + if (bfsmrk[k]) { + bfsmrk[k] = 0; + if (++nhits == tnhits) + break; + } + if (bfslvl[k] == 0 && bfslvl[ii] < BFSDEPTH) { + bfsind[tail++] = k; + bfslvl[k] = bfslvl[ii]+1; + } + } + } + if (nhits == tnhits) + break; + } + + /* Reset the various BFS related arrays */ + bfslvl[i] = 0; + for (j=0; j<tail; j++) + bfslvl[bfsind[j]] = 0; + + + /* Reset the bfsmrk array for the next vertex when it has not already been cleared */ + if (nhits < tnhits) { + for (j=xadj[i]; j<xadj[i+1]; j++) + if (where[adjncy[j]] == from) + bfsmrk[adjncy[j]] = 0; + } + + return (nhits != tnhits); +} + + +/*************************************************************************/ +/*! + This function updates the edge and volume gains due to the movement of + vertex v from 'from' to 'to'. + NOTE(review): the caller visible in this file sets where[v] = to before + calling; the final recomputation pass reads where[v] and appears to rely + on that -- confirm against the other callers. + + \param ctrl is the control structure. + \param graph is the graph being partitioned. + \param v is the vertex that is moving. + \param from is the original partition of v. + \param to is the new partition of v. + \param queue is the priority queue. If the queue is NULL, no priority-queue + related updates are performed. + \param vstatus is an array that marks the status of the vertex in terms + of the priority queue. If queue is NULL, this parameter is ignored. + \param r_nupd points to the number of vertices currently stored in updind. + If queue is NULL, this parameter is ignored. + \param updptr stores the index of each vertex in updind. If queue is NULL, + this parameter is ignored. + \param updind is the list of vertices that have been inserted/removed from + the queue. If queue is NULL, this parameter is ignored. + \param vmarker is of size nvtxs and is used internally as a temporary array. + On entry and return all of its entries are 0. + \param pmarker is of size nparts and is used internally as a temporary marking + array. On entry and return all of its entries are -1. 
+ \param modind is an array of size nvtxs and is used to keep track of the + list of vertices whose gains need to be updated. + \param bndtype selects the boundary criterion: with BNDTYPE_REFINE a vertex + is kept on the boundary when its gv >= 0, otherwise when its ned > 0. +*/ +/*************************************************************************/ +void KWayVolUpdate(ctrl_t *ctrl, graph_t *graph, idx_t v, idx_t from, + idx_t to, ipq_t *queue, idx_t *vstatus, idx_t *r_nupd, idx_t *updptr, + idx_t *updind, idx_t bndtype, idx_t *vmarker, idx_t *pmarker, + idx_t *modind) +{ + idx_t i, ii, iii, j, jj, k, kk, l, u, nmod, other, me, myidx; + idx_t *xadj, *vsize, *adjncy, *where; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + xadj = graph->xadj; + adjncy = graph->adjncy; + vsize = graph->vsize; + where = graph->where; + + myrinfo = graph->vkrinfo+v; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + + /*====================================================================== + * Remove the contributions on the gain made by 'v'. + *=====================================================================*/ + for (k=0; k<myrinfo->nnbrs; k++) + pmarker[mynbrs[k].pid] = k; + pmarker[from] = k; /* k == nnbrs here: only flags 'from' as != -1; it is never used as an index because other == from is handled separately below */ + + myidx = pmarker[to]; /* Keep track of the index in mynbrs of the 'to' domain */ + + for (j=xadj[v]; j<xadj[v+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = graph->vkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + if (other == from) { + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv += vsize[v]; + } + } + else { + ASSERT(pmarker[other] != -1); + + if (mynbrs[pmarker[other]].ned > 1) { + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv += vsize[v]; + } + } + else { /* There is only one connection */ + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] != -1) + onbrs[k].gv -= vsize[v]; + } + } + } + } + + for (k=0; k<myrinfo->nnbrs; k++) + pmarker[mynbrs[k].pid] = -1; + pmarker[from] = -1; + + + /*====================================================================== + * Update the id/ed of vertex 'v' + 
*=====================================================================*/ + if (myidx == -1) { + myidx = myrinfo->nnbrs++; + ASSERT(myidx < xadj[v+1]-xadj[v]); + mynbrs[myidx].ned = 0; + } + myrinfo->ned += myrinfo->nid-mynbrs[myidx].ned; + SWAP(myrinfo->nid, mynbrs[myidx].ned, j); + if (mynbrs[myidx].ned == 0) + mynbrs[myidx] = mynbrs[--myrinfo->nnbrs]; + else + mynbrs[myidx].pid = from; + + + /*====================================================================== + * Update the degrees of adjacent vertices and their volume gains + *=====================================================================*/ + vmarker[v] = 1; + modind[0] = v; + nmod = 1; + for (j=xadj[v]; j<xadj[v+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + + if (!vmarker[ii]) { /* The marking is done for boundary and max gv calculations */ + vmarker[ii] = 2; + modind[nmod++] = ii; + } + + myrinfo = graph->vkrinfo+ii; + if (myrinfo->inbr == -1) + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]+1); + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + if (me == from) { + INC_DEC(myrinfo->ned, myrinfo->nid, 1); + } + else if (me == to) { + INC_DEC(myrinfo->nid, myrinfo->ned, 1); + } + + /* Remove the edgeweight from the 'pid == from' entry of the vertex */ + if (me != from) { + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == from) { + if (mynbrs[k].ned == 1) { + mynbrs[k] = mynbrs[--myrinfo->nnbrs]; + vmarker[ii] = 1; /* You do a complete .gv calculation */ + + /* All vertices adjacent to 'ii' need to be updated */ + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + u = adjncy[jj]; + other = where[u]; + orinfo = graph->vkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kk<orinfo->nnbrs; kk++) { + if (onbrs[kk].pid == from) { + onbrs[kk].gv -= vsize[ii]; + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + } + else { + mynbrs[k].ned--; + + /* Update the gv due to single 'ii' connection to 'from' */ + if 
(mynbrs[k].ned == 1) { + /* find the vertex 'u' that 'ii' was connected into 'from' */ + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + u = adjncy[jj]; + other = where[u]; + + if (other == from) { + orinfo = graph->vkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + /* The following is correct because domains in common + between ii and u will lead to a reduction over the + previous gain, whereas domains only in u but not in + ii, will lead to no change as opposed to the earlier + increase */ + for (kk=0; kk<orinfo->nnbrs; kk++) + onbrs[kk].gv += vsize[ii]; + + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + } + break; + } + } + } + + + /* Add the edgeweight to the 'pid == to' entry of the vertex */ + if (me != to) { + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == to) { + mynbrs[k].ned++; + + /* Update the gv due to non-single 'ii' connection to 'to' */ + if (mynbrs[k].ned == 2) { + /* find the vertex 'u' that 'ii' was connected into 'to' */ + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + u = adjncy[jj]; + other = where[u]; + + if (u != v && other == to) { + orinfo = graph->vkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + for (kk=0; kk<orinfo->nnbrs; kk++) + onbrs[kk].gv -= vsize[ii]; + + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + break; + } + } + } + break; + } + } + + if (k == myrinfo->nnbrs) { + mynbrs[myrinfo->nnbrs].pid = to; + mynbrs[myrinfo->nnbrs++].ned = 1; + vmarker[ii] = 1; /* You do a complete .gv calculation */ + + /* All vertices adjacent to 'ii' need to be updated */ + for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) { + u = adjncy[jj]; + other = where[u]; + orinfo = graph->vkrinfo+u; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kk<orinfo->nnbrs; kk++) { + if (onbrs[kk].pid == to) { + onbrs[kk].gv += vsize[ii]; + if (!vmarker[u]) { /* Need to update boundary etc */ + vmarker[u] = 2; + modind[nmod++] = u; + } + 
break; + } + } + } + } + } + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + + /*====================================================================== + * Add the contributions on the volume gain due to 'v' + *=====================================================================*/ + myrinfo = graph->vkrinfo+v; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + for (k=0; k<myrinfo->nnbrs; k++) + pmarker[mynbrs[k].pid] = k; + pmarker[to] = k; /* as above: only a != -1 flag for v's new domain, never used as an index */ + + for (j=xadj[v]; j<xadj[v+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = graph->vkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + if (other == to) { + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv -= vsize[v]; + } + } + else { + ASSERT(pmarker[other] != -1); + + if (mynbrs[pmarker[other]].ned > 1) { + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] == -1) + onbrs[k].gv -= vsize[v]; + } + } + else { /* There is only one connection */ + for (k=0; k<orinfo->nnbrs; k++) { + if (pmarker[onbrs[k].pid] != -1) + onbrs[k].gv += vsize[v]; + } + } + } + } + for (k=0; k<myrinfo->nnbrs; k++) + pmarker[mynbrs[k].pid] = -1; + pmarker[to] = -1; + + + /*====================================================================== + * Recompute the volume information of the 'hard' nodes, and update the + * max volume gain for all the modified vertices and the priority queue + *=====================================================================*/ + for (iii=0; iii<nmod; iii++) { + i = modind[iii]; + me = where[i]; + + myrinfo = graph->vkrinfo+i; + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + if (vmarker[i] == 1) { /* Only complete gain updates go through */ + for (k=0; k<myrinfo->nnbrs; k++) + mynbrs[k].gv = 0; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = graph->vkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + + for (kk=0; kk<orinfo->nnbrs; kk++) + pmarker[onbrs[kk].pid] = kk; + pmarker[other] = 1; /* used only as a 'connected' (!= -1) flag for ii's own domain in the tests below */ + + if (me == other) { + 
/* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (pmarker[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } + } + else { + ASSERT(pmarker[me] != -1); + + /* I'm the only connection of 'ii' in 'me' */ + if (onbrs[pmarker[me]].ned == 1) { + /* Increase the gains for all the common domains between 'i' and 'ii' */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (pmarker[mynbrs[k].pid] != -1) + mynbrs[k].gv += vsize[ii]; + } + } + else { + /* Find which domains 'i' is connected and 'ii' is not and update their gain */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (pmarker[mynbrs[k].pid] == -1) + mynbrs[k].gv -= vsize[ii]; + } + } + } + + for (kk=0; kk<orinfo->nnbrs; kk++) + pmarker[onbrs[kk].pid] = -1; + pmarker[other] = -1; + + } + } + + /* Compute the overall gv for that node */ + myrinfo->gv = IDX_MIN; + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].gv > myrinfo->gv) + myrinfo->gv = mynbrs[k].gv; + } + + /* Add the extra gain due to nid == 0 */ + if (myrinfo->ned > 0 && myrinfo->nid == 0) + myrinfo->gv += vsize[i]; + + + /*====================================================================== + * Maintain a consistent boundary + *=====================================================================*/ + if (bndtype == BNDTYPE_REFINE) { + if (myrinfo->gv >= 0 && graph->bndptr[i] == -1) + BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i); + + if (myrinfo->gv < 0 && graph->bndptr[i] != -1) + BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i); + } + else { + if (myrinfo->ned > 0 && graph->bndptr[i] == -1) + BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i); + + if (myrinfo->ned == 0 && graph->bndptr[i] != -1) + BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i); + } + + + /*====================================================================== + * Update the priority queue appropriately (if allowed) + *=====================================================================*/ + if 
(queue != NULL) { + if (vstatus[i] != VPQSTATUS_EXTRACTED) { + if (graph->bndptr[i] != -1) { /* In-boundary vertex */ + if (vstatus[i] == VPQSTATUS_PRESENT) { + ipqUpdate(queue, i, myrinfo->gv); + } + else { + ipqInsert(queue, i, myrinfo->gv); + vstatus[i] = VPQSTATUS_PRESENT; + ListInsert(*r_nupd, updind, updptr, i); + } + } + else { /* Off-boundary vertex */ + if (vstatus[i] == VPQSTATUS_PRESENT) { + ipqDelete(queue, i); + vstatus[i] = VPQSTATUS_NOTPRESENT; + ListDelete(*r_nupd, updind, updptr, i); + } + } + } + } + + vmarker[i] = 0; + } +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + decreasing ed/sqrt(nnbrs)-id order. Note this is just an + approximation, as the ed is often split across different subdomains + and the sqrt(nnbrs) is just a crude approximation. + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + \param ffactor is the \em fudge-factor for allowing positive gain moves + to violate the max-pwgt constraint. 
+ \param omode is the type of optimization that will performed among + OMODE_REFINE and OMODE_BALANCE + + +*/ +/**************************************************************************/ +void Greedy_KWayEdgeStats(ctrl_t *ctrl, graph_t *graph) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t i, ii, iii, j, k, l, nvtxs, nparts, gain, u, v, uw, vw; + idx_t *xadj, *adjncy, *adjwgt, *vwgt; + idx_t *where, *pwgts, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t nbnd; + ckrinfo_t *urinfo, *vrinfo; + cnbr_t *unbrs, *vnbrs; + real_t *tpwgts, ubfactor; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = graph->adjwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts+2); + maxpwgts = iwspacemalloc(ctrl, nparts+2); + + ubfactor = ctrl->ubfactors[0]; + for (i=0; i<nparts; i++) { + maxpwgts[i] = tpwgts[i]*graph->tvwgt[0]*ubfactor; + minpwgts[i] = tpwgts[i]*graph->tvwgt[0]*(0.95/ubfactor); + } + maxpwgts[nparts] = maxpwgts[nparts+1] = 0; + minpwgts[nparts] = minpwgts[nparts+1] = 0; + + /* go and determine the positive gain valid swaps */ + nbnd = graph->nbnd; + + for (ii=0; ii<nbnd; ii++) { + u = bndind[ii]; + uw = where[u]; + + urinfo = graph->ckrinfo+u; + unbrs = ctrl->cnbrpool + urinfo->inbr; + + for (j=xadj[u]; j<xadj[u+1]; j++) { + v = adjncy[j]; + vw = where[v]; + + vrinfo = graph->ckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + if (uw == vw) + continue; + if (pwgts[uw] - vwgt[u] + vwgt[v] > maxpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] > maxpwgts[vw]) + continue; + + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + if (k < 0) + printf("Something went wrong!\n"); + gain = 
unbrs[k].ed-urinfo->id; + + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + if (k < 0) + printf("Something went wrong!\n"); + gain += vnbrs[k].ed-vrinfo->id; + + gain -= 2*adjwgt[j]; + + if (gain > 0) + printf(" Gain: %"PRIDX" for moving (%"PRIDX", %"PRIDX") between (%"PRIDX", %"PRIDX")\n", + gain, u, v, uw, vw); + } + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! K-way partitioning optimization in which the vertices are visited in + random order and the best edge is selected to swap its incident vertices + + \param graph is the graph that is being refined. + \param niter is the number of refinement iterations. + +*/ +/**************************************************************************/ +void Greedy_KWayEdgeCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter) +{ + /* Common variables to all types of kway-refinement/balancing routines */ + idx_t ii, j, k, pass, nvtxs, nparts, u, v, uw, vw, gain, bestgain, jbest; + idx_t from, me, to, oldcut, nmoved; + idx_t *xadj, *adjncy, *adjwgt, *vwgt; + idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minpwgts, *maxpwgts; + idx_t bndtype = BNDTYPE_REFINE; + real_t *tpwgts, ubfactor; + + /* Edgecut-specific/different variables */ + idx_t nbnd, oldnnbrs; + ckrinfo_t *myrinfo, *urinfo, *vrinfo; + cnbr_t *unbrs, *vnbrs; + + WCOREPUSH; + + /* Link the graph fields */ + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + vwgt = graph->vwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + + where = graph->where; + pwgts = graph->pwgts; + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + /* Setup the weight intervals of the various subdomains */ + minpwgts = iwspacemalloc(ctrl, nparts+2); + maxpwgts = iwspacemalloc(ctrl, nparts+2); + + ubfactor = gk_max(ctrl->ubfactors[0], ComputeLoadImbalance(graph, nparts, ctrl->pijbm)); + for (k=0; k<nparts; k++) { + maxpwgts[k] = 
tpwgts[k]*graph->tvwgt[0]*ubfactor; + minpwgts[k] = tpwgts[k]*graph->tvwgt[0]*(1.0/ubfactor); + } + maxpwgts[nparts] = maxpwgts[nparts+1] = 0; + minpwgts[nparts] = minpwgts[nparts+1] = 0; + + perm = iwspacemalloc(ctrl, nvtxs); + + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("GRE: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," + " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX"\n", + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), minpwgts[0], maxpwgts[0], + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nvtxs, graph->nbnd, graph->mincut); + } + + + /*===================================================================== + * The top-level refinement loop + *======================================================================*/ + for (pass=0; pass<niter; pass++) { + GKASSERT(ComputeCut(graph, where) == graph->mincut); + + oldcut = graph->mincut; + nbnd = graph->nbnd; + nmoved = 0; + + /* Insert the boundary vertices in the priority queue */ + /* Visit the vertices in random order and see if you can swap them */ + irandArrayPermute(nvtxs, perm, nbnd, 1); + for (ii=0; ii<nvtxs; ii++) { + if (bndptr[u=perm[ii]] == -1) + continue; + + uw = where[u]; + + urinfo = graph->ckrinfo+u; + unbrs = ctrl->cnbrpool + urinfo->inbr; + + bestgain = 0; + jbest = -1; + for (j=xadj[u]; j<xadj[u+1]; j++) { + v = adjncy[j]; + vw = where[v]; + + if (uw == vw) + continue; + if (pwgts[uw] - vwgt[u] + vwgt[v] > maxpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] > maxpwgts[vw]) + continue; + if (pwgts[uw] - vwgt[u] + vwgt[v] < minpwgts[uw] || + pwgts[vw] - vwgt[v] + vwgt[u] < minpwgts[vw]) + continue; + + vrinfo = graph->ckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + gain = -2*adjwgt[j]; + + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + GKASSERT(k>=0); + gain += unbrs[k].ed-urinfo->id; + + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + GKASSERT(k>=0); + gain += vnbrs[k].ed-vrinfo->id; + + 
if (gain > bestgain && vnbrs[k].ed > adjwgt[j]) { + bestgain = gain; + jbest = j; + } + } + + if (jbest == -1) + continue; /* no valid positive swap */ + + + /*===================================================================== + * If we got here, we can now swap the vertices + *======================================================================*/ + v = adjncy[jbest]; + vw = where[v]; + + vrinfo = graph->ckrinfo+v; + vnbrs = ctrl->cnbrpool + vrinfo->inbr; + + /* move u to v's partition */ + for (k=urinfo->nnbrs-1; k>=0; k--) { + if (unbrs[k].pid == vw) + break; + } + GKASSERT(k>=0); + + from = uw; + to = vw; + + graph->mincut -= unbrs[k].ed-urinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX" [%6"PRIDX" %6"PRIDX"]. Gain: %4"PRIDX". Cut: %6"PRIDX"\n", + u, from, to, pwgts[from], pwgts[to], unbrs[k].ed-urinfo->id, graph->mincut)); + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt[u]); + UpdateMovedVertexInfoAndBND(u, from, k, to, urinfo, unbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[u]; j<xadj[u+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + + /* move v to u's partition */ + for (k=vrinfo->nnbrs-1; k>=0; k--) { + if (vnbrs[k].pid == uw) + break; + } + GKASSERT(k>=0); +#ifdef XXX + if (k < 0) { /* that was removed, go and re-insert it */ + k = vrinfo->nnbrs++; + vnbrs[k].pid = uw; + vnbrs[k].ed = 0; + } +#endif + + from = vw; + to = uw; + + graph->mincut -= vnbrs[k].ed-vrinfo->id; + nmoved++; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX" [%6"PRIDX" %6"PRIDX"]. 
Gain: %4"PRIDX". Cut: %6"PRIDX"\n", + v, from, to, pwgts[from], pwgts[to], vnbrs[k].ed-vrinfo->id, graph->mincut)); + + /* Update ID/ED and BND related information for the moved vertex */ + INC_DEC(pwgts[to], pwgts[from], vwgt[v]); + UpdateMovedVertexInfoAndBND(v, from, k, to, vrinfo, vnbrs, where, nbnd, + bndptr, bndind, bndtype); + + /* Update the degrees of adjacent vertices */ + for (j=xadj[v]; j<xadj[v+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; + + oldnnbrs = myrinfo->nnbrs; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype); + + ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]); + } + } + + graph->nbnd = nbnd; + + if (ctrl->dbglvl&METIS_DBG_REFINE) { + printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"." + " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX"\n", + pwgts[iargmin(nparts, pwgts,1)], imax(nparts, pwgts,1), + ComputeLoadImbalance(graph, nparts, ctrl->pijbm), + graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where)); + } + + if (nmoved == 0 || graph->mincut == oldcut) + break; + } + + WCOREPOP; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/kwayrefine.c b/3rdParty/metis/metis-5.1.1/libmetis/kwayrefine.c new file mode 100644 index 000000000..50bc65784 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/kwayrefine.c @@ -0,0 +1,677 @@ +/*! +\file +\brief Driving routines for multilevel k-way refinement + +\date Started 7/28/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: kwayrefine.c 20398 2016-11-22 17:17:12Z karypis $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function is the entry point of cut-based refinement.
+
+    It computes the refinement information of the starting graph, refines
+    its partition, and then repeatedly moves to graph->finer, projecting the
+    partition with ProjectKWayPartition() and refining it again. The loop
+    ends once orggraph itself has been refined.
+
+    \param ctrl is the control structure. ctrl->contig may be changed while
+           refining; it is restored to its entry value before the final
+           contiguity pass.
+    \param orggraph is the graph at which the uncoarsening stops.
+    \param graph is the graph from which refinement starts; following its
+           finer links must lead to orggraph.
+*/
+/*************************************************************************/
+void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph)
+{
+  idx_t i, nlevels, contig=ctrl->contig;  /* contig keeps the entry value of ctrl->contig */
+  graph_t *ptr;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr));
+
+  /* Determine how many levels are there */
+  for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++);
+
+  /* Compute the parameters of the coarsest graph */
+  ComputeKWayPartitionParams(ctrl, graph);
+
+  /* Try to minimize the sub-domain connectivity */
+  if (ctrl->minconn)
+    EliminateSubDomainEdges(ctrl, graph);
+
+  /* Deal with contiguity constraints at the beginning */
+  if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) {
+    EliminateComponents(ctrl, graph);
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+    Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE);
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE);
+
+    ctrl->contig = 0;  /* switched off for the level loop; restored from 'contig' at the end */
+  }
+
+  /* Refine each successively finer graph */
+  for (i=0; ;i++) {
+    if (ctrl->minconn && i == nlevels/2)
+      EliminateSubDomainEdges(ctrl, graph);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr));
+
+    if (2*i >= nlevels && !IsBalanced(ctrl, graph, .02)) {  /* balancing only in the second half of the levels */
+      ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+      Greedy_KWayOptimize(ctrl, graph, 1, 0, OMODE_BALANCE);
+      ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    }
+
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 5.0, OMODE_REFINE);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr));
+
+    /* Deal with contiguity constraints in the middle */
+    if (contig && i == nlevels/2) {
+      if (FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) {
+        EliminateComponents(ctrl, graph);
+
+        if (!IsBalanced(ctrl, graph, .02)) {
+          ctrl->contig = 1;  /* enabled only for the two passes below */
+          ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+          Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE);
+
+          ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+          Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE);
+          ctrl->contig = 0;
+        }
+      }
+    }
+
+    if (graph == orggraph)  /* the original graph has just been refined */
+      break;
+
+    graph = graph->finer;
+
+    graph_ReadFromDisk(ctrl, graph);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr));
+    ASSERT(graph->vwgt != NULL);
+
+    ProjectKWayPartition(ctrl, graph);
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr));
+  }
+
+  /* Deal with contiguity requirement at the end */
+  ctrl->contig = contig;
+  if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts)
+    EliminateComponents(ctrl, graph);
+
+  if (!IsBalanced(ctrl, graph, 0.0)) {
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+    Greedy_KWayOptimize(ctrl, graph, 10, 0, OMODE_BALANCE);
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE);
+  }
+
+  if (ctrl->contig)
+    ASSERT(FindPartitionInducedComponents(graph, graph->where, NULL, NULL) == ctrl->nparts);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr));
+}
+
+
+/*************************************************************************/
+/*! 
This function allocates memory for the k-way cut-based refinement */ +/*************************************************************************/ +void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph) +{ + + graph->pwgts = imalloc(ctrl->nparts*graph->ncon, "AllocateKWayPartitionMemory: pwgts"); + graph->where = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: where"); + graph->bndptr = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: bndptr"); + graph->bndind = imalloc(graph->nvtxs, "AllocateKWayPartitionMemory: bndind"); + + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + graph->ckrinfo = (ckrinfo_t *)gk_malloc(graph->nvtxs*sizeof(ckrinfo_t), + "AllocateKWayPartitionMemory: ckrinfo"); + break; + + case METIS_OBJTYPE_VOL: + graph->vkrinfo = (vkrinfo_t *)gk_malloc(graph->nvtxs*sizeof(vkrinfo_t), + "AllocateKWayVolPartitionMemory: vkrinfo"); + + /* This is to let the cut-based -minconn and -contig large-scale graph + changes to go through */ + graph->ckrinfo = (ckrinfo_t *)graph->vkrinfo; + break; + + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } + +} + + +/*************************************************************************/ +/*! 
This function computes the partition weights and the initial refinement
+    information of a graph from the partition stored in graph->where.
+
+    For METIS_OBJTYPE_CUT it fills the id/ed of every vertex, the
+    per-subdomain external degrees of the vertices that have ed > 0, the
+    boundary list (vertices with ed-id >= 0), graph->nbnd and graph->mincut.
+    For METIS_OBJTYPE_VOL it fills the nid/ned edge counts and the
+    per-subdomain ned, sets graph->mincut, and leaves the gains, the
+    boundary and graph->minvol to ComputeKWayVolGains().
+
+    An unknown objtype is reported through gk_errexit().
+*/
+/**************************************************************************/
+void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, k, l, nvtxs, ncon, nparts, nbnd, mincut, me, other;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where, *bndind, *bndptr;
+
+  nparts = ctrl->nparts;
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where  = graph->where;
+  pwgts  = iset(nparts*ncon, 0, graph->pwgts);  /* weights restart from zero */
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);  /* -1 marks a vertex that is not in the boundary list */
+
+  nbnd = mincut = 0;
+
+  /* Compute pwgts */
+  if (ncon == 1) {
+    for (i=0; i<nvtxs; i++) {
+      ASSERT(where[i] >= 0 && where[i] < nparts);
+      pwgts[where[i]] += vwgt[i];
+    }
+  }
+  else {
+    for (i=0; i<nvtxs; i++) {
+      me = where[i];
+      for (j=0; j<ncon; j++)
+        pwgts[me*ncon+j] += vwgt[i*ncon+j];
+    }
+  }
+
+  /* Compute the required info for refinement */
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      {
+        ckrinfo_t *myrinfo;
+        cnbr_t *mynbrs;
+
+        memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs);
+        cnbrpoolReset(ctrl);
+
+        for (i=0; i<nvtxs; i++) {
+          me      = where[i];
+          myrinfo = graph->ckrinfo+i;
+
+          for (j=xadj[i]; j<xadj[i+1]; j++) {
+            if (me == where[adjncy[j]])
+              myrinfo->id += adjwgt[j];
+            else
+              myrinfo->ed += adjwgt[j];
+          }
+
+          /* Time to compute the particular external degrees */
+          if (myrinfo->ed > 0) {
+            mincut += myrinfo->ed;
+
+            myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+            mynbrs        = ctrl->cnbrpool + myrinfo->inbr;
+
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              other = where[adjncy[j]];
+              if (me != other) {
+                for (k=0; k<myrinfo->nnbrs; k++) {
+                  if (mynbrs[k].pid == other) {
+                    mynbrs[k].ed += adjwgt[j];
+                    break;
+                  }
+                }
+                if (k == myrinfo->nnbrs) {  /* first edge into 'other': open a new entry */
+                  mynbrs[k].pid = other;
+                  mynbrs[k].ed  = adjwgt[j];
+                  myrinfo->nnbrs++;
+                }
+              }
+            }
+
+            ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]);
+
+            /* Only ed-id>=0 nodes are considered to be in the boundary */
+            if (myrinfo->ed-myrinfo->id >= 0)
+              BNDInsert(nbnd, bndind, bndptr, i);
+          }
+          else {
+            myrinfo->inbr = -1;  /* no entries reserved for this vertex */
+          }
+        }
+
+        graph->mincut = mincut/2;  /* halved: presumably every cut edge is seen from both endpoints -- confirm */
+        graph->nbnd   = nbnd;
+
+      }
+      ASSERT(CheckBnd2(graph));
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      {
+        vkrinfo_t *myrinfo;
+        vnbr_t *mynbrs;
+
+        memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs);
+        vnbrpoolReset(ctrl);
+
+        /* Compute now the id/ed degrees */
+        for (i=0; i<nvtxs; i++) {
+          me      = where[i];
+          myrinfo = graph->vkrinfo+i;
+
+          for (j=xadj[i]; j<xadj[i+1]; j++) {  /* edges are counted here, their weights are not used */
+            if (me == where[adjncy[j]])
+              myrinfo->nid++;
+            else
+              myrinfo->ned++;
+          }
+
+          /* Time to compute the particular external degrees */
+          if (myrinfo->ned > 0) {
+            mincut += myrinfo->ned;
+
+            myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+            mynbrs        = ctrl->vnbrpool + myrinfo->inbr;
+
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              other = where[adjncy[j]];
+              if (me != other) {
+                for (k=0; k<myrinfo->nnbrs; k++) {
+                  if (mynbrs[k].pid == other) {
+                    mynbrs[k].ned++;
+                    break;
+                  }
+                }
+                if (k == myrinfo->nnbrs) {
+                  mynbrs[k].gv  = 0;
+                  mynbrs[k].pid = other;
+                  mynbrs[k].ned = 1;
+                  myrinfo->nnbrs++;
+                }
+              }
+            }
+            ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]);
+          }
+          else {
+            myrinfo->inbr = -1;
+          }
+        }
+        graph->mincut = mincut/2;
+
+        ComputeKWayVolGains(ctrl, graph);
+      }
+      ASSERT(graph->minvol == ComputeVolume(graph, graph->where));
+      break;
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! This function projects a partition, and at the same time computes the
+ parameters for refinement. 
    The where vector of graph->coarser is mapped onto the graph through
+    graph->cmap. The entries of graph->cmap are overwritten in the process,
+    the partition weights and (unless ctrl->dropedges is set) the cut are
+    copied from the coarser graph, and graph->coarser is freed before
+    returning.
+*/
+/*************************************************************************/
+void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, k, nvtxs, nbnd, nparts, me, other, istart, iend, tid, ted;
+  idx_t *xadj, *adjncy, *adjwgt;
+  idx_t *cmap, *where, *bndptr, *bndind, *cwhere, *htable;
+  graph_t *cgraph;
+  int dropedges;
+
+  WCOREPUSH;
+
+  dropedges = ctrl->dropedges;
+
+  nparts = ctrl->nparts;
+
+  cgraph = graph->coarser;
+  cwhere = cgraph->where;
+
+  nvtxs  = graph->nvtxs;
+  cmap   = graph->cmap;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  AllocateKWayPartitionMemory(ctrl, graph);
+
+  where  = graph->where;
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  htable = iset(nparts, -1, iwspacemalloc(ctrl, nparts));  /* subdomain id -> slot in the current vertex's list, -1 if absent */
+
+  /* Compute the required info for refinement */
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      ASSERT(CheckBnd2(cgraph));
+      {
+        ckrinfo_t *myrinfo;
+        cnbr_t *mynbrs;
+
+        /* go through and project partition and compute id/ed for the nodes */
+        for (i=0; i<nvtxs; i++) {
+          k        = cmap[i];
+          where[i] = cwhere[k];
+          cmap[i]  = (dropedges ? 1 : cgraph->ckrinfo[k].ed);  /* For optimization */
+        }
+
+        memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs);
+        cnbrpoolReset(ctrl);
+
+        for (nbnd=0, i=0; i<nvtxs; i++) {
+          istart = xadj[i];
+          iend   = xadj[i+1];
+
+          myrinfo = graph->ckrinfo+i;
+
+          if (cmap[i] == 0) { /* Interior node. cmap[i] now holds the ed of i's coarse vertex (1 when dropedges is set) */
+            for (tid=0, j=istart; j<iend; j++)
+              tid += adjwgt[j];
+
+            myrinfo->id   = tid;
+            myrinfo->inbr = -1;
+          }
+          else { /* Potentially an interface node */
+            myrinfo->inbr = cnbrpoolGetNext(ctrl, iend-istart+1);
+            mynbrs        = ctrl->cnbrpool + myrinfo->inbr;
+
+            me = where[i];
+            for (tid=0, ted=0, j=istart; j<iend; j++) {
+              other = where[adjncy[j]];
+              if (me == other) {
+                tid += adjwgt[j];
+              }
+              else {
+                ted += adjwgt[j];
+                if ((k = htable[other]) == -1) {
+                  htable[other]               = myrinfo->nnbrs;
+                  mynbrs[myrinfo->nnbrs].pid  = other;
+                  mynbrs[myrinfo->nnbrs++].ed = adjwgt[j];
+                }
+                else {
+                  mynbrs[k].ed += adjwgt[j];
+                }
+              }
+            }
+            myrinfo->id = tid;
+            myrinfo->ed = ted;
+
+            /* Remove space for edegrees if it was interior */
+            if (ted == 0) {
+              ctrl->nbrpoolcpos -= iend-istart+1;  /* NOTE(review): assumes cnbrpoolGetNext() advanced the pool by exactly iend-istart+1 -- confirm */
+              myrinfo->inbr      = -1;
+            }
+            else {
+              if (ted-tid >= 0)
+                BNDInsert(nbnd, bndind, bndptr, i);
+
+              for (j=0; j<myrinfo->nnbrs; j++)  /* reset only the htable entries used by this vertex */
+                htable[mynbrs[j].pid] = -1;
+            }
+          }
+        }
+
+        graph->nbnd = nbnd;
+
+      }
+      ASSERT(CheckBnd2(graph));
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      {
+        vkrinfo_t *myrinfo;
+        vnbr_t *mynbrs;
+
+        ASSERT(cgraph->minvol == ComputeVolume(cgraph, cgraph->where));
+
+        /* go through and project partition and compute id/ed for the nodes */
+        for (i=0; i<nvtxs; i++) {
+          k        = cmap[i];
+          where[i] = cwhere[k];
+          cmap[i]  = (dropedges ? 1 : cgraph->vkrinfo[k].ned);  /* For optimization */
+        }
+
+        memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs);
+        vnbrpoolReset(ctrl);
+
+        for (i=0; i<nvtxs; i++) {
+          istart = xadj[i];
+          iend   = xadj[i+1];
+          myrinfo = graph->vkrinfo+i;
+
+          if (cmap[i] == 0) { /* cmap[i] now holds the ned of i's coarse vertex (1 when dropedges is set) */
+            myrinfo->nid  = iend-istart;
+            myrinfo->inbr = -1;
+          }
+          else { /* Potentially an interface node */
+            myrinfo->inbr = vnbrpoolGetNext(ctrl, iend-istart+1);
+            mynbrs        = ctrl->vnbrpool + myrinfo->inbr;
+
+            me = where[i];
+            for (tid=0, ted=0, j=istart; j<iend; j++) {
+              other = where[adjncy[j]];
+              if (me == other) {
+                tid++;
+              }
+              else {
+                ted++;
+                if ((k = htable[other]) == -1) {
+                  htable[other]                = myrinfo->nnbrs;
+                  mynbrs[myrinfo->nnbrs].gv    = 0;
+                  mynbrs[myrinfo->nnbrs].pid   = other;
+                  mynbrs[myrinfo->nnbrs++].ned = 1;
+                }
+                else {
+                  mynbrs[k].ned++;
+                }
+              }
+            }
+            myrinfo->nid = tid;
+            myrinfo->ned = ted;
+
+            /* Remove space for edegrees if it was interior */
+            if (ted == 0) {
+              ctrl->nbrpoolcpos -= iend-istart+1;  /* NOTE(review): same assumption about vnbrpoolGetNext() -- confirm */
+              myrinfo->inbr      = -1;
+            }
+            else {
+              for (j=0; j<myrinfo->nnbrs; j++)
+                htable[mynbrs[j].pid] = -1;
+            }
+          }
+        }
+
+        ComputeKWayVolGains(ctrl, graph);
+
+        ASSERT(graph->minvol == ComputeVolume(graph, graph->where));
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  graph->mincut = (dropedges ? ComputeCut(graph, where) : cgraph->mincut);
+  icopy(nparts*graph->ncon, cgraph->pwgts, graph->pwgts);
+
+  FreeGraph(&graph->coarser);
+  graph->coarser = NULL;
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function computes the boundary definition for balancing. 
*/
+/*************************************************************************/
+/* Rebuilds graph->bndptr, graph->bndind and graph->nbnd from the refinement
+   information already stored in the graph.
+
+   Membership test per vertex:
+     cut objective,    BNDTYPE_REFINE : ed-id >= 0
+     cut objective,    otherwise      : ed > 0
+     volume objective, BNDTYPE_REFINE : gv >= 0
+     volume objective, otherwise      : ned > 0
+
+   An unknown ctrl->objtype is reported through gk_errexit(). */
+void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype)
+{
+  idx_t v, count = 0;
+  idx_t nvtxs   = graph->nvtxs;
+  idx_t *bndind = graph->bndind;
+  idx_t *bndptr = iset(nvtxs, -1, graph->bndptr);  /* start from an empty boundary */
+  int refine    = (bndtype == BNDTYPE_REFINE);
+
+  if (ctrl->objtype == METIS_OBJTYPE_CUT) {
+    for (v=0; v<nvtxs; v++) {
+      if (refine ? (graph->ckrinfo[v].ed-graph->ckrinfo[v].id >= 0)
+                 : (graph->ckrinfo[v].ed > 0))
+        BNDInsert(count, bndind, bndptr, v);
+    }
+  }
+  else if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+    for (v=0; v<nvtxs; v++) {
+      if (refine ? (graph->vkrinfo[v].gv >= 0)
+                 : (graph->vkrinfo[v].ned > 0))
+        BNDInsert(count, bndind, bndptr, v);
+    }
+  }
+  else {
+    gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  graph->nbnd = count;
+}
+
+
+/*************************************************************************/
+/*! 
This function computes the initial gains in the communication volume.
+
+    It reads the nid/ned/nnbrs fields of graph->vkrinfo and the per-subdomain
+    ned entries without setting them, so they must be valid on entry. It
+    writes the gv of every neighbor entry and of every vertex, rebuilds the
+    boundary list (vertices whose gv >= 0), and recomputes graph->minvol as
+    the sum of nnbrs*vsize over the vertices.
+*/
+/*************************************************************************/
+void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, l, nvtxs, nparts, me, other, pid;
+  idx_t *xadj, *vsize, *adjncy, *adjwgt, *where,
+        *bndind, *bndptr, *ophtable;
+  vkrinfo_t *myrinfo, *orinfo;
+  vnbr_t *mynbrs, *onbrs;
+
+  WCOREPUSH;
+
+  nparts = ctrl->nparts;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where  = graph->where;
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  ophtable = iset(nparts, -1, iwspacemalloc(ctrl, nparts));  /* marks the subdomains the current neighbor is connected to */
+
+  /* Compute the volume gains */
+  graph->minvol = graph->nbnd = 0;
+  for (i=0; i<nvtxs; i++) {
+    myrinfo     = graph->vkrinfo+i;
+    myrinfo->gv = IDX_MIN;
+
+    if (myrinfo->nnbrs > 0) {
+      me     = where[i];
+      mynbrs = ctrl->vnbrpool + myrinfo->inbr;
+
+      graph->minvol += myrinfo->nnbrs*vsize[i];
+
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii     = adjncy[j];
+        other  = where[ii];
+        orinfo = graph->vkrinfo+ii;
+        onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+        for (k=0; k<orinfo->nnbrs; k++)
+          ophtable[onbrs[k].pid] = k;
+        ophtable[other] = 1;  /* this is to simplify coding */
+
+        if (me == other) {
+          /* Find which domains 'i' is connected to but 'ii' is not
+             and update their gain */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if (ophtable[mynbrs[k].pid] == -1)
+              mynbrs[k].gv -= vsize[ii];
+          }
+        }
+        else {
+          ASSERT(ophtable[me] != -1);
+
+          if (onbrs[ophtable[me]].ned == 1) {
+            /* I'm the only connection of 'ii' in 'me' */
+            /* Increase the gains for all the common domains between 'i' and 'ii' */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (ophtable[mynbrs[k].pid] != -1)
+                mynbrs[k].gv += vsize[ii];
+            }
+          }
+          else {
+            /* Find which domains 'i' is connected to and 'ii' is not
+               and update their gain */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (ophtable[mynbrs[k].pid] == -1)
+                mynbrs[k].gv -= vsize[ii];
+            }
+          }
+        }
+
+        /* Reset the marker vector */
+        for (k=0; k<orinfo->nnbrs; k++)
+          ophtable[onbrs[k].pid] = -1;
+        ophtable[other] = -1;
+      }
+
+      /* Compute the max vgain */
+      for (k=0; k<myrinfo->nnbrs; k++) {
+        if (mynbrs[k].gv > myrinfo->gv)
+          myrinfo->gv = mynbrs[k].gv;
+      }
+
+      /* Add the extra gain due to id == 0 */
+      if (myrinfo->ned > 0 && myrinfo->nid == 0)
+        myrinfo->gv += vsize[i];
+    }
+
+    if (myrinfo->gv >= 0)  /* vertices without neighbor entries keep gv == IDX_MIN and stay out */
+      BNDInsert(graph->nbnd, bndind, bndptr, i);
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function checks if the partition weights are within the balance
+    constraints.
+
+    \returns non-zero when the value returned by ComputeLoadImbalanceDiff()
+             for ctrl->nparts, ctrl->pijbm and ctrl->ubfactors is at most
+             ffactor, and 0 otherwise.
+*/
+/*************************************************************************/
+int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor)
+{
+  return
+    (ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors)
+         <= ffactor);
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/macros.h b/3rdParty/metis/metis-5.1.1/libmetis/macros.h
new file mode 100644
index 000000000..3f6f7d9ed
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/macros.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * macros.h
+ *
+ * This file contains macros used in multilevel
+ *
+ * Started 9/25/94
+ * George
+ *
+ * $Id: macros.h 10060 2011-06-02 18:56:30Z karypis $
+ *
+ */
+
+#ifndef _LIBMETIS_MACROS_H_
+#define _LIBMETIS_MACROS_H_
+
+/*************************************************************************
+* The following macros apply a bitwise operator to a first operand that may
+* be negative. When a is negative they operate on -a; OR and XOR then also
+* negate the result. Otherwise they are the plain &, | and ^ operators.
+**************************************************************************/
+#define AND(a, b) ((a) < 0 ? ((-(a))&(b)) : ((a)&(b)))
+#define OR(a, b) ((a) < 0 ? -((-(a))|(b)) : ((a)|(b)))
+#define XOR(a, b) ((a) < 0 ? 
-((-(a))^(b)) : ((a)^(b)))
+
+//#define icopy(n, a, b) (idx_t *)memcpy((void *)(b), (void *)(a), sizeof(idx_t)*(n))
+
+#define HASHFCT(key, size) ((key)%(size))
+#define SWAP gk_SWAP
+
+/* gets the appropriate option value: defval when options is NULL or when
+   options[idx] is -1, options[idx] otherwise */
+#define GETOPTION(options, idx, defval) \
+            ((options) == NULL || (options)[idx] == -1 ? defval : (options)[idx])
+
+/* converts a user provided ufactor into a real ubfactor */
+#define I2RUBFACTOR(ufactor) (1.0+0.001*(ufactor))
+
+/* set/reset the current workspace core; both expand to a call that uses a
+   variable named ctrl from the scope in which they appear */
+#define WCOREPUSH    wspacepush(ctrl)
+#define WCOREPOP     wspacepop(ctrl)
+
+
+
+/*************************************************************************
+* These macros insert and remove nodes from a Direct Access list
+*
+* lind[0..n-1] holds the members and lptr[i] is the position of i in lind,
+* or -1 when i is not a member. Deletion moves the last member into the
+* freed position. The arguments are pasted unparenthesized and n is
+* modified, so n must be an lvalue.
+**************************************************************************/
+#define ListInsert(n, lind, lptr, i) \
+   do { \
+     ASSERT(lptr[i] == -1); \
+     lind[n] = i; \
+     lptr[i] = (n)++;\
+   } while(0)
+
+#define ListDelete(n, lind, lptr, i) \
+   do { \
+     ASSERT(lptr[i] != -1); \
+     lind[lptr[i]] = lind[--(n)]; \
+     lptr[lind[n]] = lptr[i]; \
+     lptr[i] = -1; \
+   } while(0)
+
+
+/*************************************************************************
+* These macros insert and remove nodes from the boundary list
+**************************************************************************/
+#define BNDInsert(nbnd, bndind, bndptr, vtx) \
+  ListInsert(nbnd, bndind, bndptr, vtx)
+
+#define BNDDelete(nbnd, bndind, bndptr, vtx) \
+  ListDelete(nbnd, bndind, bndptr, vtx)
+
+
+/*************************************************************************
+* These macros deal with id/ed updating during k-way refinement
+*
+* UpdateMovedVertexInfoAndBND updates the record of a vertex i that moves
+*   from 'from' to 'to', where k is the position of 'to' in mynbrs. It
+*   passes a variable named j to SWAP as its third argument, so the
+*   expansion site must have a j in scope that may be overwritten.
+*
+* UpdateAdjacentVertexInfoAndBND updates the record of a vertex vid
+*   adjacent to a moved vertex, where ewgt is the weight of the connecting
+*   edge. It takes neighbor entries from the pool when the vertex has none
+*   (inbr == -1) and declares its own k and mynbrs.
+*
+* UpdateQueueInfo updates, inserts or deletes vid in the priority queue
+*   with the key ed/sqrt(nnbrs) - id (just -id when nnbrs is 0).
+**************************************************************************/
+#define UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, \
+            nbnd, bndptr, bndind, bndtype) \
+   do { \
+     where[i] = to; \
+     myrinfo->ed += myrinfo->id-mynbrs[k].ed; \
+     SWAP(myrinfo->id, mynbrs[k].ed, j); \
+     if (mynbrs[k].ed == 0) \
+       mynbrs[k] = mynbrs[--myrinfo->nnbrs]; \
+     else \
+       mynbrs[k].pid = from; \
+     \
+     /* Update the boundary information. Both deletion and addition is \
+        allowed as this routine can be used for moving arbitrary nodes. */ \
+     if (bndtype == BNDTYPE_REFINE) { \
+       if (bndptr[i] != -1 && myrinfo->ed - myrinfo->id < 0) \
+         BNDDelete(nbnd, bndind, bndptr, i); \
+       if (bndptr[i] == -1 && myrinfo->ed - myrinfo->id >= 0) \
+         BNDInsert(nbnd, bndind, bndptr, i); \
+     } \
+     else { \
+       if (bndptr[i] != -1 && myrinfo->ed <= 0) \
+         BNDDelete(nbnd, bndind, bndptr, i); \
+       if (bndptr[i] == -1 && myrinfo->ed > 0) \
+         BNDInsert(nbnd, bndind, bndptr, i); \
+     } \
+   } while(0)
+
+
+#define UpdateAdjacentVertexInfoAndBND(ctrl, vid, adjlen, me, from, to, \
+            myrinfo, ewgt, nbnd, bndptr, bndind, bndtype) \
+   do { \
+     idx_t k; \
+     cnbr_t *mynbrs; \
+     \
+     if (myrinfo->inbr == -1) { \
+       myrinfo->inbr  = cnbrpoolGetNext(ctrl, adjlen+1); \
+       myrinfo->nnbrs = 0; \
+     } \
+     ASSERT(CheckRInfo(ctrl, myrinfo)); \
+     \
+     mynbrs = ctrl->cnbrpool + myrinfo->inbr; \
+     \
+     /* Update global ID/ED and boundary */ \
+     if (me == from) { \
+       INC_DEC(myrinfo->ed, myrinfo->id, (ewgt)); \
+       if (bndtype == BNDTYPE_REFINE) { \
+         if (myrinfo->ed-myrinfo->id >= 0 && bndptr[(vid)] == -1) \
+           BNDInsert(nbnd, bndind, bndptr, (vid)); \
+       } \
+       else { \
+         if (myrinfo->ed > 0 && bndptr[(vid)] == -1) \
+           BNDInsert(nbnd, bndind, bndptr, (vid)); \
+       } \
+     } \
+     else if (me == to) { \
+       INC_DEC(myrinfo->id, myrinfo->ed, (ewgt)); \
+       if (bndtype == BNDTYPE_REFINE) { \
+         if (myrinfo->ed-myrinfo->id < 0 && bndptr[(vid)] != -1) \
+           BNDDelete(nbnd, bndind, bndptr, (vid)); \
+       } \
+       else { \
+         if (myrinfo->ed <= 0 && bndptr[(vid)] != -1) \
+           BNDDelete(nbnd, bndind, bndptr, (vid)); \
+       } \
+     } \
+     \
+     /* Remove contribution from the .ed of 'from' */ \
+     if (me != from) { \
+       for (k=0; k<myrinfo->nnbrs; k++) { \
+         if (mynbrs[k].pid == from) { \
+           if (mynbrs[k].ed == (ewgt)) \
+             mynbrs[k] = mynbrs[--myrinfo->nnbrs]; \
+           else \
+             mynbrs[k].ed -= (ewgt); \
+           break; \
+         } \
+       } \
+     } \
+     \
+     /* Add contribution to the .ed of 'to' */ \
+     if (me != to) { \
+       for (k=0; k<myrinfo->nnbrs; k++) { \
+         if (mynbrs[k].pid == to) { \
+           mynbrs[k].ed += (ewgt); \
+           break; \
+         } \
+       } \
+       if (k == myrinfo->nnbrs) { \
+         mynbrs[k].pid  = to; \
+         mynbrs[k].ed   = (ewgt); \
+         myrinfo->nnbrs++; \
+       } \
+     } \
+     \
+     ASSERT(CheckRInfo(ctrl, myrinfo));\
+   } while(0)
+
+
+#define UpdateQueueInfo(queue, vstatus, vid, me, from, to, myrinfo, oldnnbrs, \
+            nupd, updptr, updind, bndtype) \
+   do { \
+     real_t rgain; \
+     \
+     if (me == to || me == from || oldnnbrs != myrinfo->nnbrs) {  \
+       rgain = (myrinfo->nnbrs > 0 ?  \
+                1.0*myrinfo->ed/sqrt(myrinfo->nnbrs) : 0.0) - myrinfo->id; \
+   \
+       if (bndtype == BNDTYPE_REFINE) { \
+         if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \
+           if (myrinfo->ed-myrinfo->id >= 0) \
+             rpqUpdate(queue, (vid), rgain); \
+           else { \
+             rpqDelete(queue, (vid)); \
+             vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \
+             ListDelete(nupd, updind, updptr, (vid)); \
+           } \
+         } \
+         else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed-myrinfo->id >= 0) { \
+           rpqInsert(queue, (vid), rgain); \
+           vstatus[(vid)] = VPQSTATUS_PRESENT; \
+           ListInsert(nupd, updind, updptr, (vid)); \
+         } \
+       } \
+       else { \
+         if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \
+           if (myrinfo->ed > 0) \
+             rpqUpdate(queue, (vid), rgain); \
+           else { \
+             rpqDelete(queue, (vid)); \
+             vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \
+             ListDelete(nupd, updind, updptr, (vid)); \
+           } \
+         } \
+         else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed > 0) { \
+           rpqInsert(queue, (vid), rgain); \
+           vstatus[(vid)] = VPQSTATUS_PRESENT; \
+           ListInsert(nupd, updind, updptr, (vid)); \
+         } \
+       } \
+     } \
+   } while(0)
+
+
+
+/*************************************************************************/
+/*! This macro determines the set of subdomains that a vertex can move to
+    without increasing the maxndoms. 
*/
+/*************************************************************************/
+#define SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, vtmp) \
+  do { \
+    idx_t j, k, l, nadd, to; \
+    for (j=0; j<myrinfo->nnbrs; j++) { \
+      safetos[to = mynbrs[j].pid] = 0; \
+      \
+      /* mark in vtmp the subdomains currently listed as adjacent to 'to' */ \
+      for (k=0; k<nads[to]; k++) \
+        vtmp[adids[to][k]] = 1; \
+      \
+      for (nadd=0, k=0; k<myrinfo->nnbrs; k++) { \
+        if (k == j) \
+          continue; \
+        \
+        l = mynbrs[k].pid; \
+        if (vtmp[l] == 0) { \
+          if (nads[l] > maxndoms-1) { \
+            nadd = maxndoms; \
+            break; \
+          } \
+          nadd++; \
+        } \
+      } \
+      if (nads[to]+nadd <= maxndoms) \
+        safetos[to] = 1; \
+      if (nadd == 0) \
+        safetos[to] = 2; \
+      \
+      /* clear the vtmp marks that were set above for the 'to' subdomain */ \
+      for (k=0; k<nads[to]; k++) \
+        vtmp[adids[to][k]] = 0; \
+    } \
+  } while (0)
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/mcutil.c b/3rdParty/metis/metis-5.1.1/libmetis/mcutil.c
new file mode 100644
index 000000000..6e20f556a
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/mcutil.c
@@ -0,0 +1,330 @@
+/*
+ * mcutil.c
+ *
+ * This file contains various utility functions for the MOC portion of the
+ * code
+ *
+ * Started 2/15/98
+ * George
+ *
+ * $Id: mcutil.c 13901 2013-03-24 16:17:03Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function compares two vectors x & y of length n and returns 1
+    if x[i] <= y[i] for every i, and 0 otherwise (1 when n is 0).
+*/
+/**************************************************************************/
+int rvecle(idx_t n, real_t *x, real_t *y)
+{
+  for (n--; n>=0; n--) {
+    if (x[n] > y[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function compares two vectors x & y of length n and returns 1
+    if x[i] >= y[i] for every i, and 0 otherwise (1 when n is 0).
+*/
+/**************************************************************************/
+int rvecge(idx_t n, real_t *x, real_t *y)
+{
+  for (n--; n>=0; n--) {
+    if (x[n] < y[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function compares vectors x1+x2 against y and returns 1
+    if x1[i]+x2[i] <= y[i] for every i in [0, n), and 0 otherwise.
+*/
+/**************************************************************************/
+int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y)
+{
+  for (n--; n>=0; n--) {
+    if (x1[n]+x2[n] > y[n])
+      return 0;
+  }
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function returns max_i(x[i]-y[i]); it reads x[0] and y[0] unconditionally, so n must be >= 1 */
+/**************************************************************************/
+real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y)
+{
+  real_t max;
+
+  max = x[0]-y[0];
+
+  for (n--; n>0; n--) {
+    if (max < x[n]-y[n])
+      max = x[n]-y[n];
+  }
+
+  return max;
+}
+
+
+/*************************************************************************/
+/*! This function returns 1 if x[i] <= z[i] for every i in [0, n), and 0 otherwise. */
+/**************************************************************************/
+int ivecle(idx_t n, idx_t *x, idx_t *z)
+{
+  for (n--; n>=0; n--) {
+    if (x[n] > z[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function returns 1 if x[i] >= z[i] for every i in [0, n), and 0 otherwise. */
+/**************************************************************************/
+int ivecge(idx_t n, idx_t *x, idx_t *z)
+{
+  for (n--; n>=0; n--) {
+    if (x[n] < z[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function returns true if \forall i, a*x[i]+y[i] <= z[i].
*/
+/**************************************************************************/
+int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z)
+{
+  for (n--; n>=0; n--) {
+    if (a*x[n]+y[n] > z[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function returns 1 if a*x[i]+y[i] >= z[i] for every i in [0, n), and 0 otherwise. */
+/**************************************************************************/
+int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z)
+{
+  for (n--; n>=0; n--) {
+    if (a*x[n]+y[n] < z[n])
+      return 0;
+  }
+
+  return  1;
+}
+
+
+/*************************************************************************/
+/*! This function checks if v+u2 provides a better (or equally good) balance
+    in the weight vector than v+u1 */
+/*************************************************************************/
+int BetterVBalance(idx_t ncon, real_t *invtvwgt, idx_t *v_vwgt, idx_t *u1_vwgt,
+        idx_t *u2_vwgt)
+{
+  idx_t i;
+  real_t sum1=0.0, sum2=0.0, diff1=0.0, diff2=0.0;
+
+  for (i=0; i<ncon; i++) {
+    sum1 += (v_vwgt[i]+u1_vwgt[i])*invtvwgt[i];
+    sum2 += (v_vwgt[i]+u2_vwgt[i])*invtvwgt[i];
+  }
+  sum1 = sum1/ncon;
+  sum2 = sum2/ncon;
+
+  for (i=0; i<ncon; i++) {
+    diff1 += rabs(sum1 - (v_vwgt[i]+u1_vwgt[i])*invtvwgt[i]);
+    diff2 += rabs(sum2 - (v_vwgt[i]+u2_vwgt[i])*invtvwgt[i]);
+  }
+
+  return (diff1 - diff2 >= 0);
+}
+
+
+/*************************************************************************/
+/*! This function takes two ubfactor-centered load imbalance vectors x & y,
+    and returns true if y has the smaller sum of squared positive entries. */
+/*************************************************************************/
+int BetterBalance2Way(idx_t n, real_t *x, real_t *y)
+{
+  real_t nrm1=0.0, nrm2=0.0;
+
+  for (--n; n>=0; n--) {
+    if (x[n] > 0) nrm1 += x[n]*x[n];
+    if (y[n] > 0) nrm2 += y[n]*y[n];
+  }
+  return nrm2 < nrm1;
+}
+
+
+/*************************************************************************/
+/*!
Given a vertex and two weights, this function returns 1, if the second
+    partition will be more balanced than the first after the weighted
+    addition of that vertex (smaller maximum excess over ubvec; ties are broken by the smaller sum of squares).
+    The balance determination takes into account the ideal target weights
+    of the two partitions.
+*/
+/*************************************************************************/
+int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *ubvec,
+        idx_t a1, idx_t *pt1, real_t *bm1,
+        idx_t a2, idx_t *pt2, real_t *bm2)
+{
+  idx_t i;
+  real_t tmp, nrm1=0.0, nrm2=0.0, max1=0.0, max2=0.0;
+
+  for (i=0; i<ncon; i++) {
+    tmp = bm1[i]*(pt1[i]+a1*vwgt[i]) - ubvec[i];
+    //printf("BB: %d %+.4f ", (int)i, (float)tmp);
+    nrm1 += tmp*tmp;
+    max1 = (tmp > max1 ? tmp : max1);
+
+    tmp = bm2[i]*(pt2[i]+a2*vwgt[i]) - ubvec[i];
+    //printf("%+.4f ", (float)tmp);
+    nrm2 += tmp*tmp;
+    max2 = (tmp > max2 ? tmp : max2);
+
+    //printf("%4d %4d %4d %4d %4d %4d %4d %.2f\n",
+    //    (int)vwgt[i],
+    //    (int)a1, (int)pt1[i], (int)tpt1[i],
+    //    (int)a2, (int)pt2[i], (int)tpt2[i], ubvec[i]);
+  }
+  //printf(" %.3f %.3f %.3f %.3f\n", (float)max1, (float)nrm1, (float)max2, (float)nrm2);
+
+  if (max2 < max1)
+    return 1;
+
+  if (max2 == max1 && nrm2 < nrm1)
+    return 1;
+
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! Computes the maximum load imbalance of a partitioning solution over
+    all the constraints. The returned value is never smaller than 1.0. */
+/**************************************************************************/
+real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm)
+{
+  idx_t i, j, ncon, *pwgts;
+  real_t max, cur;
+
+  ncon  = graph->ncon;
+  pwgts = graph->pwgts;
+
+  max = 1.0;
+  for (i=0; i<ncon; i++) {
+    for (j=0; j<nparts; j++) {
+      cur = pwgts[j*ncon+i]*pijbm[j*ncon+i];
+      if (cur > max)
+        max = cur;
+    }
+  }
+
+  return max;
+}
+
+
+/*************************************************************************/
+/*!
Computes the maximum load imbalance difference of a partitioning
+    solution over all the constraints.
+    The difference is defined with respect to the allowed maximum
+    imbalance (ubvec) of the respective constraint; the result is never below -1.0.
+ */
+/**************************************************************************/
+real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm,
+           real_t *ubvec)
+{
+  idx_t i, j, ncon, *pwgts;
+  real_t max, cur;
+
+  ncon  = graph->ncon;
+  pwgts = graph->pwgts;
+
+  max = -1.0;
+  for (i=0; i<ncon; i++) {
+    for (j=0; j<nparts; j++) {
+      cur = pwgts[j*ncon+i]*pijbm[j*ncon+i] - ubvec[i];
+      if (cur > max)
+        max = cur;
+    }
+  }
+
+  return max;
+}
+
+
+/*************************************************************************/
+/*! Computes the difference between load imbalance of each constraint across
+    the partitions minus the desired upper bound on the load imbalance,
+    storing it in diffvec. It also returns the maximum of these differences
+    across the constraints (never below -1.0). */
+/**************************************************************************/
+real_t ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm,
+           real_t *ubfactors, real_t *diffvec)
+{
+  idx_t i, j, ncon, *pwgts;
+  real_t cur, max;
+
+  ncon  = graph->ncon;
+  pwgts = graph->pwgts;
+
+  for (max=-1.0, i=0; i<ncon; i++) {
+    diffvec[i] = pwgts[i]*pijbm[i] - ubfactors[i];
+    for (j=1; j<nparts; j++) {
+      cur = pwgts[j*ncon+i]*pijbm[j*ncon+i] - ubfactors[i];
+      if (cur > diffvec[i])
+        diffvec[i] = cur;
+    }
+    if (max < diffvec[i])
+      max = diffvec[i];
+  }
+
+  return max;
+}
+
+
+/*************************************************************************/
+/*! Computes the load imbalance of each constraint across the partitions.
*/
+/**************************************************************************/
+void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm,
+         real_t *lbvec)
+{
+  idx_t i, j, ncon, *pwgts;
+  real_t cur;
+
+  ncon  = graph->ncon;
+  pwgts = graph->pwgts;
+
+  for (i=0; i<ncon; i++) {
+    lbvec[i] = pwgts[i]*pijbm[i];
+    for (j=1; j<nparts; j++) {
+      cur = pwgts[j*ncon+i]*pijbm[j*ncon+i];
+      if (cur > lbvec[i])
+        lbvec[i] = cur;
+    }
+  }
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/mesh.c b/3rdParty/metis/metis-5.1.1/libmetis/mesh.c
new file mode 100644
index 000000000..3c5261211
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/mesh.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * mesh.c
+ *
+ * This file contains routines for converting 3D and 4D finite element
+ * meshes into dual or nodal graphs
+ *
+ * Started 8/18/97
+ * George
+ *
+ * $Id: mesh.c 13804 2013-03-04 23:49:08Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*****************************************************************************/
+/*! This function creates a graph corresponding to the dual of a finite element
+    mesh.
+
+    \param ne is the number of elements in the mesh.
+    \param nn is the number of nodes in the mesh.
+    \param eptr is an array of size ne+1 used to mark the start and end
+           locations in the eind array.
+    \param eind is an array that stores for each element the set of node IDs
+           (indices) that it is made of. The length of this array is equal
+           to the total number of nodes over all the mesh elements.
+    \param ncommon is the minimum number of nodes that two elements must share
+           in order to be connected via an edge in the dual graph.
+    \param numflag is either 0 or 1 indicating if the numbering of the nodes
+           starts from 0 or 1, respectively. The same numbering is used for the
+           returned graph as well.
+    \param r_xadj indicates where the adjacency list of each vertex is stored
+           in r_adjncy.
The memory for this array is allocated by this routine.
+           It can be freed by calling METIS_Free().
+    \param r_adjncy stores the adjacency list of each vertex in the generated
+           dual graph. The memory for this array is allocated by this routine.
+           It can be freed by calling METIS_Free().
+
+*/
+/*****************************************************************************/
+int METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+          idx_t *ncommon, idx_t *numflag,  idx_t **r_xadj, idx_t **r_adjncy)
+{
+  int sigrval=0, renumber=0;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init())
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0)
+    goto SIGTHROW;
+
+
+  /* renumber the mesh (done only when the caller uses 1-based numbering) */
+  if (*numflag == 1) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    renumber = 1;
+  }
+
+  /* create dual graph; the outputs are reset first so the error path can test them */
+  *r_xadj = *r_adjncy = NULL;
+  CreateGraphDual(*ne, *nn, eptr, eind, *ncommon, r_xadj, r_adjncy);
+
+  /* NOTE(review): after a caught error the output arrays may be NULL here; confirm ChangeMesh2FNumbering copes */
+SIGTHROW:
+  if (renumber)
+    ChangeMesh2FNumbering(*ne, eptr, eind, *ne, *r_xadj, *r_adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  if (sigrval != 0) {
+    if (*r_xadj != NULL)
+      free(*r_xadj);
+    if (*r_adjncy != NULL)
+      free(*r_adjncy);
+    *r_xadj = *r_adjncy = NULL;
+  }
+
+  return metis_rcode(sigrval);
+}
+
+
+/*****************************************************************************/
+/*! This function creates a graph corresponding to (almost) the nodal of a
+    finite element mesh. In the nodal graph, each node is connected to the
+    nodes corresponding to the union of nodes present in all the elements
+    in which that node belongs.
+
+    \param ne is the number of elements in the mesh.
+    \param nn is the number of nodes in the mesh.
+    \param eptr is an array of size ne+1 used to mark the start and end
+           locations in the eind array.
+    \param eind is an array that stores for each element the set of node IDs
+           (indices) that it is made of.
The length of this array is equal
+           to the total number of nodes over all the mesh elements.
+    \param numflag is either 0 or 1 indicating if the numbering of the nodes
+           starts from 0 or 1, respectively. The same numbering is used for the
+           returned graph as well.
+    \param r_xadj indicates where the adjacency list of each vertex is stored
+           in r_adjncy. The memory for this array is allocated by this routine.
+           It can be freed by calling METIS_Free().
+    \param r_adjncy stores the adjacency list of each vertex in the generated
+           nodal graph. The memory for this array is allocated by this routine.
+           It can be freed by calling METIS_Free().
+
+*/
+/*****************************************************************************/
+int METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+          idx_t *numflag,  idx_t **r_xadj, idx_t **r_adjncy)
+{
+  int sigrval=0, renumber=0;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init())
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0)
+    goto SIGTHROW;
+
+
+  /* renumber the mesh (done only when the caller uses 1-based numbering) */
+  if (*numflag == 1) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    renumber = 1;
+  }
+
+  /* create nodal graph; the outputs are reset first so the error path can test them */
+  *r_xadj = *r_adjncy = NULL;
+  CreateGraphNodal(*ne, *nn, eptr, eind, r_xadj, r_adjncy);
+
+  /* NOTE(review): after a caught error the output arrays may be NULL here; confirm ChangeMesh2FNumbering copes */
+SIGTHROW:
+  if (renumber)
+    ChangeMesh2FNumbering(*ne, eptr, eind, *nn, *r_xadj, *r_adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  if (sigrval != 0) {
+    if (*r_xadj != NULL)
+      free(*r_xadj);
+    if (*r_adjncy != NULL)
+      free(*r_adjncy);
+    *r_xadj = *r_adjncy = NULL;
+  }
+
+  return metis_rcode(sigrval);
+}
+
+
+/*****************************************************************************/
+/*!
This function creates the dual of a finite element mesh */ +/*****************************************************************************/ +void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, + idx_t **r_xadj, idx_t **r_adjncy) +{ + idx_t i, j, nnbrs; + idx_t *nptr, *nind; + idx_t *xadj, *adjncy; + idx_t *marker, *nbrs; + + if (ncommon < 1) { + printf(" Increased ncommon to 1, as it was initially %"PRIDX"\n", ncommon); + ncommon = 1; + } + + /* construct the node-element list first */ + nptr = ismalloc(nn+1, 0, "CreateGraphDual: nptr"); + nind = imalloc(eptr[ne], "CreateGraphDual: nind"); + + for (i=0; i<ne; i++) { + for (j=eptr[i]; j<eptr[i+1]; j++) + nptr[eind[j]]++; + } + MAKECSR(i, nn, nptr); + + for (i=0; i<ne; i++) { + for (j=eptr[i]; j<eptr[i+1]; j++) + nind[nptr[eind[j]]++] = i; + } + SHIFTCSR(i, nn, nptr); + + + /* Allocate memory for xadj, since you know its size. + These are done using standard malloc as they are returned + to the calling function */ + if ((xadj = (idx_t *)malloc((ne+1)*sizeof(idx_t))) == NULL) + gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n"); + *r_xadj = xadj; + iset(ne+1, 0, xadj); + + /* allocate memory for working arrays used by FindCommonElements */ + marker = ismalloc(ne, 0, "CreateGraphDual: marker"); + nbrs = imalloc(ne, "CreateGraphDual: nbrs"); + + for (i=0; i<ne; i++) { + xadj[i] = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, + nind, eptr, ncommon, marker, nbrs); + } + MAKECSR(i, ne, xadj); + + /* Allocate memory for adjncy, since you now know its size. 
+ These are done using standard malloc as they are returned + to the calling function */ + if ((adjncy = (idx_t *)malloc(xadj[ne]*sizeof(idx_t))) == NULL) { + free(xadj); + *r_xadj = NULL; + gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n"); + } + *r_adjncy = adjncy; + + for (i=0; i<ne; i++) { + nnbrs = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, + nind, eptr, ncommon, marker, nbrs); + for (j=0; j<nnbrs; j++) + adjncy[xadj[i]++] = nbrs[j]; + } + SHIFTCSR(i, ne, xadj); + + gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM); +} + + +/*****************************************************************************/ +/*! This function finds all elements that share at least ncommon nodes with + the ``query'' element. +*/ +/*****************************************************************************/ +idx_t FindCommonElements(idx_t qid, idx_t elen, idx_t *eind, idx_t *nptr, + idx_t *nind, idx_t *eptr, idx_t ncommon, idx_t *marker, idx_t *nbrs) +{ + idx_t i, ii, j, jj, k, l, overlap; + + /* find all elements that share at least one node with qid */ + for (k=0, i=0; i<elen; i++) { + j = eind[i]; + for (ii=nptr[j]; ii<nptr[j+1]; ii++) { + jj = nind[ii]; + + if (marker[jj] == 0) + nbrs[k++] = jj; + marker[jj]++; + } + } + + /* put qid into the neighbor list (in case it is not there) so that it + will be removed in the next step */ + if (marker[qid] == 0) + nbrs[k++] = qid; + marker[qid] = 0; + + /* compact the list to contain only those with at least ncommon nodes */ + for (j=0, i=0; i<k; i++) { + overlap = marker[l = nbrs[i]]; + if (overlap >= ncommon || + overlap >= elen-1 || + overlap >= eptr[l+1]-eptr[l]-1) + nbrs[j++] = l; + marker[l] = 0; + } + + return j; +} + + +/*****************************************************************************/ +/*! 
This function creates the (almost) nodal of a finite element mesh */ +/*****************************************************************************/ +void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, + idx_t **r_xadj, idx_t **r_adjncy) +{ + idx_t i, j, nnbrs; + idx_t *nptr, *nind; + idx_t *xadj, *adjncy; + idx_t *marker, *nbrs; + + + /* construct the node-element list first */ + nptr = ismalloc(nn+1, 0, "CreateGraphNodal: nptr"); + nind = imalloc(eptr[ne], "CreateGraphNodal: nind"); + + for (i=0; i<ne; i++) { + for (j=eptr[i]; j<eptr[i+1]; j++) + nptr[eind[j]]++; + } + MAKECSR(i, nn, nptr); + + for (i=0; i<ne; i++) { + for (j=eptr[i]; j<eptr[i+1]; j++) + nind[nptr[eind[j]]++] = i; + } + SHIFTCSR(i, nn, nptr); + + + /* Allocate memory for xadj, since you know its size. + These are done using standard malloc as they are returned + to the calling function */ + if ((xadj = (idx_t *)malloc((nn+1)*sizeof(idx_t))) == NULL) + gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n"); + *r_xadj = xadj; + iset(nn+1, 0, xadj); + + /* allocate memory for working arrays used by FindCommonElements */ + marker = ismalloc(nn, 0, "CreateGraphNodal: marker"); + nbrs = imalloc(nn, "CreateGraphNodal: nbrs"); + + for (i=0; i<nn; i++) { + xadj[i] = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, + eind, marker, nbrs); + } + MAKECSR(i, nn, xadj); + + /* Allocate memory for adjncy, since you now know its size. 
+ These are done using standard malloc as they are returned + to the calling function */ + if ((adjncy = (idx_t *)malloc(xadj[nn]*sizeof(idx_t))) == NULL) { + free(xadj); + *r_xadj = NULL; + gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n"); + } + *r_adjncy = adjncy; + + for (i=0; i<nn; i++) { + nnbrs = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, + eind, marker, nbrs); + for (j=0; j<nnbrs; j++) + adjncy[xadj[i]++] = nbrs[j]; + } + SHIFTCSR(i, nn, xadj); + + gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM); +} + + +/*****************************************************************************/ +/*! This function finds the union of nodes that are in the same elements with + the ``query'' node. +*/ +/*****************************************************************************/ +idx_t FindCommonNodes(idx_t qid, idx_t nelmnts, idx_t *elmntids, idx_t *eptr, + idx_t *eind, idx_t *marker, idx_t *nbrs) +{ + idx_t i, ii, j, jj, k; + + /* find all nodes that share at least one element with qid */ + marker[qid] = 1; /* this is to prevent self-loops */ + for (k=0, i=0; i<nelmnts; i++) { + j = elmntids[i]; + for (ii=eptr[j]; ii<eptr[j+1]; ii++) { + jj = eind[ii]; + if (marker[jj] == 0) { + nbrs[k++] = jj; + marker[jj] = 1; + } + } + } + + /* reset the marker */ + marker[qid] = 0; + for (i=0; i<k; i++) { + marker[nbrs[i]] = 0; + } + + return k; +} + + + +/*************************************************************************/ +/*! This function creates and initializes a mesh_t structure */ +/*************************************************************************/ +mesh_t *CreateMesh(void) +{ + mesh_t *mesh; + + mesh = (mesh_t *)gk_malloc(sizeof(mesh_t), "CreateMesh: mesh"); + + InitMesh(mesh); + + return mesh; +} + + +/*************************************************************************/ +/*! 
This function initializes a mesh_t data structure */
+/*************************************************************************/
+void InitMesh(mesh_t *mesh)
+{
+  memset((void *)mesh, 0, sizeof(mesh_t));
+}
+
+
+/*************************************************************************/
+/*! This function frees a mesh and its eptr/eind/ewgt arrays; *r_mesh must be non-NULL and is set to NULL */
+/*************************************************************************/
+void FreeMesh(mesh_t **r_mesh)
+{
+  mesh_t *mesh = *r_mesh;
+
+  gk_free((void **)&mesh->eptr, &mesh->eind, &mesh->ewgt, &mesh, LTERM);
+
+  *r_mesh = NULL;
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/meshpart.c b/3rdParty/metis/metis-5.1.1/libmetis/meshpart.c
new file mode 100644
index 000000000..77fd35c55
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/meshpart.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * meshpart.c
+ *
+ * This file contains routines for partitioning finite element meshes.
+ *
+ * Started 9/29/97
+ * George
+ *
+ * $Id: meshpart.c 17513 2014-08-05 16:20:50Z dominique $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+* This function partitions a finite element mesh by partitioning its nodal
+* graph (k-way or recursive, per the PTYPE option) and then assigning elements in a load balanced fashion.
+**************************************************************************/
+int METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+          idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts,
+          idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart)
+{
+  int sigrval=0, renumber=0, ptype;
+  idx_t *xadj=NULL, *adjncy=NULL;
+  idx_t ncon=1, pnumflag=0;
+  int rstatus=METIS_OK;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init())
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0)
+    goto SIGTHROW;
+
+  renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+  ptype    = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY);
+
+  /* renumber the mesh; the numbering option is cleared while the work is done */
+  if (renumber) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    options[METIS_OPTION_NUMBERING] = 0;
+  }
+
+  /* get the nodal graph; any failure below is routed to SIGTHROW via raise() */
+  rstatus = METIS_MeshToNodal(ne, nn, eptr, eind, &pnumflag, &xadj, &adjncy);
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* partition the graph with the routine selected by ptype */
+  if (ptype == METIS_PTYPE_KWAY)
+    rstatus = METIS_PartGraphKway(nn, &ncon, xadj, adjncy, vwgt, vsize, NULL,
+                  nparts, tpwgts, NULL, options, objval, npart);
+  else
+    rstatus = METIS_PartGraphRecursive(nn, &ncon, xadj, adjncy, vwgt, vsize, NULL,
+                  nparts, tpwgts, NULL, options, objval, npart);
+
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* derive the element partition (epart) from the node partition (npart) */
+  InduceRowPartFromColumnPart(*ne, eptr, eind, epart, npart, *nparts, tpwgts);
+
+
+SIGTHROW:
+  if (renumber) {
+    ChangeMesh2FNumbering2(*ne, *nn, eptr, eind, epart, npart);
+    options[METIS_OPTION_NUMBERING] = 1;
+  }
+
+  METIS_Free(xadj);
+  METIS_Free(adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+
+/*************************************************************************
+* This function partitions a finite element mesh by partitioning its dual
+* graph (k-way or recursive, per the PTYPE option) and then assigning nodes in a load balanced fashion.
+**************************************************************************/
+int METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+          idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts,
+          real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart,
+          idx_t *npart)
+{
+  int sigrval=0, renumber=0, ptype;
+  idx_t i, j;
+  idx_t *xadj=NULL, *adjncy=NULL, *nptr=NULL, *nind=NULL;
+  idx_t ncon=1, pnumflag=0;
+  int rstatus = METIS_OK;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init())
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0)
+    goto SIGTHROW;
+
+  renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+  ptype    = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY);
+
+  /* renumber the mesh; the numbering option is cleared while the work is done */
+  if (renumber) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    options[METIS_OPTION_NUMBERING] = 0;
+  }
+
+  /* get the dual graph; any failure below is routed to SIGTHROW via raise() */
+  rstatus = METIS_MeshToDual(ne, nn, eptr, eind, ncommon, &pnumflag, &xadj, &adjncy);
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* partition the graph with the routine selected by ptype */
+  if (ptype == METIS_PTYPE_KWAY)
+    rstatus = METIS_PartGraphKway(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL,
+                  nparts, tpwgts, NULL, options, objval, epart);
+  else
+    rstatus = METIS_PartGraphRecursive(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL,
+                  nparts, tpwgts, NULL, options, objval, epart);
+
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+
+  /* construct the node-element list (CSR: nptr/nind list the elements of each node) */
+  nptr = ismalloc(*nn+1, 0, "METIS_PartMeshDual: nptr");
+  nind = imalloc(eptr[*ne], "METIS_PartMeshDual: nind");
+
+  for (i=0; i<*ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nptr[eind[j]]++;
+  }
+  MAKECSR(i, *nn, nptr);
+
+  for (i=0; i<*ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nind[nptr[eind[j]]++] = i;
+  }
+  SHIFTCSR(i, *nn, nptr);
+
+  /* derive the node partition (npart) from the element partition (epart) */
+  InduceRowPartFromColumnPart(*nn, nptr, nind, npart, epart, *nparts, tpwgts);
+
+  gk_free((void **)&nptr, &nind, LTERM);
+
+
+SIGTHROW:
+  if (renumber) {
+    ChangeMesh2FNumbering2(*ne, *nn, eptr, eind, epart, npart);
+    options[METIS_OPTION_NUMBERING] = 1;
+  }
+
+  METIS_Free(xadj);
+  METIS_Free(adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+
+/*************************************************************************/
+/*! Induces a partitioning of the rows (rpart) based on a partitioning of the
+    columns (cpart). It is used by both the Nodal and Dual routines. */
+/*************************************************************************/
+void InduceRowPartFromColumnPart(idx_t nrows, idx_t *rowptr, idx_t *rowind,
+         idx_t *rpart, idx_t *cpart, idx_t nparts, real_t *tpwgts)
+{
+  idx_t i, j, k, me;
+  idx_t nnbrs, *pwgts, *nbrdom, *nbrwgt, *nbrmrk;
+  idx_t *itpwgts;
+
+  pwgts  = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: pwgts");
+  nbrdom = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: nbrdom");
+  nbrwgt = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: nbrwgt");
+  nbrmrk = ismalloc(nparts, -1, "InduceRowPartFromColumnPart: nbrmrk");
+
+  iset(nrows, -1, rpart);
+
+  /* setup the integer target partition weights (uniform when tpwgts is NULL) */
+  itpwgts = imalloc(nparts, "InduceRowPartFromColumnPart: itpwgts");
+  if (tpwgts == NULL) {
+    iset(nparts, 1+nrows/nparts, itpwgts);
+  }
+  else {
+    for (i=0; i<nparts; i++)
+      itpwgts[i] = 1+nrows*tpwgts[i];
+  }
+
+  /* first assign the rows consisting only of columns that belong to
+     a single partition. Assign rows that are empty to -2 (un-assigned) */
+  for (i=0; i<nrows; i++) {
+    if (rowptr[i+1]-rowptr[i] == 0) {
+      rpart[i] = -2;
+      continue;
+    }
+
+    me = cpart[rowind[rowptr[i]]];
+    for (j=rowptr[i]+1; j<rowptr[i+1]; j++) {
+      if (cpart[rowind[j]] != me)
+        break;
+    }
+    if (j == rowptr[i+1]) {
+      rpart[i] = me;
+      pwgts[me]++;
+    }
+  }
+
+  /* next assign the rows consisting of columns belonging to multiple
+     partitions in a balanced way (these are the rows still marked -1) */
+  for (i=0; i<nrows; i++) {
+    if (rpart[i] == -1) {
+      for (nnbrs=0, j=rowptr[i]; j<rowptr[i+1]; j++) {
+        me = cpart[rowind[j]];
+        if (nbrmrk[me] == -1) {
+          nbrdom[nnbrs] = me;
+          nbrwgt[nnbrs] = 1;
+          nbrmrk[me] = nnbrs++;
+        }
+        else {
+          nbrwgt[nbrmrk[me]]++;
+        }
+      }
+      ASSERT(nnbrs > 0);
+
+      /* assign it first to the domain with most things in common */
+      rpart[i] = nbrdom[iargmax(nnbrs, nbrwgt,1)];
+
+      /* if overweight, move it to the first candidate that is under target or less overweight */
+      if (pwgts[rpart[i]] > itpwgts[rpart[i]]) {
+        for (j=0; j<nnbrs; j++) {
+          if (pwgts[nbrdom[j]] < itpwgts[nbrdom[j]] ||
+              pwgts[nbrdom[j]]-itpwgts[nbrdom[j]] < pwgts[rpart[i]]-itpwgts[rpart[i]]) {
+            rpart[i] = nbrdom[j];
+            break;
+          }
+        }
+      }
+      pwgts[rpart[i]]++;
+
+      /* reset nbrmrk array */
+      for (j=0; j<nnbrs; j++)
+        nbrmrk[nbrdom[j]] = -1;
+    }
+  }
+
+  gk_free((void **)&pwgts, &nbrdom, &nbrwgt, &nbrmrk, &itpwgts, LTERM);
+
+}
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/metislib.h b/3rdParty/metis/metis-5.1.1/libmetis/metislib.h
new file mode 100644
index 000000000..dc224f429
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/metislib.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * metislib.h
+ *
+ * This file includes all necessary header files
+ *
+ * Started 8/27/94
+ * George
+ *
+ * $Id: metislib.h 10655 2011-08-02 17:38:11Z benjamin $
+ */
+
+#ifndef _LIBMETIS_METISLIB_H_
+#define _LIBMETIS_METISLIB_H_
+
+#include <GKlib.h>
+
+#if defined(ENABLE_OPENMP)
+  #include <omp.h>
+#endif
+
+
+#include <metis.h>
+#include "rename.h"
+#include "gklib_defs.h" + +#include "defs.h" +#include "struct.h" +#include "macros.h" +#include "proto.h" + + +#if defined(COMPILER_MSC) +#if defined(rint) + #undef rint +#endif +#define rint(x) ((idx_t)((x)+0.5)) /* MSC does not have rint() function */ +#endif + +#endif diff --git a/3rdParty/metis/metis-5.1.1/libmetis/minconn.c b/3rdParty/metis/metis-5.1.1/libmetis/minconn.c new file mode 100644 index 000000000..9f92f2f71 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/minconn.c @@ -0,0 +1,729 @@ +/*! +\file +\brief Functions that deal with pruning the number of adjacent subdomains in kmetis + +\date Started 7/15/98 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version $Id: minconn.c 17513 2014-08-05 16:20:50Z dominique $ +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function computes the subdomain graph storing the result in the + pre-allocated workspace arrays (ctrl->nads, ctrl->adids, ctrl->adwgts), which are grown when a subdomain has more adjacent subdomains than they can hold */ +/*************************************************************************/ +void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, pid, other, nparts, nvtxs, nnbrs; + idx_t *xadj, *adjncy, *adjwgt, *where; + idx_t *pptr, *pind; + idx_t nads=0, *vadids, *vadwgts; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + where = graph->where; + + nparts = ctrl->nparts; + + vadids = ctrl->pvec1; + vadwgts = iset(nparts, 0, ctrl->pvec2); + + pptr = iwspacemalloc(ctrl, nparts+1); + pind = iwspacemalloc(ctrl, nvtxs); + iarray2csr(nvtxs, nparts, where, pptr, pind); + + for (pid=0; pid<nparts; pid++) { + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + { + ckrinfo_t *rinfo; + cnbr_t *nbrs; + + rinfo = graph->ckrinfo; + for (nads=0, ii=pptr[pid]; ii<pptr[pid+1]; ii++) { + i = pind[ii]; + ASSERT(pid == where[i]); + + if (rinfo[i].ed > 0) { + nnbrs = rinfo[i].nnbrs; + nbrs = ctrl->cnbrpool + 
rinfo[i].inbr; + + for (j=0; j<nnbrs; j++) { + other = nbrs[j].pid; + if (vadwgts[other] == 0) + vadids[nads++] = other; + vadwgts[other] += nbrs[j].ed; + } + } + } + } + break; + + case METIS_OBJTYPE_VOL: + { + vkrinfo_t *rinfo; + vnbr_t *nbrs; + + rinfo = graph->vkrinfo; + for (nads=0, ii=pptr[pid]; ii<pptr[pid+1]; ii++) { + i = pind[ii]; + ASSERT(pid == where[i]); + + if (rinfo[i].ned > 0) { + nnbrs = rinfo[i].nnbrs; + nbrs = ctrl->vnbrpool + rinfo[i].inbr; + + for (j=0; j<nnbrs; j++) { + other = nbrs[j].pid; + if (vadwgts[other] == 0) + vadids[nads++] = other; + vadwgts[other] += nbrs[j].ned; + } + } + } + } + break; + + default: + gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype); + } + + /* See if you have enough memory to store the adjacent info for that subdomain */ + if (ctrl->maxnads[pid] < nads) { + ctrl->maxnads[pid] = 2*nads; + ctrl->adids[pid] = irealloc(ctrl->adids[pid], ctrl->maxnads[pid], + "ComputeSubDomainGraph: adids[pid]"); + ctrl->adwgts[pid] = irealloc(ctrl->adwgts[pid], ctrl->maxnads[pid], + "ComputeSubDomainGraph: adwgts[pid]"); + } + + ctrl->nads[pid] = nads; + for (j=0; j<nads; j++) { + ctrl->adids[pid][j] = vadids[j]; + ctrl->adwgts[pid][j] = vadwgts[vadids[j]]; + + vadwgts[vadids[j]] = 0; /* leave vadwgts all-zero for the next pid */ + } + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function updates the weight of an edge in the subdomain graph by + adding to it the value of ewgt. The update can either increase or + decrease the weight of the subdomain edge based on the value of ewgt. + + \param u is the ID of one of the incident subdomains to the edge + \param v is the ID of the other incident subdomains to the edge + \param ewgt is the weight to be added to the subdomain edge + \param ctrl is the control structure whose nads/adids/adwgts/maxnads arrays hold the subdomain graph + \param r_maxndoms is the maximum number of adjacent subdomains and is + updated as necessary. The update is skipped if a NULL value is + supplied. 
+*/ +/*************************************************************************/ +void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, + idx_t *r_maxndoms) +{ + idx_t i, j, nads; + + if (ewgt == 0) + return; + + for (i=0; i<2; i++) { /* two passes: update u's list, then (after the SWAP below) v's list */ + nads = ctrl->nads[u]; + /* Find the edge */ + for (j=0; j<nads; j++) { + if (ctrl->adids[u][j] == v) { + ctrl->adwgts[u][j] += ewgt; + break; + } + } + + if (j == nads) { + /* Deal with the case in which the edge was not found */ + ASSERT(ewgt > 0); + if (ctrl->maxnads[u] == nads) { + ctrl->maxnads[u] = 2*(nads+1); + ctrl->adids[u] = irealloc(ctrl->adids[u], ctrl->maxnads[u], + "UpdateEdgeSubDomainGraph: adids[u]"); + ctrl->adwgts[u] = irealloc(ctrl->adwgts[u], ctrl->maxnads[u], + "UpdateEdgeSubDomainGraph: adwgts[u]"); + } + ctrl->adids[u][nads] = v; + ctrl->adwgts[u][nads] = ewgt; + nads++; + if (r_maxndoms != NULL && nads > *r_maxndoms) { + printf("You just increased the maxndoms: %"PRIDX" %"PRIDX"\n", + nads, *r_maxndoms); + *r_maxndoms = nads; + } + } + else { + /* See if the updated edge becomes 0 */ + ASSERT(ctrl->adwgts[u][j] >= 0); + if (ctrl->adwgts[u][j] == 0) { + ctrl->adids[u][j] = ctrl->adids[u][nads-1]; + ctrl->adwgts[u][j] = ctrl->adwgts[u][nads-1]; + ctrl->nads[u] = --nads; /* store the new degree first so the max recomputed below sees it */ + if (r_maxndoms != NULL && nads+1 == *r_maxndoms) + *r_maxndoms = ctrl->nads[iargmax(ctrl->nparts, ctrl->nads,1)]; + } + } + ctrl->nads[u] = nads; + + SWAP(u, v, j); + } +} + + +/*************************************************************************/ +/*! 
This function tries to eliminate edges of the subdomain graph, i.e., it reduces the number of subdomains adjacent to the most connected subdomains by moving groups of vertices between subdomains */ +/*************************************************************************/ +void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, k, ncon, nparts, scheme, pid_from, pid_to, me, other, nvtxs, + total, max, avg, totalout, nind=0, ncand=0, ncand2, target, target2, + nadd, bestnadd=0; + idx_t min, move, *cpwgt; + idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, + *mypmat, *otherpmat, *kpmat, *ind; + idx_t *nads, **adids, **adwgts; + ikv_t *cand, *cand2; + ipq_t queue; + real_t *tpwgts, badfactor=1.4; + idx_t *pptr, *pind; + idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL; /* volume specific work arrays */ + + WCOREPUSH; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt); + + where = graph->where; + pwgts = graph->pwgts; /* We assume that this is properly initialized */ + + nparts = ctrl->nparts; + tpwgts = ctrl->tpwgts; + + cpwgt = iwspacemalloc(ctrl, ncon); + maxpwgt = iwspacemalloc(ctrl, nparts*ncon); + ind = iwspacemalloc(ctrl, nvtxs); + otherpmat = iset(nparts, 0, iwspacemalloc(ctrl, nparts)); + + cand = ikvwspacemalloc(ctrl, nparts); + cand2 = ikvwspacemalloc(ctrl, nparts); + + pptr = iwspacemalloc(ctrl, nparts+1); + pind = iwspacemalloc(ctrl, nvtxs); + iarray2csr(nvtxs, nparts, where, pptr, pind); + + if (ctrl->objtype == METIS_OBJTYPE_VOL) { + /* Vol-refinement specific working arrays */ + modind = iwspacemalloc(ctrl, nvtxs); + vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs)); + pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts)); + } + + + /* Compute the subdomain graph (ctrl->nads, ctrl->adids, ctrl->adwgts) */ + ComputeSubDomainGraph(ctrl, graph); + + nads = ctrl->nads; + adids = ctrl->adids; + adwgts = ctrl->adwgts; + + mypmat = iset(nparts, 0, ctrl->pvec1); + kpmat = iset(nparts, 0, ctrl->pvec2); + + /* Compute the maximum allowed weight for each domain */ + 
for (i=0; i<nparts; i++) { + for (j=0; j<ncon; j++) + maxpwgt[i*ncon+j] = + (ncon == 1 ? 1.25 : 1.025)*tpwgts[i]*graph->tvwgt[j]*ctrl->ubfactors[j]; + } + + ipqInit(&queue, nparts); + + /* Get into the loop eliminating subdomain connections; it ends when max < badfactor*avg or when a full pass makes no move */ + while (1) { + total = isum(nparts, nads, 1); + avg = total/nparts; + max = nads[iargmax(nparts, nads,1)]; + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("Adjacent Subdomain Stats: Total: %3"PRIDX", " + "Max: %3"PRIDX"[%zu], Avg: %3"PRIDX"\n", + total, max, iargmax(nparts, nads,1), avg)); + + if (max < badfactor*avg) + break; + + /* Add the subdomains that you will try to reduce their connectivity */ + ipqReset(&queue); + for (i=0; i<nparts; i++) { + if (nads[i] >= avg + (max-avg)/2) + ipqInsert(&queue, i, nads[i]); + } + + move = 0; + while ((me = ipqGetTop(&queue)) != -1) { + totalout = isum(nads[me], adwgts[me], 1); + + for (ncand2=0, i=0; i<nads[me]; i++) { + mypmat[adids[me][i]] = adwgts[me][i]; + + /* keep track of the weakly connected adjacent subdomains */ + if (2*nads[me]*adwgts[me][i] < totalout) { + cand2[ncand2].val = adids[me][i]; + cand2[ncand2++].key = adwgts[me][i]; + } + } + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("Me: %"PRIDX", Degree: %4"PRIDX", TotalOut: %"PRIDX",\n", + me, nads[me], totalout)); + + /* Sort the connections according to their cut */ + ikvsorti(ncand2, cand2); + + /* Two schemes are used for eliminating subdomain edges. + The first, tries to eliminate subdomain edges by moving remote groups + of vertices to subdomains that 'me' is already connected to. + The second, tries to eliminate subdomain edges by moving entire sets of + my vertices that connect to the 'other' subdomain to a subdomain that + I'm already connected to. + These two schemes are applied in sequence. */ + target = target2 = -1; + for (scheme=0; scheme<2; scheme++) { + for (min=0; min<ncand2; min++) { + other = cand2[min].val; + + /* pid_from is the subdomain from where the vertices will be removed. 
+ pid_to is the adjacent subdomain to pid_from that defines the + (me, other) subdomain edge that needs to be removed */ + if (scheme == 0) { + pid_from = other; + pid_to = me; + } + else { + pid_from = me; + pid_to = other; + } + + /* Go and find the vertices in pid_from that are connected to pid_to */ + for (nind=0, ii=pptr[pid_from]; ii<pptr[pid_from+1]; ii++) { + i = pind[ii]; + ASSERT(where[i] == pid_from); + for (j=xadj[i]; j<xadj[i+1]; j++) { + if (where[adjncy[j]] == pid_to) { + ind[nind++] = i; + break; + } + } + } + + /* Go and construct the otherpmat to see where these nind vertices are + connected to */ + iset(ncon, 0, cpwgt); + for (ncand=0, ii=0; ii<nind; ii++) { + i = ind[ii]; + iaxpy(ncon, 1, vwgt+i*ncon, 1, cpwgt, 1); + + for (j=xadj[i]; j<xadj[i+1]; j++) { + if ((k = where[adjncy[j]]) == pid_from) + continue; + if (otherpmat[k] == 0) + cand[ncand++].val = k; + otherpmat[k] += (adjwgt ? adjwgt[j] : 1); + } + } + + for (i=0; i<ncand; i++) { + cand[i].key = otherpmat[cand[i].val]; + ASSERT(cand[i].key > 0); + } + + ikvsortd(ncand, cand); + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("\tMinOut: %4"PRIDX", to: %3"PRIDX", TtlWgt: %5"PRIDX"[#:%"PRIDX"]\n", + mypmat[other], other, isum(ncon, cpwgt, 1), nind)); + + /* Go through and select the first domain that is common with 'me', and does + not increase the nads[target] higher than nads[me], subject to the maxpwgt + constraint. Traversal is done from the mostly connected to the least. */ + for (i=0; i<ncand; i++) { + k = cand[i].val; + + if (mypmat[k] > 0) { + /* Check if balance will go off */ + if (!ivecaxpylez(ncon, 1, cpwgt, pwgts+k*ncon, maxpwgt+k*ncon)) + continue; + + /* get a dense vector out of k's connectivity */ + for (j=0; j<nads[k]; j++) + kpmat[adids[k][j]] = adwgts[k][j]; + + /* Check if the move to domain k will increase the nads of another + subdomain j that the set of vertices being moved are connected + to but domain k is not connected to. 
*/ + for (j=0; j<nparts; j++) { + if (otherpmat[j] > 0 && kpmat[j] == 0 && nads[j]+1 >= nads[me]) + break; + } + + /* There were no bad second level effects. See if you can find a + subdomain to move to. */ + if (j == nparts) { + for (nadd=0, j=0; j<nparts; j++) { + if (otherpmat[j] > 0 && kpmat[j] == 0) + nadd++; + } + + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("\t\tto=%"PRIDX", nadd=%"PRIDX", %"PRIDX"\n", k, nadd, nads[k])); + + if (nads[k]+nadd < nads[me]) { + if (target2 == -1 || nads[target2]+bestnadd > nads[k]+nadd || + (nads[target2]+bestnadd == nads[k]+nadd && bestnadd > nadd)) { + target2 = k; + bestnadd = nadd; + } + } + + if (nadd == 0) + target = k; + } + + /* reset kpmat for the next iteration */ + for (j=0; j<nads[k]; j++) + kpmat[adids[k][j]] = 0; + } + + if (target != -1) + break; + } + + /* reset the otherpmat for the next iteration */ + for (i=0; i<ncand; i++) + otherpmat[cand[i].val] = 0; + + if (target == -1 && target2 != -1) + target = target2; + + if (target != -1) { + IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, + printf("\t\tScheme: %"PRIDX". Moving to %"PRIDX"\n", scheme, target)); + move = 1; + break; + } + } + + if (target != -1) + break; /* A move was found. No need to try the other scheme */ + } + + /* reset the mypmat for next iteration */ + for (i=0; i<nads[me]; i++) + mypmat[adids[me][i]] = 0; + + /* Note that once a target is found the above loops exit right away. 
So the + following variables (target, nind, ind) are valid */ + if (target != -1) { + switch (ctrl->objtype) { + case METIS_OBJTYPE_CUT: + MoveGroupMinConnForCut(ctrl, graph, target, nind, ind); + break; + case METIS_OBJTYPE_VOL: + MoveGroupMinConnForVol(ctrl, graph, target, nind, ind, vmarker, + pmarker, modind); + break; + default: + gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype); + } + + /* Update the csr representation of the partitioning vector */ + iarray2csr(nvtxs, nparts, where, pptr, pind); + } + } + + if (move == 0) + break; + } + + ipqFree(&queue); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function moves the nind vertices listed in ind to subdomain 'to' and updates their rinfo, the partition weights, the cut, and the subdomain graph (edge-cut objective) */ +/*************************************************************************/ +void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind) +{ + idx_t i, ii, j, jj, k, l, nvtxs, nbnd, from, me; + idx_t *xadj, *adjncy, *adjwgt, *where, *bndptr, *bndind; + ckrinfo_t *myrinfo; + cnbr_t *mynbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + + nbnd = graph->nbnd; + + while (--nind>=0) { + i = ind[nind]; + from = where[i]; + + myrinfo = graph->ckrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->cnbrpool + myrinfo->inbr; + + /* find the location of 'to' in myrinfo or create it if it is not there */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + if (k == myrinfo->nnbrs) { + ASSERT(k < xadj[i+1]-xadj[i]); + mynbrs[k].pid = to; + mynbrs[k].ed = 0; + myrinfo->nnbrs++; + } + + /* Update pwgts */ + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + 
+ /* Update mincut: subtract (ed of the moved vertex towards 'to') - (its id) */ + graph->mincut -= mynbrs[k].ed-myrinfo->id; + + /* Update subdomain connectivity graph to reflect the move of 'i' */ + UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, NULL); + + /* Update ID/ED and BND related information for the moved vertex */ + UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, + bndptr, bndind, BNDTYPE_REFINE); + + /* Update the degrees and boundary info of adjacent vertices (myrinfo is reused for each neighbor from here on) */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + me = where[ii]; + myrinfo = graph->ckrinfo+ii; + + UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, + from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE); + + /* Update subdomain graph to reflect the move of 'i' for domains other + than 'from' and 'to' */ + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], NULL); + UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], NULL); + } + } + } + + ASSERT(ComputeCut(graph, where) == graph->mincut); + + graph->nbnd = nbnd; + +} + + +/*************************************************************************/ +/*! 
This function moves the nind vertices listed in ind to subdomain 'to' and updates their rinfo, the partition weights, the cut/volume, and the subdomain graph (volume objective) */ +/*************************************************************************/ +void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind) +{ + idx_t i, ii, j, jj, k, l, nvtxs, from, me, other, xgain, ewgt; + idx_t *xadj, *vsize, *adjncy, *where; + vkrinfo_t *myrinfo, *orinfo; + vnbr_t *mynbrs, *onbrs; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vsize = graph->vsize; + adjncy = graph->adjncy; + where = graph->where; + + while (--nind>=0) { + i = ind[nind]; + from = where[i]; + + myrinfo = graph->vkrinfo+i; + if (myrinfo->inbr == -1) { + myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1); + myrinfo->nnbrs = 0; + } + mynbrs = ctrl->vnbrpool + myrinfo->inbr; + + xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0); + + //printf("Moving %"PRIDX" from %"PRIDX" to %"PRIDX" [vsize: %"PRIDX"] [xgain: %"PRIDX"]\n", + // i, from, to, vsize[i], xgain); + + /* find the location of 'to' in myrinfo (it is not created here when missing; the gain is computed explicitly instead) */ + for (k=0; k<myrinfo->nnbrs; k++) { + if (mynbrs[k].pid == to) + break; + } + + if (k == myrinfo->nnbrs) { + //printf("Missing neighbor\n"); + + if (myrinfo->nid > 0) + xgain -= vsize[i]; + + /* determine the volume gain resulting from that move */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + ii = adjncy[j]; + other = where[ii]; + orinfo = graph->vkrinfo+ii; + onbrs = ctrl->vnbrpool + orinfo->inbr; + ASSERT(other != to); + + //printf(" %8d %8d %3d\n", (int)ii, (int)vsize[ii], (int)other); + + if (from == other) { + /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. 
*/ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + } + else { + /* Remote vertex: the volume increases (gain drops) if 'to' is a new subdomain for it */ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == to) + break; + } + if (l == orinfo->nnbrs) + xgain -= vsize[ii]; + + /* Remote vertex: the volume decreases (gain grows) if i is its only connection to 'from' */ + for (l=0; l<orinfo->nnbrs; l++) { + if (onbrs[l].pid == from && onbrs[l].ned == 1) { + xgain += vsize[ii]; + break; + } + } + } + } + graph->minvol -= xgain; + graph->mincut -= -myrinfo->nid; + ewgt = myrinfo->nid; + } + else { + graph->minvol -= (xgain + mynbrs[k].gv); + graph->mincut -= mynbrs[k].ned-myrinfo->nid; + ewgt = myrinfo->nid-mynbrs[k].ned; + } + + /* Update where and pwgts */ + where[i] = to; + iaxpy(graph->ncon, 1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon, 1); + iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1); + + /* Update subdomain connectivity graph to reflect the move of 'i' */ + UpdateEdgeSubDomainGraph(ctrl, from, to, ewgt, NULL); + + /* Update the subdomain connectivity of the adjacent vertices */ + for (j=xadj[i]; j<xadj[i+1]; j++) { + me = where[adjncy[j]]; + if (me != from && me != to) { + UpdateEdgeSubDomainGraph(ctrl, from, me, -1, NULL); + UpdateEdgeSubDomainGraph(ctrl, to, me, 1, NULL); + } + } + + /* Update the id/ed/gains/bnd of potentially affected nodes */ + KWayVolUpdate(ctrl, graph, i, from, to, NULL, NULL, NULL, NULL, + NULL, BNDTYPE_REFINE, vmarker, pmarker, modind); + + /*CheckKWayVolPartitionParams(ctrl, graph);*/ + } + ASSERT(ComputeCut(graph, where) == graph->mincut); + ASSERTP(ComputeVolume(graph, where) == graph->minvol, + ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol)); + +} + + +/*************************************************************************/ +/*! This function computes the subdomain graph and prints summary statistics about it. For debugging purposes. 
*/ +/*************************************************************************/ +void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i, j, k, me, nvtxs, total, max; + idx_t *xadj, *adjncy, *adjwgt, *pmat; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + pmat = ismalloc(nparts*nparts, 0, "PrintSubDomainGraph: pmat"); /* dense nparts x nparts matrix of inter-subdomain edge weights */ + + for (i=0; i<nvtxs; i++) { + me = where[i]; + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + if (where[k] != me) + pmat[me*nparts+where[k]] += adjwgt[j]; + } + } + + /* printf("Subdomain Info\n"); */ + total = max = 0; + for (i=0; i<nparts; i++) { + for (k=0, j=0; j<nparts; j++) { /* k counts the subdomains adjacent to subdomain i */ + if (pmat[i*nparts+j] > 0) + k++; + } + total += k; + + if (k > max) + max = k; +/* + printf("%2"PRIDX" -> %2"PRIDX" ", i, k); + for (j=0; j<nparts; j++) { + if (pmat[i*nparts+j] > 0) + printf("[%2"PRIDX" %4"PRIDX"] ", j, pmat[i*nparts+j]); + } + printf("\n"); +*/ + } + printf("Total adjacent subdomains: %"PRIDX", Max: %"PRIDX"\n", total, max); + + gk_free((void **)&pmat, LTERM); +} + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/mincover.c b/3rdParty/metis/metis-5.1.1/libmetis/mincover.c new file mode 100644 index 000000000..ed437fff1 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/mincover.c @@ -0,0 +1,259 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * mincover.c + * + * This file implements the minimum cover algorithm + * + * Started 8/1/97 + * George + * + * $Id: mincover.c 9942 2011-05-17 22:09:52Z karypis $ + */ + +#include "metislib.h" + +/************************************************************************* +* Constants used by mincover algorithm +**************************************************************************/ +#define INCOL 10 +#define INROW 20 +#define VC 1 +#define SC 2 +#define HC 3 +#define VR 4 +#define SR 5 +#define HR 6 + + +/************************************************************************* +* This 
function returns the min-cover of a bipartite graph. +* The algorithm used is due to Hopcroft and Karp as modified by Duff et al. +* (xadj, adjncy): the adjacency structure of the bipartite graph +* asize: the number of vertices in the first part of the bipartite graph +* bsize-asize: the number of vertices in the second part +* 0..(asize-1) > A vertices +* asize..(bsize-1) > B vertices +* +* Returns: +* cover : the actual cover (array) +* csize : the size of the cover +**************************************************************************/ +void MinCover(idx_t *xadj, idx_t *adjncy, idx_t asize, idx_t bsize, idx_t *cover, idx_t *csize) +{ + idx_t i, j; + idx_t *mate, *queue, *flag, *level, *lst; + idx_t fptr, rptr, lstptr; + idx_t row, maxlevel, col; + + mate = ismalloc(bsize, -1, "MinCover: mate"); + flag = imalloc(bsize, "MinCover: flag"); + level = imalloc(bsize, "MinCover: level"); + queue = imalloc(bsize, "MinCover: queue"); + lst = imalloc(bsize, "MinCover: lst"); + + /* Get a cheap matching */ + for (i=0; i<asize; i++) { + for (j=xadj[i]; j<xadj[i+1]; j++) { + if (mate[adjncy[j]] == -1) { + mate[i] = adjncy[j]; + mate[adjncy[j]] = i; + break; + } + } + } + + /* Get into the main loop; it ends when the BFS reaches no free column */ + while (1) { + /* Initialization */ + fptr = rptr = 0; /* Empty Queue */ + lstptr = 0; /* Empty List */ + for (i=0; i<bsize; i++) { + level[i] = -1; + flag[i] = 0; + } + maxlevel = bsize; + + /* Insert free nodes into the queue */ + for (i=0; i<asize; i++) + if (mate[i] == -1) { + queue[rptr++] = i; + level[i] = 0; + } + + /* Perform the BFS */ + while (fptr != rptr) { + row = queue[fptr++]; + if (level[row] < maxlevel) { + flag[row] = 1; + for (j=xadj[row]; j<xadj[row+1]; j++) { + col = adjncy[j]; + if (!flag[col]) { /* If this column has not been accessed yet */ + flag[col] = 1; + if (mate[col] == -1) { /* Free column node was found */ + maxlevel = level[row]; + lst[lstptr++] = col; + } + else { /* This column node is matched */ + if (flag[mate[col]]) + printf("\nSomething wrong, 
flag[%"PRIDX"] is 1",mate[col]); + queue[rptr++] = mate[col]; + level[mate[col]] = level[row] + 1; + } + } + } + } + } + + if (lstptr == 0) + break; /* No free columns can be reached */ + + /* Perform restricted DFS from the free column nodes */ + for (i=0; i<lstptr; i++) + MinCover_Augment(xadj, adjncy, lst[i], mate, flag, level, maxlevel); + } + + MinCover_Decompose(xadj, adjncy, asize, bsize, mate, cover, csize); + + gk_free((void **)&mate, &flag, &level, &queue, &lst, LTERM); + +} + + +/************************************************************************* +* This function performs a restricted DFS and augments matchings. It returns 1 if the matching was augmented through col and 0 otherwise. +**************************************************************************/ +idx_t MinCover_Augment(idx_t *xadj, idx_t *adjncy, idx_t col, idx_t *mate, idx_t *flag, idx_t *level, idx_t maxlevel) +{ + idx_t i; + idx_t row = -1; + idx_t status; + + flag[col] = 2; + for (i=xadj[col]; i<xadj[col+1]; i++) { + row = adjncy[i]; + + if (flag[row] == 1) { /* First time through this row node */ + if (level[row] == maxlevel) { /* (col, row) is an edge of the G^T */ + flag[row] = 2; /* Mark this node as being visited */ + if (maxlevel != 0) + status = MinCover_Augment(xadj, adjncy, mate[row], mate, flag, level, maxlevel-1); + else + status = 1; + + if (status) { + mate[col] = row; + mate[row] = col; + return 1; + } + } + } + } + + return 0; +} + + + +/************************************************************************* +* This function performs a coarse decomposition and determines the +* min-cover. +* REF: Pothen ACMTrans. 
on Math Software +**************************************************************************/ +void MinCover_Decompose(idx_t *xadj, idx_t *adjncy, idx_t asize, idx_t bsize, idx_t *mate, idx_t *cover, idx_t *csize) +{ + idx_t i, k; + idx_t *where; + idx_t card[10]; + + where = imalloc(bsize, "MinCover_Decompose: where"); + for (i=0; i<10; i++) + card[i] = 0; + + for (i=0; i<asize; i++) + where[i] = SC; + for (; i<bsize; i++) + where[i] = SR; + + for (i=0; i<asize; i++) + if (mate[i] == -1) + MinCover_ColDFS(xadj, adjncy, i, mate, where, INCOL); + for (; i<bsize; i++) + if (mate[i] == -1) + MinCover_RowDFS(xadj, adjncy, i, mate, where, INROW); + + for (i=0; i<bsize; i++) + card[where[i]]++; /* card[x] counts the vertices labelled x (VC..HR) */ + + k = 0; + if (iabs(card[VC]+card[SC]-card[HR]) < iabs(card[VC]-card[SR]-card[HR])) { /* S = VC+SC+HR */ + /* printf("%"PRIDX" %"PRIDX" ",vc+sc, hr); */ + for (i=0; i<bsize; i++) + if (where[i] == VC || where[i] == SC || where[i] == HR) + cover[k++] = i; + } + else { /* S = VC+SR+HR */ + /* printf("%"PRIDX" %"PRIDX" ",vc, hr+sr); */ + for (i=0; i<bsize; i++) + if (where[i] == VC || where[i] == SR || where[i] == HR) + cover[k++] = i; + } + + *csize = k; + gk_free((void **)&where, LTERM); + +} + + +/************************************************************************* +* This function performs a dfs starting from an unmatched col node +* forming alternate paths (visited nodes are labelled HC or HR in where) +**************************************************************************/ +void MinCover_ColDFS(idx_t *xadj, idx_t *adjncy, idx_t root, idx_t *mate, idx_t *where, idx_t flag) +{ + idx_t i; + + if (flag == INCOL) { + if (where[root] == HC) + return; + where[root] = HC; + for (i=xadj[root]; i<xadj[root+1]; i++) + MinCover_ColDFS(xadj, adjncy, adjncy[i], mate, where, INROW); + } + else { + if (where[root] == HR) + return; + where[root] = HR; + if (mate[root] != -1) + MinCover_ColDFS(xadj, adjncy, mate[root], mate, where, INCOL); + } + +} + +/************************************************************************* +* 
This function performs a dfs starting from an unmatched row node +* forming alternate paths (visited nodes are labelled VR or VC in where) +**************************************************************************/ +void MinCover_RowDFS(idx_t *xadj, idx_t *adjncy, idx_t root, idx_t *mate, idx_t *where, idx_t flag) +{ + idx_t i; + + if (flag == INROW) { + if (where[root] == VR) + return; + where[root] = VR; + for (i=xadj[root]; i<xadj[root+1]; i++) + MinCover_RowDFS(xadj, adjncy, adjncy[i], mate, where, INCOL); + } + else { + if (where[root] == VC) + return; + where[root] = VC; + if (mate[root] != -1) + MinCover_RowDFS(xadj, adjncy, mate[root], mate, where, INROW); + } + +} + + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/mmd.c b/3rdParty/metis/metis-5.1.1/libmetis/mmd.c new file mode 100644 index 000000000..be6d59351 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/mmd.c @@ -0,0 +1,597 @@ +/* + * mmd.c + * + * ************************************************************** + * The following C function was developed from a FORTRAN subroutine + * in SPARSPAK written by Eleanor Chu, Alan George, Joseph Liu + * and Esmond Ng. + * + * The FORTRAN-to-C transformation and modifications such as dynamic + * memory allocation and deallocation were performed by Chunguang + * Sun. + * ************************************************************** + * + * Taken from SMMS, George 12/13/94 + * + * The meaning of invperm, and perm vectors is different from that + * in genqmd_ of SparsPak + * + * $Id: mmd.c 22385 2019-06-03 22:08:48Z karypis $ + */ + +#include "metislib.h" + + +/************************************************************************* +* genmmd -- multiple minimum external degree +* purpose -- this routine implements the minimum degree +* algorithm. it makes use of the implicit representation +* of elimination graphs by quotient graphs, and the notion +* of indistinguishable nodes. It also implements the modifications +* by multiple elimination and minimum external degree. 
+* Caution -- the adjacency vector adjncy will be destroyed. +* Input parameters -- +* neqns -- number of equations. +* (xadj, adjncy) -- the adjacency structure. +* delta -- tolerance value for multiple elimination. +* maxint -- maximum machine representable (short) integer +* (any smaller estimate will do) for marking nodes. +* Output parameters -- +* perm -- the minimum degree ordering. +* invp -- the inverse of perm. +* *ncsub -- an upper bound on the number of nonzero subscripts +* for the compressed storage scheme. +* Working parameters -- +* head -- vector for head of degree lists. +* invp -- used temporarily for degree forward link. +* perm -- used temporarily for degree backward link. +* qsize -- vector for size of supernodes. +* list -- vector for temporary linked lists. +* marker -- a temporary marker vector. +* Subroutines used -- mmdelm, mmdint, mmdnum, mmdupd. +**************************************************************************/ +void genmmd(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *invp, idx_t *perm, + idx_t delta, idx_t *head, idx_t *qsize, idx_t *list, idx_t *marker, + idx_t maxint, idx_t *ncsub) +{ + idx_t ehead, i, mdeg, mdlmt, mdeg_node, nextmd, num, tag; + + if (neqns <= 0) + return; + + /* Adjust from C to Fortran: shift every array pointer down by one so the code below can index from 1 */ + xadj--; adjncy--; invp--; perm--; head--; qsize--; list--; marker--; + + /* initialization for the minimum degree algorithm. */ + *ncsub = 0; + mmdint(neqns, xadj, adjncy, head, invp, perm, qsize, list, marker); + + /* 'num' counts the number of ordered nodes plus 1. */ + num = 1; + + /* eliminate all isolated nodes. */ + nextmd = head[1]; + while (nextmd > 0) { + mdeg_node = nextmd; + nextmd = invp[mdeg_node]; + marker[mdeg_node] = maxint; + invp[mdeg_node] = -num; + num = num + 1; + } + + /* search for node of the minimum degree. 'mdeg' is the current */ + /* minimum degree; 'tag' is used to facilitate marking nodes. 
*/ + if (num > neqns) + goto n1000; + tag = 1; + head[1] = 0; + mdeg = 2; + + /* infinite loop here ! (it is left only through the 'goto n1000' exits) */ + while (1) { + while (head[mdeg] <= 0) + mdeg++; + + /* use value of 'delta' to set up 'mdlmt', which governs */ + /* when a degree update is to be performed. */ + mdlmt = mdeg + delta; + ehead = 0; + +n500: + mdeg_node = head[mdeg]; + while (mdeg_node <= 0) { + mdeg++; + + if (mdeg > mdlmt) + goto n900; + mdeg_node = head[mdeg]; + }; + + /* remove 'mdeg_node' from the degree structure. */ + nextmd = invp[mdeg_node]; + head[mdeg] = nextmd; + if (nextmd > 0) + perm[nextmd] = -mdeg; + invp[mdeg_node] = -num; + *ncsub += mdeg + qsize[mdeg_node] - 2; + if ((num+qsize[mdeg_node]) > neqns) + goto n1000; + + /* eliminate 'mdeg_node' and perform quotient graph */ + /* transformation. reset 'tag' value if necessary. */ + tag++; + if (tag >= maxint) { + tag = 1; + for (i = 1; i <= neqns; i++) + if (marker[i] < maxint) + marker[i] = 0; + }; + + mmdelm(mdeg_node, xadj, adjncy, head, invp, perm, qsize, list, marker, maxint, tag); + + num += qsize[mdeg_node]; + list[mdeg_node] = ehead; + ehead = mdeg_node; + if (delta >= 0) + goto n500; + + n900: + /* update degrees of the nodes involved in the */ + /* minimum degree nodes elimination. */ + if (num > neqns) + goto n1000; + mmdupd( ehead, neqns, xadj, adjncy, delta, &mdeg, head, invp, perm, qsize, list, marker, maxint, &tag); + }; /* end of -- while ( 1 ) -- */ + +n1000: + mmdnum( neqns, perm, invp, qsize ); + + /* Adjust from Fortran back to C (only the local pointer copies change) */ + xadj++; adjncy++; invp++; perm++; head++; qsize++; list++; marker++; +} + + +/************************************************************************** +* mmdelm ...... multiple minimum degree elimination +* Purpose -- This routine eliminates the node mdeg_node of minimum degree +* from the adjacency structure, which is stored in the quotient +* graph format. It also transforms the quotient graph representation +* of the elimination graph. 
+* Input parameters -- +* mdeg_node -- node of minimum degree. +* maxint -- estimate of maximum representable (short) integer. +* tag -- tag value. +* Updated parameters -- +* (xadj, adjncy) -- updated adjacency structure. +* (head, forward, backward) -- degree doubly linked structure. +* qsize -- size of supernode. +* marker -- marker vector. +* list -- temporary linked list of eliminated nabors. +***************************************************************************/ +void mmdelm(idx_t mdeg_node, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward, + idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker, idx_t maxint, idx_t tag) +{ + idx_t element, i, istop, istart, j, + jstop, jstart, link, + nabor, node, npv, nqnbrs, nxnode, + pvnode, rlmt, rloc, rnode, xqnbr; + + /* find the reachable set of 'mdeg_node' and */ + /* place it in the data structure. */ + marker[mdeg_node] = tag; + istart = xadj[mdeg_node]; + istop = xadj[mdeg_node+1] - 1; + + /* 'element' points to the beginning of the list of */ + /* eliminated nabors of 'mdeg_node', and 'rloc' gives the */ + /* storage location for the next reachable node. */ + element = 0; + rloc = istart; + rlmt = istop; + for ( i = istart; i <= istop; i++ ) { + nabor = adjncy[i]; + if ( nabor == 0 ) break; /* a zero entry terminates the list */ + if ( marker[nabor] < tag ) { + marker[nabor] = tag; + if ( forward[nabor] < 0 ) { + list[nabor] = element; + element = nabor; + } else { + adjncy[rloc] = nabor; + rloc++; + }; + }; /* end of -- if -- */ + }; /* end of -- for -- */ + + /* merge with reachable nodes from generalized elements. 
*/ + while ( element > 0 ) { + adjncy[rlmt] = -element; + link = element; + +n400: + jstart = xadj[link]; + jstop = xadj[link+1] - 1; + for ( j = jstart; j <= jstop; j++ ) { + node = adjncy[j]; + link = -node; + if ( node < 0 ) goto n400; /* a negative entry -k continues the list at xadj[k] */ + if ( node == 0 ) break; + if ((marker[node]<tag)&&(forward[node]>=0)) { + marker[node] = tag; + /*use storage from eliminated nodes if necessary.*/ + while ( rloc >= rlmt ) { + link = -adjncy[rlmt]; + rloc = xadj[link]; + rlmt = xadj[link+1] - 1; + }; + adjncy[rloc] = node; + rloc++; + }; + }; /* end of -- for ( j = jstart; -- */ + element = list[element]; + }; /* end of -- while ( element > 0 ) -- */ + if ( rloc <= rlmt ) adjncy[rloc] = 0; + /* for each node in the reachable set, do the following. */ + link = mdeg_node; + +n1100: + istart = xadj[link]; + istop = xadj[link+1] - 1; + for ( i = istart; i <= istop; i++ ) { + rnode = adjncy[i]; + link = -rnode; + if ( rnode < 0 ) goto n1100; + if ( rnode == 0 ) return; + + /* 'rnode' is in the degree list structure. */ + pvnode = backward[rnode]; + if (( pvnode != 0 ) && ( pvnode != (-maxint) )) { + /* then remove 'rnode' from the structure. */ + nxnode = forward[rnode]; + if ( nxnode > 0 ) backward[nxnode] = pvnode; + if ( pvnode > 0 ) forward[pvnode] = nxnode; + npv = -pvnode; + if ( pvnode < 0 ) head[npv] = nxnode; + }; + + /* purge inactive quotient nabors of 'rnode'. */ + jstart = xadj[rnode]; + jstop = xadj[rnode+1] - 1; + xqnbr = jstart; + for ( j = jstart; j <= jstop; j++ ) { + nabor = adjncy[j]; + if ( nabor == 0 ) break; + if ( marker[nabor] < tag ) { + adjncy[xqnbr] = nabor; + xqnbr++; + }; + }; + + /* no active nabor after the purging. */ + nqnbrs = xqnbr - jstart; + if ( nqnbrs <= 0 ) { + /* merge 'rnode' with 'mdeg_node'. */ + qsize[mdeg_node] += qsize[rnode]; + qsize[rnode] = 0; + marker[rnode] = maxint; + forward[rnode] = -mdeg_node; + backward[rnode] = -maxint; + } else { + /* flag 'rnode' for degree update, and */ + /* add 'mdeg_node' as a nabor of 'rnode'. 
*/ + forward[rnode] = nqnbrs + 1; + backward[rnode] = 0; + adjncy[xqnbr] = mdeg_node; + xqnbr++; + if ( xqnbr <= jstop ) adjncy[xqnbr] = 0; + }; + }; /* end of -- for ( i = istart; -- */ + return; + } + +/*************************************************************************** +* mmdint ---- mult minimum degree initialization +* purpose -- this routine performs initialization for the +* multiple elimination version of the minimum degree algorithm. +* input parameters -- +* neqns -- number of equations. +* (xadj, adjncy) -- adjacency structure. +* output parameters -- +* (head, forward, backward) -- degree doubly linked structure. +* qsize -- size of supernode ( initialized to one). +* list -- linked list. +* marker -- marker vector. +****************************************************************************/ +idx_t mmdint(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward, + idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker) +{ + idx_t fnode, ndeg, node; + + for ( node = 1; node <= neqns; node++ ) { + head[node] = 0; + qsize[node] = 1; + marker[node] = 0; + list[node] = 0; + }; + + /* initialize the degree doubly linked lists. */ + for ( node = 1; node <= neqns; node++ ) { + // The following is something that Olaf Schenk identified as potentially a + // bug that I introduced in the original code. For now, I reverted back + // to the original code until I have some time to check. 
+ // ndeg = xadj[node+1] - xadj[node]/* + 1*/; /* george */ + ndeg = xadj[node+1] - xadj[node] + 1; + if (ndeg == 0) + ndeg = 1; + fnode = head[ndeg]; + forward[node] = fnode; + head[ndeg] = node; + if ( fnode > 0 ) backward[fnode] = node; + backward[node] = -ndeg; + }; + return 0; +} + +/**************************************************************************** +* mmdnum --- multi minimum degree numbering +* purpose -- this routine performs the final step in producing +* the permutation and inverse permutation vectors in the +* multiple elimination version of the minimum degree +* ordering algorithm. +* input parameters -- +* neqns -- number of equations. +* qsize -- size of supernodes at elimination. +* updated parameters -- +* invp -- inverse permutation vector. on input, +* if qsize[node] = 0, then node has been merged +* into the node -invp[node]; otherwise, +* -invp[node] is its inverse labelling. +* output parameters -- +* perm -- the permutation vector. +****************************************************************************/ +void mmdnum(idx_t neqns, idx_t *perm, idx_t *invp, idx_t *qsize) +{ + idx_t father, nextf, node, nqsize, num, root; + + for ( node = 1; node <= neqns; node++ ) { + nqsize = qsize[node]; + if ( nqsize <= 0 ) perm[node] = invp[node]; + if ( nqsize > 0 ) perm[node] = -invp[node]; + }; + + /* for each node which has been merged, do the following. */ + for ( node = 1; node <= neqns; node++ ) { + if ( perm[node] <= 0 ) { + + /* trace the merged tree until one which has not */ + /* been merged, call it root. */ + father = node; + while ( perm[father] <= 0 ) + father = - perm[father]; + + /* number node after root. */ + root = father; + num = perm[root] + 1; + invp[node] = -num; + perm[root] = num; + + /* shorten the merged tree. 
*/ + father = node; + nextf = - perm[father]; + while ( nextf > 0 ) { + perm[father] = -root; + father = nextf; + nextf = -perm[father]; + }; + }; /* end of -- if ( perm[node] <= 0 ) -- */ + }; /* end of -- for ( node = 1; -- */ + + /* ready to compute perm. */ + for ( node = 1; node <= neqns; node++ ) { + num = -invp[node]; + invp[node] = num; + perm[num] = node; + }; + return; +} + +/**************************************************************************** +* mmdupd ---- multiple minimum degree update +* purpose -- this routine updates the degrees of nodes after a +* multiple elimination step. +* input parameters -- +* ehead -- the beginning of the list of eliminated nodes +* (i.e., newly formed elements). +* neqns -- number of equations. +* (xadj, adjncy) -- adjacency structure. +* delta -- tolerance value for multiple elimination. +* maxint -- maximum machine representable (short) integer. +* updated parameters -- +* *mdeg -- new minimum degree after degree update. +* (head, forward, backward) -- degree doubly linked structure. +* qsize -- size of supernode. +* list -- marker vector for degree update. +* *tag -- tag value. +****************************************************************************/ +void mmdupd(idx_t ehead, idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t delta, idx_t *mdeg, + idx_t *head, idx_t *forward, idx_t *backward, idx_t *qsize, idx_t *list, + idx_t *marker, idx_t maxint, idx_t *tag) +{ + idx_t deg, deg0, element, enode, fnode, i, iq2, istop, + istart, j, jstop, jstart, link, mdeg0, mtag, nabor, + node, q2head, qxhead; + + mdeg0 = *mdeg + delta; + element = ehead; + +n100: + if ( element <= 0 ) return; + + /* for each of the newly formed element, do the following. */ + /* reset tag value if necessary. 
*/ + mtag = *tag + mdeg0; + if ( mtag >= maxint ) { + *tag = 1; + for ( i = 1; i <= neqns; i++ ) + if ( marker[i] < maxint ) marker[i] = 0; + mtag = *tag + mdeg0; + }; + + /* create two linked lists from nodes associated with 'element': */ + /* one with two nabors (q2head) in the adjacency structure, and the*/ + /* other with more than two nabors (qxhead). also compute 'deg0',*/ + /* number of nodes in this element. */ + q2head = 0; + qxhead = 0; + deg0 = 0; + link =element; + +n400: + istart = xadj[link]; + istop = xadj[link+1] - 1; + for ( i = istart; i <= istop; i++ ) { + enode = adjncy[i]; + link = -enode; + if ( enode < 0 ) goto n400; + if ( enode == 0 ) break; + if ( qsize[enode] != 0 ) { + deg0 += qsize[enode]; + marker[enode] = mtag; + + /*'enode' requires a degree update (backward[] == 0 is the flag set by mmdelm)*/ + if ( backward[enode] == 0 ) { + /* place either in qxhead or q2head list. */ + if ( forward[enode] != 2 ) { + list[enode] = qxhead; + qxhead = enode; + } else { + list[enode] = q2head; + q2head = enode; + }; + }; + }; /* end of -- if ( qsize[enode] != 0 ) -- */ + }; /* end of -- for ( i = istart; -- */ + + /* for each node in q2 list, do the following. */ + enode = q2head; + iq2 = 1; + +n900: + if ( enode <= 0 ) goto n1500; + if ( backward[enode] != 0 ) goto n2200; + (*tag)++; + deg = deg0; + + /* identify the other adjacent element nabor. */ + istart = xadj[enode]; + nabor = adjncy[istart]; + if ( nabor == element ) nabor = adjncy[istart+1]; + link = nabor; + if ( forward[nabor] >= 0 ) { + /* nabor is uneliminated, increase degree count. */ + deg += qsize[nabor]; + goto n2100; + }; + + /* the nabor is eliminated. for each node in the 2nd element */ + /* do the following. */ +n1000: + istart = xadj[link]; + istop = xadj[link+1] - 1; + for ( i = istart; i <= istop; i++ ) { + node = adjncy[i]; + link = -node; + if ( node != enode ) { + if ( node < 0 ) goto n1000; + if ( node == 0 ) goto n2100; + if ( qsize[node] != 0 ) { + if ( marker[node] < *tag ) { + /* 'node' is not yet considered. 
*/ + marker[node] = *tag; + deg += qsize[node]; + } else { + if ( backward[node] == 0 ) { + if ( forward[node] == 2 ) { + /* 'node' is indistinguishable from 'enode'.*/ + /* merge them into a new supernode. */ + qsize[enode] += qsize[node]; + qsize[node] = 0; + marker[node] = maxint; + forward[node] = -enode; + backward[node] = -maxint; + } else { + /* 'node' is outmatched by 'enode' */ + if (backward[node]==0) backward[node] = -maxint; + }; + }; /* end of -- if ( backward[node] == 0 ) -- */ + }; /* end of -- if ( marker[node] < *tag ) -- */ + }; /* end of -- if ( qsize[node] != 0 ) -- */ + }; /* end of -- if ( node != enode ) -- */ + }; /* end of -- for ( i = istart; -- */ + goto n2100; + +n1500: + /* for each 'enode' in the 'qx' list, do the following. */ + enode = qxhead; + iq2 = 0; + +n1600: if ( enode <= 0 ) goto n2300; + if ( backward[enode] != 0 ) goto n2200; + (*tag)++; + deg = deg0; + + /*for each unmarked nabor of 'enode', do the following.*/ + istart = xadj[enode]; + istop = xadj[enode+1] - 1; + for ( i = istart; i <= istop; i++ ) { + nabor = adjncy[i]; + if ( nabor == 0 ) break; + if ( marker[nabor] < *tag ) { + marker[nabor] = *tag; + link = nabor; + if ( forward[nabor] >= 0 ) + /*if uneliminated, include it in deg count.*/ + deg += qsize[nabor]; + else { +n1700: + /* if eliminated, include unmarked nodes in this*/ + /* element into the degree count. */ + jstart = xadj[link]; + jstop = xadj[link+1] - 1; + for ( j = jstart; j <= jstop; j++ ) { + node = adjncy[j]; + link = -node; + if ( node < 0 ) goto n1700; + if ( node == 0 ) break; + if ( marker[node] < *tag ) { + marker[node] = *tag; + deg += qsize[node]; + }; + }; /* end of -- for ( j = jstart; -- */ + }; /* end of -- if ( forward[nabor] >= 0 ) -- */ + }; /* end of -- if ( marker[nabor] < *tag ) -- */ + }; /* end of -- for ( i = istart; -- */ + +n2100: + /* update external degree of 'enode' in degree structure, */ + /* and '*mdeg' if necessary. 
*/ + deg = deg - qsize[enode] + 1; + fnode = head[deg]; + forward[enode] = fnode; + backward[enode] = -deg; + if ( fnode > 0 ) backward[fnode] = enode; + head[deg] = enode; + if ( deg < *mdeg ) *mdeg = deg; + +n2200: + /* get next enode in current element. */ + enode = list[enode]; + if ( iq2 == 1 ) goto n900; + goto n1600; + +n2300: + /* get next element in the list. */ + *tag = mtag; + element = list[element]; + goto n100; + } diff --git a/3rdParty/metis/metis-5.1.1/libmetis/ometis.c b/3rdParty/metis/metis-5.1.1/libmetis/ometis.c new file mode 100644 index 000000000..51e39754c --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/ometis.c @@ -0,0 +1,701 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * ometis.c + * + * This file contains the top level routines for the multilevel nested + * dissection ordering algorithm OMETIS. + * + * Started 7/24/97 + * George + * + * $Id: ometis.c 10513 2011-07-07 22:06:03Z karypis $ + * + */ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function is the entry point for the multilevel nested dissection + ordering code. At each bisection, a node-separator is computed using + a node-based refinement approach. + + \param nvtxs is the number of vertices in the graph. + \param xadj is of length nvtxs+1 marking the start of the adjacency + list of each vertex in adjncy. + \param adjncy stores the adjacency lists of the vertices. The adjacency + list of a vertex should not contain the vertex itself. + \param vwgt is an array of size nvtxs storing the weight of each + vertex. If vwgt is NULL, then the vertices are considered + to have unit weight. + \note There is no numflag parameter: whether vertex numbering starts + from 0 or 1 is taken from options (METIS_OPTION_NUMBERING). + \param options is an array of size METIS_NOPTIONS used to pass + various options impacting the behavior of the algorithm. A NULL + value indicates use of default options. 
+ \param perm is an array of size nvtxs such that if A and A' are + the original and permuted matrices, then A'[i] = A[perm[i]]. + \param iperm is an array of size nvtxs such that if A and A' are + the original and permuted matrices, then A[i] = A'[iperm[i]]. +*/ +/*************************************************************************/ +int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, idx_t *perm, idx_t *iperm) +{ + int sigrval=0, renumber=0; + idx_t i, ii, j, l, nnvtxs=0; + graph_t *graph=NULL; + ctrl_t *ctrl; + idx_t *cptr, *cind, *piperm; + int numflag = 0; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + + /* set up the run time parameters (ncon = 1, nparts = 3) */ + ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; /* NOTE(review): unlike the normal exit, this path does not call gk_malloc_cleanup() -- confirm this is intended */ + } + + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { + Change2CNumbering(*nvtxs, xadj, adjncy); + renumber = 1; + } + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); + + /* prune the dense columns */ + if (ctrl->pfactor > 0.0) { + piperm = imalloc(*nvtxs, "OMETIS: piperm"); + + graph = PruneGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor); + if (graph == NULL) { + /* if there was no pruning, cleanup the pfactor */ + gk_free((void **)&piperm, LTERM); + ctrl->pfactor = 0.0; + } + else { + nnvtxs = graph->nvtxs; + ctrl->compress = 0; /* disable compression if pruning took place */ + } + } + + /* compress the graph; note that compression only happens if no pruning + has taken place. 
*/ + if (ctrl->compress) { + cptr = imalloc(*nvtxs+1, "OMETIS: cptr"); + cind = imalloc(*nvtxs, "OMETIS: cind"); + + graph = CompressGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, cptr, cind); + if (graph == NULL) { + /* if there was no compression, cleanup the compress flag */ + gk_free((void **)&cptr, &cind, LTERM); + ctrl->compress = 0; + } + else { + nnvtxs = graph->nvtxs; + ctrl->cfactor = 1.0*(*nvtxs)/nnvtxs; + if (ctrl->cfactor > 1.5 && ctrl->nseps == 1) + ctrl->nseps = 2; + //ctrl->nseps = (idx_t)(ctrl->cfactor*ctrl->nseps); + } + } + + /* if no pruning and no compression, setup the graph in the normal way. */ + if (ctrl->pfactor == 0.0 && ctrl->compress == 0) + graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL); + + ASSERT(CheckGraph(graph, ctrl->numflag, 1)); + + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); + + /* do the nested dissection ordering */ + if (ctrl->ccorder) + MlevelNestedDissectionCC(ctrl, graph, iperm, graph->nvtxs); + else + MlevelNestedDissection(ctrl, graph, iperm, graph->nvtxs); + + + if (ctrl->pfactor > 0.0) { /* Order any pruned vertices */ + icopy(nnvtxs, iperm, perm); /* Use perm as an auxiliary array */ + for (i=0; i<nnvtxs; i++) + iperm[piperm[i]] = perm[i]; + for (i=nnvtxs; i<*nvtxs; i++) + iperm[piperm[i]] = i; + + gk_free((void **)&piperm, LTERM); + } + else if (ctrl->compress) { /* Uncompress the ordering */ + /* construct perm from iperm */ + for (i=0; i<nnvtxs; i++) + perm[iperm[i]] = i; + for (l=ii=0; ii<nnvtxs; ii++) { + i = perm[ii]; + for (j=cptr[i]; j<cptr[i+1]; j++) + iperm[cind[j]] = l++; + } + + gk_free((void **)&cptr, &cind, LTERM); + } + + for (i=0; i<*nvtxs; i++) + perm[iperm[i]] = i; /* perm is the inverse of iperm */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + + /* clean up */ + FreeCtrl(&ctrl); + +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) + Change2FNumberingOrder(*nvtxs, xadj, adjncy, 
perm, iperm); + + gk_siguntrap(); + gk_malloc_cleanup(0); + + return metis_rcode(sigrval); +} + + +/*************************************************************************/ +/*! This is the driver for the recursive tri-section of a graph into the + left, separator, and right partitions. The graphs corresponding to the + left and right parts are further tri-sected in a recursive fashion. + The nodes in the separator are ordered at the end of the left & right + nodes. + */ +/*************************************************************************/ +void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx) +{ + idx_t i, j, nvtxs, nbnd; + idx_t *label, *bndind; + graph_t *lgraph, *rgraph; + + nvtxs = graph->nvtxs; + + MlevelNodeBisectionMultiple(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", + graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); + + + /* Order the nodes in the separator: they take the highest remaining numbers, counting down from lastvtx-1 */ + nbnd = graph->nbnd; + bndind = graph->bndind; + label = graph->label; + for (i=0; i<nbnd; i++) + order[label[bndind[i]]] = --lastvtx; + + SplitGraphOrder(ctrl, graph, &lgraph, &rgraph); + + /* Free the memory of the top level graph */ + FreeGraph(&graph); + + /* Recurse on lgraph first, as its lastvtx depends on rgraph->nvtxs, which + will not be defined upon return from MlevelNestedDissection (the recursive call frees the graph it is given). */ + if (lgraph->nvtxs > MMDSWITCH && lgraph->nedges > 0) + MlevelNestedDissection(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); + else { + MMDOrder(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); + FreeGraph(&lgraph); + } + if (rgraph->nvtxs > MMDSWITCH && rgraph->nedges > 0) + MlevelNestedDissection(ctrl, rgraph, order, lastvtx); + else { + MMDOrder(ctrl, rgraph, order, lastvtx); + FreeGraph(&rgraph); + } +} + + +/*************************************************************************/ +/*! This routine is similar to its non 'CC' counterpart. 
The difference is + that after each tri-section, the connected components of the original + graph that result after removing the separator vertices are ordered + independently (i.e., this may lead to more than just the left and + the right subgraphs). +*/ +/*************************************************************************/ +void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx) +{ + idx_t i, j, nvtxs, nbnd, ncmps, rnvtxs, snvtxs; + idx_t *label, *bndind; + idx_t *cptr, *cind; + graph_t **sgraphs; + + nvtxs = graph->nvtxs; + + MlevelNodeBisectionMultiple(ctrl, graph); + + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", + graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); + + /* Order the nodes in the separator */ + nbnd = graph->nbnd; + bndind = graph->bndind; + label = graph->label; + for (i=0; i<nbnd; i++) + order[label[bndind[i]]] = --lastvtx; + + WCOREPUSH; + cptr = iwspacemalloc(ctrl, nvtxs+1); + cind = iwspacemalloc(ctrl, nvtxs); + ncmps = FindSepInducedComponents(ctrl, graph, cptr, cind); + + if (ctrl->dbglvl&METIS_DBG_INFO) { + if (ncmps > 2) + printf(" Bisection resulted in %"PRIDX" connected components\n", ncmps); + } + + sgraphs = SplitGraphOrderCC(ctrl, graph, ncmps, cptr, cind); + + WCOREPOP; + + /* Free the memory of the top level graph */ + FreeGraph(&graph); + + /* Go and process the subgraphs; rnvtxs counts the vertices of the subgraphs handled so far */ + for (rnvtxs=i=0; i<ncmps; i++) { + /* Save the number of vertices in sgraphs[i] because sgraphs[i] is freed + inside MlevelNestedDissectionCC, and as such it will be undefined. 
*/ + snvtxs = sgraphs[i]->nvtxs; + + if (sgraphs[i]->nvtxs > MMDSWITCH && sgraphs[i]->nedges > 0) { + MlevelNestedDissectionCC(ctrl, sgraphs[i], order, lastvtx-rnvtxs); + } + else { + MMDOrder(ctrl, sgraphs[i], order, lastvtx-rnvtxs); + FreeGraph(&sgraphs[i]); + } + rnvtxs += snvtxs; + } + + gk_free((void **)&sgraphs, LTERM); +} + + +/*************************************************************************/ +/*! This function performs multilevel node bisection (i.e., tri-section). + It performs up to ctrl->nseps bisections and keeps the one with the smallest mincut. */ +/*************************************************************************/ +void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, mincut; + idx_t *bestwhere; + + /* if only one separator is requested or the graph is small, just find a single vertex separator */ + if (ctrl->nseps == 1 || graph->nvtxs < (ctrl->compress ? 1000 : 2000)) { + MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS); + return; + } + + WCOREPUSH; + + bestwhere = iwspacemalloc(ctrl, graph->nvtxs); + + mincut = graph->tvwgt[0]; + for (i=0; i<ctrl->nseps; i++) { + MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS); + + if (i == 0 || graph->mincut < mincut) { + mincut = graph->mincut; + if (i < ctrl->nseps-1) + icopy(graph->nvtxs, graph->where, bestwhere); + } + + if (mincut == 0) + break; + + if (i < ctrl->nseps-1) + FreeRData(graph); + } + + if (mincut != graph->mincut) { /* the last run was not the best one: restore the saved partition */ + icopy(graph->nvtxs, bestwhere, graph->where); + Compute2WayNodePartitionParams(ctrl, graph); + } + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function performs multilevel node bisection (i.e., tri-section). + It performs multiple bisections and selects the best. 
*/ +/*************************************************************************/ +void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts) +{ + idx_t i, mincut, nruns=5; + graph_t *cgraph; + idx_t *bestwhere; + + /* if the graph is small (fewer than 5000 vertices), bisect it directly with MlevelNodeBisectionL1 */ + if (graph->nvtxs < 5000) { + MlevelNodeBisectionL1(ctrl, graph, niparts); + return; + } + + WCOREPUSH; + + ctrl->CoarsenTo = gk_max(100, graph->nvtxs/30); + + cgraph = CoarsenGraphNlevels(ctrl, graph, 4); + + bestwhere = iwspacemalloc(ctrl, cgraph->nvtxs); + + mincut = graph->tvwgt[0]; + for (i=0; i<nruns; i++) { /* bisect cgraph nruns times, remembering the best 'where' vector */ + MlevelNodeBisectionL1(ctrl, cgraph, 0.7*niparts); /* NOTE(review): 0.7*niparts is a floating-point value passed for an idx_t parameter -- presumably truncated; confirm */ + + if (i == 0 || cgraph->mincut < mincut) { + mincut = cgraph->mincut; + if (i < nruns-1) + icopy(cgraph->nvtxs, cgraph->where, bestwhere); + } + + if (mincut == 0) + break; + + if (i < nruns-1) + FreeRData(cgraph); + } + + if (mincut != cgraph->mincut) + icopy(cgraph->nvtxs, bestwhere, cgraph->where); + + WCOREPOP; + + Refine2WayNode(ctrl, graph, cgraph); + +} + + +/*************************************************************************/ +/*! The top-level routine of the actual multilevel node bisection */ +/*************************************************************************/ +void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts) +{ + graph_t *cgraph; + + ctrl->CoarsenTo = graph->nvtxs/8; /* clamped to the range [40, 100] below */ + if (ctrl->CoarsenTo > 100) + ctrl->CoarsenTo = 100; + else if (ctrl->CoarsenTo < 40) + ctrl->CoarsenTo = 40; + + cgraph = CoarsenGraph(ctrl, graph); + + niparts = gk_max(1, (cgraph->nvtxs <= ctrl->CoarsenTo ? niparts/2: niparts)); + /*niparts = (cgraph->nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);*/ + InitSeparator(ctrl, cgraph, niparts); + + Refine2WayNode(ctrl, graph, cgraph); +} + + +/*************************************************************************/ +/*! This function takes a graph and a tri-section (left, right, separator) + and splits it into two graphs. 
+ + This function relies on the fact that adjwgt is all equal to 1. +*/ +/*************************************************************************/ +void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, + graph_t **r_rgraph) +{ + idx_t i, ii, j, k, l, istart, iend, mypart, nvtxs, snvtxs[3], snedges[3]; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind; + idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2]; + idx_t *rename; + idx_t *auxadjncy; + graph_t *lgraph, *rgraph; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + label = graph->label; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + ASSERT(bndptr != NULL); + + rename = iwspacemalloc(ctrl, nvtxs); + + snvtxs[0] = snvtxs[1] = snvtxs[2] = snedges[0] = snedges[1] = snedges[2] = 0; + for (i=0; i<nvtxs; i++) { + k = where[i]; + rename[i] = snvtxs[k]++; /* new index of vertex i within its own part */ + snedges[k] += xadj[i+1]-xadj[i]; + } + + lgraph = SetupSplitGraph(graph, snvtxs[0], snedges[0]); + sxadj[0] = lgraph->xadj; + svwgt[0] = lgraph->vwgt; + sadjncy[0] = lgraph->adjncy; + sadjwgt[0] = lgraph->adjwgt; + slabel[0] = lgraph->label; + + rgraph = SetupSplitGraph(graph, snvtxs[1], snedges[1]); + sxadj[1] = rgraph->xadj; + svwgt[1] = rgraph->vwgt; + sadjncy[1] = rgraph->adjncy; + sadjwgt[1] = rgraph->adjwgt; + slabel[1] = rgraph->label; + + /* Go and use bndptr to also mark the boundary nodes in the two partitions */ + for (ii=0; ii<graph->nbnd; ii++) { + i = bndind[ii]; + for (j=xadj[i]; j<xadj[i+1]; j++) + bndptr[adjncy[j]] = 1; + } + + snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0; + sxadj[0][0] = sxadj[1][0] = 0; + for (i=0; i<nvtxs; i++) { + if ((mypart = where[i]) == 2) + continue; /* part 2 (presumably the separator) goes to neither subgraph */ + + istart = xadj[i]; + iend = xadj[i+1]; + if (bndptr[i] == -1) { /* This is an interior vertex (not marked above): copy its adjacency list unchanged */ + auxadjncy = 
sadjncy[mypart] + snedges[mypart] - istart; + for(j=istart; j<iend; j++) + auxadjncy[j] = adjncy[j]; + snedges[mypart] += iend-istart; + } + else { + auxadjncy = sadjncy[mypart]; + l = snedges[mypart]; + for (j=istart; j<iend; j++) { + k = adjncy[j]; + if (where[k] == mypart) + auxadjncy[l++] = k; /* keep only the neighbors that lie in the same part */ + } + snedges[mypart] = l; + } + + svwgt[mypart][snvtxs[mypart]] = vwgt[i]; + slabel[mypart][snvtxs[mypart]] = label[i]; + sxadj[mypart][++snvtxs[mypart]] = snedges[mypart]; + } + + for (mypart=0; mypart<2; mypart++) { + iend = snedges[mypart]; + iset(iend, 1, sadjwgt[mypart]); + + auxadjncy = sadjncy[mypart]; + for (i=0; i<iend; i++) + auxadjncy[i] = rename[auxadjncy[i]]; /* translate to subgraph-local vertex numbers */ + } + + lgraph->nvtxs = snvtxs[0]; + lgraph->nedges = snedges[0]; + rgraph->nvtxs = snvtxs[1]; + rgraph->nedges = snedges[1]; + + SetupGraph_tvwgt(lgraph); + SetupGraph_tvwgt(rgraph); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr)); + + *r_lgraph = lgraph; + *r_rgraph = rgraph; + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function takes a graph and generates a set of graphs, each of + which is a connected component in the original graph. + + This function relies on the fact that adjwgt is all equal to 1. + + \param ctrl stores run state info. + \param graph is the graph to be split. + \param ncmps is the number of connected components. + \param cptr is an array of size ncmps+1 that marks the start and end + locations of the vertices in cind that make up the respective + components (i.e., cptr, cind is in CSR format). + \param cind is an array of size equal to the number of vertices in + the original graph and stores the vertices that belong to each + connected component. + + \returns an array of ncmps subgraphs, one per connected component. 
+*/ +/*************************************************************************/ +graph_t **SplitGraphOrderCC(ctrl_t *ctrl, graph_t *graph, idx_t ncmps, + idx_t *cptr, idx_t *cind) +{ + idx_t i, ii, iii, j, k, l, istart, iend, mypart, nvtxs, snvtxs, snedges; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind; + idx_t *sxadj, *svwgt, *sadjncy, *sadjwgt, *slabel; + idx_t *rename; + idx_t *auxadjncy; + graph_t **sgraphs; + + WCOREPUSH; + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr)); + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + label = graph->label; + where = graph->where; + bndptr = graph->bndptr; + bndind = graph->bndind; + ASSERT(bndptr != NULL); + + /* Go and use bndptr to also mark the vertices adjacent to the bndind nodes */ + for (ii=0; ii<graph->nbnd; ii++) { + i = bndind[ii]; + for (j=xadj[i]; j<xadj[i+1]; j++) + bndptr[adjncy[j]] = 1; + } + + rename = iwspacemalloc(ctrl, nvtxs); + + sgraphs = (graph_t **)gk_malloc(sizeof(graph_t *)*ncmps, "SplitGraphOrderCC: sgraphs"); + + /* Go and split the graph a component at a time */ + for (iii=0; iii<ncmps; iii++) { + irandArrayPermute(cptr[iii+1]-cptr[iii], cind+cptr[iii], cptr[iii+1]-cptr[iii], 0); + snvtxs = snedges = 0; + for (j=cptr[iii]; j<cptr[iii+1]; j++) { + i = cind[j]; + rename[i] = snvtxs++; /* index of vertex i within component iii */ + snedges += xadj[i+1]-xadj[i]; + } + + sgraphs[iii] = SetupSplitGraph(graph, snvtxs, snedges); + + sxadj = sgraphs[iii]->xadj; + svwgt = sgraphs[iii]->vwgt; + sadjncy = sgraphs[iii]->adjncy; + sadjwgt = sgraphs[iii]->adjwgt; + slabel = sgraphs[iii]->label; + + snvtxs = snedges = sxadj[0] = 0; + for (ii=cptr[iii]; ii<cptr[iii+1]; ii++) { + i = cind[ii]; + + istart = xadj[i]; + iend = xadj[i+1]; + if (bndptr[i] == -1) { /* This is an interior vertex */ + auxadjncy = sadjncy + snedges - istart; + for(j=istart; j<iend; j++) + auxadjncy[j] = adjncy[j]; + snedges += iend-istart; + } + else { + l = 
snedges; + for (j=istart; j<iend; j++) { + k = adjncy[j]; + if (where[k] != 2) + sadjncy[l++] = k; /* skip neighbors in part 2 (presumably the separator) */ + } + snedges = l; + } + + svwgt[snvtxs] = vwgt[i]; + slabel[snvtxs] = label[i]; + sxadj[++snvtxs] = snedges; + } + + iset(snedges, 1, sadjwgt); + for (i=0; i<snedges; i++) + sadjncy[i] = rename[sadjncy[i]]; + + sgraphs[iii]->nvtxs = snvtxs; + sgraphs[iii]->nedges = snedges; + + SetupGraph_tvwgt(sgraphs[iii]); + } + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr)); + + WCOREPOP; + + return sgraphs; +} + + +/*************************************************************************/ +/*! This function uses MMD to order the graph. The vertices are numbered + in the range [lastvtx-nvtxs, lastvtx-1]. */ +/*************************************************************************/ +void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx) +{ + idx_t i, j, k, nvtxs, nofsub, firstvtx; + idx_t *xadj, *adjncy, *label; + idx_t *perm, *iperm, *head, *qsize, *list, *marker; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + /* Relabel the vertices so that it starts from 1 (genmmd uses Fortran-style 1-based indexing) */ + k = xadj[nvtxs]; + for (i=0; i<k; i++) + adjncy[i]++; + for (i=0; i<nvtxs+1; i++) + xadj[i]++; + + perm = iwspacemalloc(ctrl, nvtxs+5); + iperm = iwspacemalloc(ctrl, nvtxs+5); + head = iwspacemalloc(ctrl, nvtxs+5); + qsize = iwspacemalloc(ctrl, nvtxs+5); + list = iwspacemalloc(ctrl, nvtxs+5); + marker = iwspacemalloc(ctrl, nvtxs+5); + + genmmd(nvtxs, xadj, adjncy, iperm, perm, 1, head, qsize, list, marker, IDX_MAX, &nofsub); /* delta = 1, maxint = IDX_MAX */ + + label = graph->label; + firstvtx = lastvtx-nvtxs; + for (i=0; i<nvtxs; i++) + order[label[i]] = firstvtx+iperm[i]-1; /* iperm values are 1-based, hence the -1 */ + + /* Relabel the vertices so that it starts from 0 */ + for (i=0; i<nvtxs+1; i++) + xadj[i]--; + k = xadj[nvtxs]; + for (i=0; i<k; i++) + adjncy[i]--; + + WCOREPOP; +} + + + + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/options.c b/3rdParty/metis/metis-5.1.1/libmetis/options.c new file mode 100644 
index 000000000..0ca0bd2ae
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/options.c
@@ -0,0 +1,541 @@
+/**
+ \file
+ \brief This file contains various routines for dealing with options and ctrl_t.
+
+ \date Started 5/12/2011
+ \author George
+ \author Copyright 1997-2011, Regents of the University of Minnesota
+ \version\verbatim $Id: options.c 17717 2014-10-03 19:09:31Z dominique $ \endverbatim
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function creates and sets the run parameters (ctrl_t); it returns NULL, after freeing ctrl, when CheckParams rejects them */
+/*************************************************************************/
+ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts,
+            real_t *tpwgts, real_t *ubvec)  /* tpwgts and ubvec may be NULL; defaults are built below */
+{
+  idx_t i, j;
+  ctrl_t *ctrl;
+
+  ctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "SetupCtrl: ctrl");
+
+  memset((void *)ctrl, 0, sizeof(ctrl_t));  /* fields not assigned below stay zero */
+
+  ctrl->pid = getpid();
+
+  switch (optype) {
+    case METIS_OP_PMETIS:
+      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);  /* third argument is presumably the default used when the option is unset -- TODO confirm */
+      ctrl->rtype = METIS_RTYPE_FM;  /* fixed; not read from options */
+      ctrl->ncuts = GETOPTION(options, METIS_OPTION_NCUTS, 1);
+      ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10);
+
+      if (ncon == 1) {
+        ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_GROW);
+        ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, PMETIS_DEFAULT_UFACTOR);
+        ctrl->CoarsenTo = 20;
+      }
+      else {
+        ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_RANDOM);
+        ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, MCPMETIS_DEFAULT_UFACTOR);
+        ctrl->CoarsenTo = 100;
+      }
+
+      break;
+
+
+    case METIS_OP_KMETIS:
+      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
+      ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_METISRB);
+      ctrl->rtype = METIS_RTYPE_GREEDY;  /* fixed; not read from options */
+      ctrl->nIparts = GETOPTION(options, METIS_OPTION_NIPARTS, -1);
+      ctrl->ncuts = GETOPTION(options, METIS_OPTION_NCUTS, 1);
+      ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10);
+      ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, KMETIS_DEFAULT_UFACTOR);
+      ctrl->minconn = GETOPTION(options, METIS_OPTION_MINCONN, 0);
+      ctrl->contig = GETOPTION(options, METIS_OPTION_CONTIG, 0);
+      break;  /* CoarsenTo is not assigned in this case */
+
+
+    case METIS_OP_OMETIS:
+      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_NODE);
+      ctrl->rtype = GETOPTION(options, METIS_OPTION_RTYPE, METIS_RTYPE_SEP1SIDED);
+      ctrl->iptype = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_EDGE);
+      ctrl->nseps = GETOPTION(options, METIS_OPTION_NSEPS, 1);
+      ctrl->niter = GETOPTION(options, METIS_OPTION_NITER, 10);
+      ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, OMETIS_DEFAULT_UFACTOR);
+      ctrl->compress = GETOPTION(options, METIS_OPTION_COMPRESS, 1);
+      ctrl->ccorder = GETOPTION(options, METIS_OPTION_CCORDER, 0);
+      ctrl->pfactor = 0.1*GETOPTION(options, METIS_OPTION_PFACTOR, 0);  /* the option value is scaled by 0.1 */
+
+      ctrl->CoarsenTo = 100;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown optype of %d\n", optype);  /* NOTE(review): if gk_errexit returns, execution continues with the code below */
+  }
+
+  /* common options */
+  ctrl->ctype = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM);
+  ctrl->no2hop = GETOPTION(options, METIS_OPTION_NO2HOP, 0);
+  ctrl->ondisk = GETOPTION(options, METIS_OPTION_ONDISK, 0);
+  ctrl->seed = GETOPTION(options, METIS_OPTION_SEED, -1);
+  ctrl->dbglvl = GETOPTION(options, METIS_OPTION_DBGLVL, 0);
+  ctrl->numflag = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+  ctrl->dropedges = GETOPTION(options, METIS_OPTION_DROPEDGES, 0);
+
+  /* set non-option information */
+  ctrl->optype = optype;
+  ctrl->ncon = ncon;
+  ctrl->nparts = nparts;
+  ctrl->maxvwgt = ismalloc(ncon, 0, "SetupCtrl: maxvwgt");
+
+  /* setup the target partition weights */
+  if (ctrl->optype != METIS_OP_OMETIS) {
+    ctrl->tpwgts = rsmalloc((nparts+2)*ncon, 0.0, "SetupCtrl: ctrl->tpwgts");  /* sized for nparts+2 partitions; only the first nparts*ncon entries are filled below */
+    if (tpwgts) {
+      rcopy(nparts*ncon, tpwgts, ctrl->tpwgts);
+    }
+    else {
+      for (i=0; i<nparts; i++) {
+        for (j=0; j<ncon; j++)
+          ctrl->tpwgts[i*ncon+j] = 1.0/nparts;  /* uniform default: every partition gets the same share */
+      }
+    }
+  }
+  else { /* METIS_OP_OMETIS */
+    /* this is required to allow the pijbm to be defined properly for
+       the edge-based refinement during initial partitioning */
+    ctrl->tpwgts = rsmalloc(2, .5, "SetupCtrl: ctrl->tpwgts");  /* the caller-supplied tpwgts is ignored for OMETIS */
+  }
+
+
+  /* setup the ubfactors */
+  ctrl->ubfactors = rsmalloc(ctrl->ncon, I2RUBFACTOR(ctrl->ufactor), "SetupCtrl: ubfactors");
+  if (ubvec)
+    rcopy(ctrl->ncon, ubvec, ctrl->ubfactors);
+  for (i=0; i<ctrl->ncon; i++)
+    ctrl->ubfactors[i] += 0.0000499;  /* constant slack added to every factor, user-supplied or default */
+
+  /* Allocate memory for balance multipliers.
+     Note that for PMETIS/OMETIS routines the memory allocated is more
+     than required as balance multipliers for 2 parts is sufficient. */
+  ctrl->pijbm = rmalloc(nparts*ncon, "SetupCtrl: ctrl->pijbm");
+
+  InitRandom(ctrl->seed);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO, PrintCtrl(ctrl));
+
+  if (!CheckParams(ctrl)) {
+    FreeCtrl(&ctrl);  /* FreeCtrl releases ctrl and its arrays */
+    return NULL;
+  }
+  else {
+    return ctrl;
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the per-partition/constraint balance multipliers for all ctrl->nparts partitions from ctrl->tpwgts */
+/*************************************************************************/
+void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j;
+
+  for (i=0; i<ctrl->nparts; i++) {
+    for (j=0; j<graph->ncon; j++)
+      ctrl->pijbm[i*graph->ncon+j] = graph->invtvwgt[j]/ctrl->tpwgts[i*graph->ncon+j];  /* multiplier(i,j) = invtvwgt[j] / target weight(i,j) */
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the per-partition/constraint balance multipliers for two partitions from the caller-supplied tpwgts */
+/*************************************************************************/
+void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts)
+{
+  idx_t i, j;
+
+  for (i=0; i<2; i++) {
+    for (j=0; j<graph->ncon; j++)
+      ctrl->pijbm[i*graph->ncon+j] = graph->invtvwgt[j]/tpwgts[i*graph->ncon+j];  /* uses the tpwgts argument, not ctrl->tpwgts */
+  }
+}
+
+
+/*************************************************************************/
+/*! 
This function prints the various control fields */
+/*************************************************************************/
+void PrintCtrl(ctrl_t *ctrl)  /* writes to stdout only; ctrl is not modified */
+{
+  idx_t i, j, modnum;
+
+  printf(" Runtime parameters:\n");
+
+  printf(" Objective type: ");
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      printf("METIS_OBJTYPE_CUT\n");
+      break;
+    case METIS_OBJTYPE_VOL:
+      printf("METIS_OBJTYPE_VOL\n");
+      break;
+    case METIS_OBJTYPE_NODE:
+      printf("METIS_OBJTYPE_NODE\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf(" Coarsening type: ");
+  switch (ctrl->ctype) {
+    case METIS_CTYPE_RM:
+      printf("METIS_CTYPE_RM\n");
+      break;
+    case METIS_CTYPE_SHEM:
+      printf("METIS_CTYPE_SHEM\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf(" Initial partitioning type: ");
+  switch (ctrl->iptype) {
+    case METIS_IPTYPE_GROW:
+      printf("METIS_IPTYPE_GROW\n");
+      break;
+    case METIS_IPTYPE_RANDOM:
+      printf("METIS_IPTYPE_RANDOM\n");
+      break;
+    case METIS_IPTYPE_EDGE:
+      printf("METIS_IPTYPE_EDGE\n");
+      break;
+    case METIS_IPTYPE_NODE:
+      printf("METIS_IPTYPE_NODE\n");
+      break;
+    case METIS_IPTYPE_METISRB:
+      printf("METIS_IPTYPE_METISRB\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf(" Refinement type: ");
+  switch (ctrl->rtype) {
+    case METIS_RTYPE_FM:
+      printf("METIS_RTYPE_FM\n");
+      break;
+    case METIS_RTYPE_GREEDY:
+      printf("METIS_RTYPE_GREEDY\n");
+      break;
+    case METIS_RTYPE_SEP2SIDED:
+      printf("METIS_RTYPE_SEP2SIDED\n");
+      break;
+    case METIS_RTYPE_SEP1SIDED:
+      printf("METIS_RTYPE_SEP1SIDED\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf(" Perform a 2-hop matching: %s\n", (ctrl->no2hop ? "No" : "Yes"));  /* inverted on purpose: a set no2hop flag prints "No" */
+
+  printf(" On disk storage: %s\n", (ctrl->ondisk ? "Yes" : "No"));
+  printf(" Drop edges: %s\n", (ctrl->dropedges ? "Yes" : "No"));
+
+  printf(" Number of balancing constraints: %"PRIDX"\n", ctrl->ncon);
+  printf(" Number of refinement iterations: %"PRIDX"\n", ctrl->niter);
+  printf(" Number of initial partitionings: %"PRIDX"\n", ctrl->nIparts);
+  printf(" Random number seed: %"PRIDX"\n", ctrl->seed);
+
+  if (ctrl->optype == METIS_OP_OMETIS) {  /* ordering-only fields */
+    printf(" Number of separators: %"PRIDX"\n", ctrl->nseps);
+    printf(" Compress graph prior to ordering: %s\n", (ctrl->compress ? "Yes" : "No"));
+    printf(" Detect & order connected components separately: %s\n", (ctrl->ccorder ? "Yes" : "No"));
+    printf(" Prunning factor for high degree vertices: %"PRREAL"\n", ctrl->pfactor);
+  }
+  else {  /* partitioning fields (any optype other than OMETIS) */
+    printf(" Number of partitions: %"PRIDX"\n", ctrl->nparts);
+    printf(" Number of cuts: %"PRIDX"\n", ctrl->ncuts);
+    printf(" User-supplied ufactor: %"PRIDX"\n", ctrl->ufactor);
+
+    if (ctrl->optype == METIS_OP_KMETIS) {
+      printf(" Minimize connectivity: %s\n", (ctrl->minconn ? "Yes" : "No"));
+      printf(" Create contiguous partitions: %s\n", (ctrl->contig ? "Yes" : "No"));
+    }
+
+    modnum = (ctrl->ncon==1 ? 5 : (ctrl->ncon==2 ? 3 : (ctrl->ncon==3 ? 2 : 1)));  /* partitions printed per output line: 5, 3, 2 or 1 depending on ncon */
+    printf(" Target partition weights: ");
+    for (i=0; i<ctrl->nparts; i++) {
+      if (i%modnum == 0)
+        printf("\n ");
+      printf("%4"PRIDX"=[", i);
+      for (j=0; j<ctrl->ncon; j++)
+        printf("%s%.2e", (j==0 ? "" : " "), (double)ctrl->tpwgts[i*ctrl->ncon+j]);  /* ncon weights of partition i, space separated */
+      printf("]");
+    }
+    printf("\n");
+  }
+
+  printf(" Allowed maximum load imbalance: ");
+  for (i=0; i<ctrl->ncon; i++)
+    printf("%.3"PRREAL" ", ctrl->ubfactors[i]);
+  printf("\n");
+
+  printf("\n");
+}
+
+
+/*************************************************************************/
+/*! 
This function checks the validity of user-supplied parameters */
+/*************************************************************************/
+int CheckParams(ctrl_t *ctrl)  /* returns 1 when every check for ctrl->optype passes, 0 at the first failure */
+{
+  idx_t i, j;
+  real_t sum;
+  mdbglvl_et dbglvl=METIS_DBG_INFO;  /* local constant passed to every IFSET below instead of ctrl->dbglvl */
+
+  switch (ctrl->optype) {
+    case METIS_OP_PMETIS:
+      if (ctrl->objtype != METIS_OBJTYPE_CUT) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_GROW && ctrl->iptype != METIS_IPTYPE_RANDOM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_FM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);  /* presumably a strided sum of constraint i over all partitions -- TODO confirm rsum signature */
+        if (sum < 0.99 || sum > 1.01) {  /* target weights of a constraint must add up to 1 within 1% */
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {  /* every individual target weight must be strictly positive */
+            IFSET(dbglvl, METIS_DBG_INFO,
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    case METIS_OP_KMETIS:  /* same checks as PMETIS except objtype/iptype/rtype sets, plus contig and minconn */
+      if (ctrl->objtype != METIS_OBJTYPE_CUT && ctrl->objtype != METIS_OBJTYPE_VOL) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_METISRB && ctrl->iptype != METIS_IPTYPE_GROW) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_GREEDY) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->contig != 0 && ctrl->contig != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect contig.\n"));
+        return 0;
+      }
+      if (ctrl->minconn != 0 && ctrl->minconn != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect minconn.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
+        if (sum < 0.99 || sum > 1.01) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {
+            IFSET(dbglvl, METIS_DBG_INFO,
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+
+
+    case METIS_OP_OMETIS:  /* no tpwgts checks in this case */
+      if (ctrl->objtype != METIS_OBJTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_EDGE && ctrl->iptype != METIS_IPTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_SEP1SIDED && ctrl->rtype != METIS_RTYPE_SEP2SIDED) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->nseps <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nseps.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts != 3) {  /* exactly 3 is required for OMETIS */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon != 1) {  /* exactly one constraint is required for OMETIS */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->compress != 0 && ctrl->compress != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect compress.\n"));
+        return 0;
+      }
+      if (ctrl->ccorder != 0 && ctrl->ccorder != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ccorder.\n"));
+        return 0;
+      }
+      if (ctrl->pfactor < 0.0 ) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect pfactor.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {
+          IFSET(dbglvl, METIS_DBG_INFO,
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    default:
+      IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect optype\n"));
+      return 0;
+  }
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! 
This function frees the memory associated with a ctrl_t */
+/*************************************************************************/
+void FreeCtrl(ctrl_t **r_ctrl)
+{
+  ctrl_t *ctrl = *r_ctrl;
+
+  FreeWorkSpace(ctrl);
+
+  gk_free((void **)&ctrl->tpwgts, &ctrl->pijbm,
+      &ctrl->ubfactors, &ctrl->maxvwgt, &ctrl, LTERM);  /* releases the four arrays and then ctrl itself; LTERM presumably terminates the argument list */
+
+  *r_ctrl = NULL;  /* the caller's pointer is cleared as well */
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/parmetis.c b/3rdParty/metis/metis-5.1.1/libmetis/parmetis.c
new file mode 100644
index 000000000..984509a77
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/parmetis.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * parmetis.c
+ *
+ * This file contains top level routines that are used by ParMETIS
+ *
+ * Started 10/14/97
+ * George
+ *
+ * $Id: parmetis.c 10481 2011-07-05 18:01:23Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function is the entry point for the node ND code for ParMETIS.
+    The difference between this routine and the standard METIS_NodeND are
+    the following
+
+    - It performs at least log2(npes) levels of nested dissection.
+    - It stores the size of the log2(npes) top-level separators in the
+      sizes array.
+*/
+/*************************************************************************/
+int METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
+           idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, idx_t *sizes)  /* sizes must hold at least 2*npes-1 entries (see iset below) */
+{
+  idx_t i, ii, j, l, nnvtxs=0;  /* nnvtxs: vertex count of the compressed graph, set only when compression succeeds */
+  graph_t *graph;
+  ctrl_t *ctrl;
+  idx_t *cptr, *cind;  /* assigned and read only while ctrl->compress is non-zero */
+
+  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
+  if (!ctrl) return METIS_ERROR_INPUT;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));
+
+  /* compress the graph; note that compression only happens if no pruning
+     has taken place (this routine itself does no pruning). */
+  if (ctrl->compress) {
+    cptr = imalloc(nvtxs+1, "OMETIS: cptr");
+    cind = imalloc(nvtxs, "OMETIS: cind");
+
+    graph = CompressGraph(ctrl, nvtxs, xadj, adjncy, vwgt, cptr, cind);
+    if (graph == NULL) {
+      /* if there was no compression, cleanup the compress flag */
+      gk_free((void **)&cptr, &cind, LTERM);
+      ctrl->compress = 0;
+    }
+    else {
+      nnvtxs = graph->nvtxs;
+    }
+  }
+
+  /* if no compression, setup the graph in the normal way. */
+  if (ctrl->compress == 0)
+    graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+
+  /* do the nested dissection ordering */
+  iset(2*npes-1, 0, sizes);  /* presumably zero-fills 2*npes-1 entries of sizes -- confirm iset() argument order */
+  MlevelNestedDissectionP(ctrl, graph, iperm, graph->nvtxs, npes, 0, sizes);  /* graph is released inside this call */
+
+
+  /* Uncompress the ordering */
+  if (ctrl->compress) {
+    /* construct perm from iperm */
+    for (i=0; i<nnvtxs; i++)
+      perm[iperm[i]] = i;
+    for (l=ii=0; ii<nnvtxs; ii++) {
+      i = perm[ii];
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        iperm[cind[j]] = l++;  /* original vertices listed under compressed vertex i receive consecutive positions */
+    }
+
+    gk_free((void **)&cptr, &cind, LTERM);
+  }
+
+
+  for (i=0; i<nvtxs; i++)
+    perm[iperm[i]] = i;  /* perm is built as the inverse of iperm */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));
+
+  /* clean up */
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! 
This function is similar to MlevelNestedDissection with the difference
+    that it also records separator sizes for the top log2(npes) levels */
+/**************************************************************************/
+void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order,
+         idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes)  /* cpos: position in the dissection tree; the recursive calls use 2*cpos+1 and 2*cpos+2 */
+{
+  idx_t i, j, nvtxs, nbnd;
+  idx_t *label, *bndind;
+  graph_t *lgraph, *rgraph;
+
+  nvtxs = graph->nvtxs;
+
+  if (nvtxs == 0) {
+    FreeGraph(&graph);  /* this function releases the graph it is given on every path */
+    return;
+  }
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO,
+      printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n",
+        graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]));
+
+  if (cpos < npes-1) {  /* sizes are recorded only for tree positions below npes-1 */
+    sizes[2*npes-2-cpos] = graph->pwgts[2];
+    sizes[2*npes-2-(2*cpos+1)] = graph->pwgts[1];
+    sizes[2*npes-2-(2*cpos+2)] = graph->pwgts[0];
+  }
+
+  /* Order the nodes in the separator */
+  nbnd = graph->nbnd;
+  bndind = graph->bndind;
+  label = graph->label;
+  for (i=0; i<nbnd; i++)
+    order[label[bndind[i]]] = --lastvtx;  /* the vertices listed in bndind take the highest remaining positions */
+
+  SplitGraphOrder(ctrl, graph, &lgraph, &rgraph);
+
+  /* Free the memory of the top level graph */
+  FreeGraph(&graph);
+
+  if ((lgraph->nvtxs > MMDSWITCH || 2*cpos+2 < npes-1) && lgraph->nedges > 0)
+    MlevelNestedDissectionP(ctrl, lgraph, order, lastvtx-rgraph->nvtxs, npes, 2*cpos+2, sizes);  /* left part is numbered below the right part */
+  else {
+    MMDOrder(ctrl, lgraph, order, lastvtx-rgraph->nvtxs);
+    FreeGraph(&lgraph);
+  }
+  if ((rgraph->nvtxs > MMDSWITCH || 2*cpos+1 < npes-1) && rgraph->nedges > 0)
+    MlevelNestedDissectionP(ctrl, rgraph, order, lastvtx, npes, 2*cpos+1, sizes);
+  else {
+    MMDOrder(ctrl, rgraph, order, lastvtx);
+    FreeGraph(&rgraph);
+  }
+}
+
+
+/*************************************************************************/
+/*! 
This function bisects a graph by computing a vertex separator
+*/
+/**************************************************************************/
+int METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy,
+           idx_t *vwgt, idx_t *options, idx_t *r_sepsize, idx_t *part)  /* nvtxs is passed by pointer; r_sepsize and part are outputs */
+{
+  idx_t i, j;  /* not used below */
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  if ((ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL)) == NULL)
+    return METIS_ERROR_INPUT;
+
+  InitRandom(ctrl->seed);  /* SetupCtrl in options.c already calls InitRandom with this seed */
+
+  graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+  AllocateWorkSpace(ctrl, graph);
+
+  /*============================================================
+   * Perform the bisection
+   *============================================================*/
+  ctrl->CoarsenTo = 100;  /* same value SetupCtrl assigns for METIS_OP_OMETIS */
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  *r_sepsize = graph->pwgts[2];  /* weight stored for part 2 is reported as the separator size */
+  icopy(*nvtxs, graph->where, part);  /* presumably icopy(n, src, dst): copies the per-vertex assignment into part */
+
+  FreeGraph(&graph);
+
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! This function is the entry point of a node-based separator refinement
+    of the nodes with an hmarker[] of 0. 
*/
+/*************************************************************************/
+int METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy,  /* note: vwgt precedes adjncy here, unlike METIS_NodeNDP */
+           idx_t *where, idx_t *hmarker, real_t ubfactor)  /* where is both input and output */
+{
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  /* set up the run time parameters */
+  ctrl = SetupCtrl(METIS_OP_OMETIS, NULL, 1, 3, NULL, NULL);  /* options is NULL, so presumably every option takes its default -- TODO confirm GETOPTION */
+  if (!ctrl) return METIS_ERROR_INPUT;
+
+  /* set up the graph */
+  graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+  /* set up the memory and the input partition */
+  Allocate2WayNodePartitionMemory(ctrl, graph);
+  icopy(nvtxs, where, graph->where);
+
+  Compute2WayNodePartitionParams(ctrl, graph);
+
+  FM_2WayNodeRefine1SidedP(ctrl, graph, hmarker, ubfactor, 10);  /* the last argument is npasses */
+  /* FM_2WayNodeRefine2SidedP(ctrl, graph, hmarker, ubfactor, 10); */
+
+  icopy(nvtxs, graph->where, where);  /* write the refined assignment back to the caller's array */
+
+  FreeGraph(&graph);
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! This function performs a node-based 1-sided FM refinement that moves
+    only nodes whose hmarker[] is -1 or equals the side being moved to. It is used by Parmetis. 
*/
+/*************************************************************************/
+void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph,  /* graph->where, pwgts, bndind, bndptr, nrinfo, mincut and nbnd are updated in place */
+          idx_t *hmarker, real_t ubfactor, idx_t npasses)  /* a separator vertex is movable only if its hmarker[] is -1 or equals the destination side */
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *swaps, *inqueue;
+  rpq_t *queue;
+  nrinfo_t *rinfo;
+  idx_t higain, oldgain, mincut, initcut, mincutorder;
+  idx_t pass, from, to, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+
+  WCOREPUSH;
+
+  ASSERT(graph->mincut == graph->pwgts[2]);
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where = graph->where;
+  pwgts = graph->pwgts;
+  rinfo = graph->nrinfo;
+
+  queue = rpqCreate(nvtxs);
+
+  inqueue = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));  /* inqueue[v] == pass marks v as queued in the current pass */
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  mptr = iwspacemalloc(ctrl, nvtxs+1);
+  mind = iwspacemalloc(ctrl, 2*nvtxs);
+
+  badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1]));  /* weight cap for a side: ubfactor times the heavier side at entry */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"] "
+           "MaxPwgt[%6"PRIDX"]. ISep: %6"PRIDX"\n",
+           pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt,
+           graph->mincut));
+
+  to = (pwgts[0] < pwgts[1] ? 1 : 0);  /* flipped at the top of every pass, so pass 0 moves into the lighter side */
+  for (pass=0; pass<npasses; pass++) {
+    from = to;
+    to = (from+1)%2;
+
+    rpqReset(queue);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      if (hmarker[i] == -1 || hmarker[i] == to) {
+        rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[from]);  /* key: own weight minus the weight that would be pulled in from the other side */
+        inqueue[i] = pass;
+      }
+    }
+    qsize = rpqLength(queue);
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+
+    limit = nbnd;
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    mptr[0] = nmind = nbad = 0;
+    mindiff = gk_max(pwgts[0]-pwgts[1], pwgts[1]-pwgts[0]);  /* |pwgts[0]-pwgts[1]| kept in idx_t; abs() takes an int and would truncate a 64-bit idx_t (assumes gk_max is a type-generic macro) */
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      if ((higain = rpqGetTop(queue)) == -1)
+        break;
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a possibility
+         of over-running the mind array. */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1)
+        break;
+
+      inqueue[higain] = -1;
+
+      if (pwgts[to]+vwgt[higain] > badmaxpwgt) { /* Skip this vertex */
+        if (nbad++ > limit)
+          break;
+        else {
+          nswaps--;  /* a skipped vertex does not consume a swap slot */
+          continue;
+        }
+      }
+
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[from]);
+
+      newdiff = gk_max(pwgts[to]+vwgt[higain] - (pwgts[from]-rinfo[higain].edegrees[from]), (pwgts[from]-rinfo[higain].edegrees[from]) - (pwgts[to]+vwgt[higain]));  /* absolute side-weight difference after the move, computed in idx_t instead of abs() */
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut = pwgts[2];
+        mincutorder = nswaps;
+        mindiff = newdiff;
+        nbad = 0;
+      }
+      else {
+        if (nbad++ > limit) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[from]);  /* undo the tentative separator-weight change before leaving */
+          break; /* No further improvement, break out */
+        }
+      }
+
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to] += vwgt[higain];
+      where[higain] = to;
+      swaps[nswaps] = higain;  /* move log used by the rollback below */
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          rinfo[k].edegrees[to] += vwgt[higain];
+        }
+        else if (where[k] == from) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k; /* Keep track for rollback */
+          where[k] = 2;
+          pwgts[from] -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;
+          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2)
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              oldgain = vwgt[kk]-rinfo[kk].edegrees[from];
+              rinfo[kk].edegrees[from] -= vwgt[k];
+
+              /* Update the gain of this node if it was not skipped */
+              if (inqueue[kk] == pass)
+                rpqUpdate(queue, kk, oldgain+vwgt[k]);
+            }
+          }
+
+          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
+          if (hmarker[k] == -1 || hmarker[k] == to) {
+            rpqInsert(queue, k, vwgt[k]-edegrees[from]);
+            inqueue[k] = pass;
+          }
+        }
+      }
+      mptr[nswaps+1] = nmind;  /* mind[mptr[s]..mptr[s+1]-1] are the vertices pulled in by swap s */
+
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n",
+                       higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]),
+                       vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit));
+
+    }
+
+
+    /****************************************************************
+    * Roll back computation
+    *****************************************************************/
+    for (nswaps--; nswaps>mincutorder; nswaps--) {  /* undo every move made after the best one, newest first */
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+      ASSERT(where[higain] == to);
+
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2)
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = from;
+        INC_DEC(pwgts[from], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2)
+            rinfo[kk].edegrees[from] += vwgt[k];
+        }
+      }
+    }
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX", QSIZE: %6"PRIDX"\n",
+          mincut, mincutorder, pwgts[0], pwgts[1], nbnd, qsize));
+
+    graph->mincut = mincut;
+    graph->nbnd = nbnd;
+
+    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut))  /* tested only after odd passes, i.e. once both directions have been tried */
+      break;
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/ +/*! This function performs a node-based (two-sided) FM refinement that + moves only nodes whose hmarker[] == -1. It is used by Parmetis. */ +/*************************************************************************/ +void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, + idx_t *hmarker, real_t ubfactor, idx_t npasses) +{ + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *moved, *swaps; + rpq_t *queues[2]; + nrinfo_t *rinfo; + idx_t higain, oldgain, mincut, initcut, mincutorder; + idx_t pass, to, other, limit; + idx_t badmaxpwgt, mindiff, newdiff; + idx_t u[2], g[2]; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + where = graph->where; + pwgts = graph->pwgts; + rinfo = graph->nrinfo; + + queues[0] = rpqCreate(nvtxs); + queues[1] = rpqCreate(nvtxs); + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. 
ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + + badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1])); + + for (pass=0; pass<npasses; pass++) { + iset(nvtxs, -1, moved); + rpqReset(queues[0]); + rpqReset(queues[1]); + + mincutorder = -1; + initcut = mincut = graph->mincut; + nbnd = graph->nbnd; + + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[swaps[ii]]; + ASSERT(where[i] == 2); + if (hmarker[i] == -1) { + rpqInsert(queues[0], i, vwgt[i]-rinfo[i].edegrees[1]); + rpqInsert(queues[1], i, vwgt[i]-rinfo[i].edegrees[0]); + moved[i] = -5; + } + else if (hmarker[i] != 2) { + rpqInsert(queues[hmarker[i]], i, vwgt[i]-rinfo[i].edegrees[(hmarker[i]+1)%2]); + moved[i] = -(10+hmarker[i]); + } + } + + ASSERT(CheckNodeBnd(graph, nbnd)); + ASSERT(CheckNodePartitionParams(graph)); + + limit = nbnd; + + /****************************************************** + * Get into the FM loop + *******************************************************/ + mptr[0] = nmind = 0; + mindiff = abs(pwgts[0]-pwgts[1]); + to = (pwgts[0] < pwgts[1] ? 0 : 1); + for (nswaps=0; nswaps<nvtxs; nswaps++) { + u[0] = rpqSeeTopVal(queues[0]); + u[1] = rpqSeeTopVal(queues[1]); + if (u[0] != -1 && u[1] != -1) { + g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1]; + g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0]; + + to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 
1 : pass%2)); + + if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) + to = (to+1)%2; + } + else if (u[0] == -1 && u[1] == -1) { + break; + } + else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) { + to = 0; + } + else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) { + to = 1; + } + else + break; + + other = (to+1)%2; + + higain = rpqGetTop(queues[to]); + + /* Delete its matching entry in the other queue */ + if (moved[higain] == -5) + rpqDelete(queues[other], higain); + + ASSERT(bndptr[higain] != -1); + + /* The following check is to ensure we break out if there is a posibility + of over-running the mind array. */ + if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) + break; + + pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); + + newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); + if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { + mincut = pwgts[2]; + mincutorder = nswaps; + mindiff = newdiff; + } + else { + if (nswaps - mincutorder > limit) { + pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); + break; /* No further improvement, break out */ + } + } + + BNDDelete(nbnd, bndind, bndptr, higain); + pwgts[to] += vwgt[higain]; + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; + + + /********************************************************** + * Update the degrees of the affected nodes + ***********************************************************/ + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ + oldgain = vwgt[k]-rinfo[k].edegrees[to]; + rinfo[k].edegrees[to] += vwgt[higain]; + if (moved[k] == -5 || moved[k] == -(10+other)) + rpqUpdate(queues[other], k, oldgain-vwgt[higain]); + } + else if (where[k] == other) { /* This vertex is pulled into the separator */ + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); + BNDInsert(nbnd, bndind, 
bndptr, k); + + mind[nmind++] = k; /* Keep track for rollback */ + where[k] = 2; + pwgts[other] -= vwgt[k]; + + edegrees = rinfo[k].edegrees; + edegrees[0] = edegrees[1] = 0; + for (jj=xadj[k]; jj<xadj[k+1]; jj++) { + kk = adjncy[jj]; + if (where[kk] != 2) + edegrees[where[kk]] += vwgt[kk]; + else { + oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; + rinfo[kk].edegrees[other] -= vwgt[k]; + if (moved[kk] == -5 || moved[kk] == -(10+to)) + rpqUpdate(queues[to], kk, oldgain+vwgt[k]); + } + } + + /* Insert the new vertex into the priority queue (if it has not been moved). */ + if (moved[k] == -1 && (hmarker[k] == -1 || hmarker[k] == to)) { + rpqInsert(queues[to], k, vwgt[k]-edegrees[other]); + moved[k] = -(10+to); + } +#ifdef FULLMOVES /* this does not work as well as the above partial one */ + if (moved[k] == -1) { + if (hmarker[k] == -1) { + rpqInsert(queues[0], k, vwgt[k]-edegrees[1]); + rpqInsert(queues[1], k, vwgt[k]-edegrees[0]); + moved[k] = -5; + } + else if (hmarker[k] != 2) { + rpqInsert(queues[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]); + moved[k] = -(10+hmarker[k]); + } + } +#endif + } + } + mptr[nswaps+1] = nmind; + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] " + "[%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", + higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], + pwgts[0], pwgts[1], pwgts[2])); + + } + + + /**************************************************************** + * Roll back computation + *****************************************************************/ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + ASSERT(CheckNodePartitionParams(graph)); + + to = where[higain]; + other = (to+1)%2; + INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); + where[higain] = 2; + BNDInsert(nbnd, bndind, bndptr, higain); + + edegrees = rinfo[higain].edegrees; + edegrees[0] = edegrees[1] = 0; + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = 
adjncy[j]; + if (where[k] == 2) + rinfo[k].edegrees[to] -= vwgt[higain]; + else + edegrees[where[k]] += vwgt[k]; + } + + /* Push nodes out of the separator */ + for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { + k = mind[j]; + ASSERT(where[k] == 2); + where[k] = other; + INC_DEC(pwgts[other], pwgts[2], vwgt[k]); + BNDDelete(nbnd, bndind, bndptr, k); + for (jj=xadj[k]; jj<xadj[k+1]; jj++) { + kk = adjncy[jj]; + if (where[kk] == 2) + rinfo[kk].edegrees[other] += vwgt[k]; + } + } + } + + ASSERT(mincut == pwgts[2]); + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + + graph->mincut = mincut; + graph->nbnd = nbnd; + + if (mincutorder == -1 || mincut >= initcut) + break; + } + + rpqDestroy(queues[0]); + rpqDestroy(queues[1]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! This function computes a cache-friendly permutation of each partition. + The resulting permutation is retuned in old2new, which is a vector of + size nvtxs such for vertex i, old2new[i] is its new vertex number. +*/ +/**************************************************************************/ +int METIS_CacheFriendlyReordering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *part, idx_t *old2new) +{ + idx_t i, j, k, first, last, lastlevel, maxdegree, nparts; + idx_t *cot, *pos, *pwgts; + ikv_t *levels; + + InitRandom(123); + + /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. + Positions from [0...first) is the current iperm[] vector of the explored vertices; + Positions from [first...last) is the OPEN list (i.e., visited vertices); + Positions from [last...nvtxs) is the todo list. 
*/ + cot = iincset(nvtxs, 0, imalloc(nvtxs, "METIS_CacheFriendlyReordering: cor")); + + /* This array will function like pos + touched of the CC method */ + pos = iincset(nvtxs, 0, imalloc(nvtxs, "METIS_CacheFriendlyReordering: pos")); + + /* pick a random starting vertex */ + i = irandInRange(nvtxs); + pos[0] = cot[0] = i; + pos[i] = cot[i] = 0; + + /* compute a BFS ordering */ + first = last = 0; + lastlevel = 0; + maxdegree = 0; + while (first < nvtxs) { + if (first == last) { /* Find another starting vertex */ + k = cot[last]; + ASSERT(pos[k] >= 0); + pos[k] = --lastlevel; /* mark node as being visited by assigning its current level (-ve) */ + last++; + } + + i = cot[first++]; + maxdegree = (maxdegree < xadj[i+1]-xadj[i] ? xadj[i+1]-xadj[i] : maxdegree); + for (j=xadj[i]; j<xadj[i+1]; j++) { + k = adjncy[j]; + /* if a node has been already been visited, its pos[] will be -1 */ + if (pos[k] >= 0) { + /* pos[k] is the location within cot of where k resides (it is in the 'todo' part); + put in that location cot[last] that we are about to overwrite + and update pos[cot[last]] to reflect that. 
*/ + cot[pos[k]] = cot[last]; + pos[cot[last]] = pos[k]; + + cot[last++] = k; /* put node at the end of the "queue" */ + pos[k] = pos[i]-1; /* mark node as being visited by assigning to next level */ + lastlevel = pos[k]; /* for correctly advancing the levels in case of disconnected graphs */ + } + } + } +// printf("lastlevel: %d\n", (int)-lastlevel); + + /* sort based on decreasing level and decreasing degree (RCM) */ + levels = ikvmalloc(nvtxs, "METIS_CacheFriendlyReordering: levels"); + maxdegree++; + for (i=0; i<nvtxs; i++) { + levels[i].val = i; + levels[i].key = -pos[i]*maxdegree + xadj[i+1]-xadj[i]; + } + ikvsortd(nvtxs, levels); + + /* figure out the partitions */ + nparts = imax(nvtxs, part, 1)+1; + pwgts = ismalloc(nparts+1, 0, "METIS_CacheFriendlyReordering: pwgts"); + + for (i=0; i<nvtxs; i++) + pwgts[part[i]]++; + MAKECSR(i, nparts, pwgts); + + for (i=0; i<nvtxs; i++) + old2new[levels[i].val] = pwgts[part[levels[i].val]]++; + +#ifdef XXX + for (i=0; i<nvtxs; i++) + for (j=xadj[i]; j<xadj[i+1]; j++) + printf("COO: %d %d\n", (int)old2new[i], (int)old2new[adjncy[j]]); +#endif + + gk_free((void **)&cot, &pos, &levels, &pwgts, LTERM); + + return METIS_OK; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/pmetis.c b/3rdParty/metis/metis-5.1.1/libmetis/pmetis.c new file mode 100644 index 000000000..d32e84921 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/pmetis.c @@ -0,0 +1,387 @@ +/** +\file +\brief This file contains the top level routines for the multilevel recursive bisection + algorithm PMETIS. + +\date Started 7/24/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: pmetis.c 10513 2011-07-07 22:06:03Z karypis $ \endverbatim +*/ + + +#include "metislib.h" + + +/*************************************************************************/ +/*! \ingroup api + \brief Recursive partitioning routine. 
+ + This function computes a partitioning of a graph based on multilevel + recursive bisection. It can be used to partition a graph into \e k + parts. The objective of the partitioning is to minimize the edgecut + subject to one or more balancing constraints. + + \param[in] nvtxs is the number of vertices in the graph. + + \param[in] ncon is the number of balancing constraints. For the standard + partitioning problem in which each vertex is either unweighted + or has a single weight, ncon should be 1. + + \param[in] xadj is an array of size nvtxs+1 used to specify the starting + positions of the adjacency structure of the vertices in the + adjncy array. + + \param[in] adjncy is an array of size to the sum of the degrees of the + graph that stores for each vertex the set of vertices that + is adjancent to. + + \param[in] vwgt is an array of size nvtxs*ncon that stores the weights + of the vertices for each constraint. The ncon weights for the + ith vertex are stored in the ncon consecutive locations starting + at vwgt[i*ncon]. When ncon==1, a NULL value can be passed indicating + that all the vertices in the graph have the same weight. + + \param[in] adjwgt is an array of size equal to adjncy, specifying the weight + for each edge (i.e., adjwgt[j] corresponds to the weight of the + edge stored in adjncy[j]). + A NULL value can be passed indicating that all the edges in the + graph have the same weight. + + \param[in] nparts is the number of desired partitions. + + \param[in] tpwgts is an array of size nparts*ncon that specifies the + desired weight for each part and constraint. The \e{target partition + weight} for the ith part and jth constraint is specified + at tpwgts[i*ncon+j] (the numbering of i and j starts from 0). + For each constraint, the sum of the tpwgts[] entries must be + 1.0 (i.e., \f$ \sum_i tpwgts[i*ncon+j] = 1.0 \f$). + A NULL value can be passed indicating that the graph should + be equally divided among the parts. 
+ + \param[in] ubvec is an array of size ncon that specifies the allowed + load imbalance tolerance for each constraint. + For the ith part and jth constraint the allowed weight is the + ubvec[j]*tpwgts[i*ncon+j] fraction of the jth's constraint total + weight. The load imbalances must be greater than 1.0. + A NULL value can be passed indicating that the load imbalance + tolerance for each constraint should be 1.001 (for ncon==1) + or 1.01 (for ncon>1). + + \params[in] options is the array for passing additional parameters + in order to customize the behaviour of the partitioning + algorithm. + + \params[out] edgecut stores the cut of the partitioning. + + \params[out] part is an array of size nvtxs used to store the + computed partitioning. The partition number for the ith + vertex is stored in part[i]. Based on the numflag parameter, + the numbering of the parts starts from either 0 or 1. + + + \returns + \retval METIS_OK indicates that the function returned normally. + \retval METIS_ERROR_INPUT indicates an input error. + \retval METIS_ERROR_MEMORY indicates that it could not allocate + the required memory. 
+ +*/ +/*************************************************************************/ +int METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, + idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, + idx_t *objval, idx_t *part) +{ + int sigrval=0, renumber=0; + graph_t *graph; + ctrl_t *ctrl; + + /* set up malloc cleaning code and signal catchers */ + if (!gk_malloc_init()) + return METIS_ERROR_MEMORY; + + gk_sigtrap(); + + if ((sigrval = gk_sigcatch()) != 0) + goto SIGTHROW; + + + /* set up the run parameters */ + ctrl = SetupCtrl(METIS_OP_PMETIS, options, *ncon, *nparts, tpwgts, ubvec); + if (!ctrl) { + gk_siguntrap(); + return METIS_ERROR_INPUT; + } + + /* if required, change the numbering to 0 */ + if (ctrl->numflag == 1) { + Change2CNumbering(*nvtxs, xadj, adjncy); + renumber = 1; + } + + /* set up the graph */ + graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt); + + /* allocate workspace memory */ + AllocateWorkSpace(ctrl, graph); + + /* start the partitioning */ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr)); + + *objval = MlevelRecursiveBisection(ctrl, graph, *nparts, part, ctrl->tpwgts, 0); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr)); + IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl)); + + /* clean up */ + FreeCtrl(&ctrl); + +SIGTHROW: + /* if required, change the numbering back to 1 */ + if (renumber) + Change2FNumbering(*nvtxs, xadj, adjncy, part); + + gk_siguntrap(); + gk_malloc_cleanup(0); + + return metis_rcode(sigrval); +} + + +/*************************************************************************/ +/*! This function is the top-level driver of the recursive bisection + routine. 
*/
+/*************************************************************************/
+/* Bisects graph so that side 0 receives the first nparts/2 parts and
+   side 1 the remaining ones, writes where[]+fpart into part[] through
+   graph->label, frees graph, rescales tpwgts[] in place for the two
+   sides and recurses on every side that still has to be split.
+   Returns the accumulated cut of all bisections that were performed
+   (0 when graph has no vertices). */
+idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts,
+          idx_t *part, real_t *tpwgts, idx_t fpart)
+{
+  idx_t v, c, nvtxs, ncon, nleft, nright, cut;
+  graph_t *lgraph, *rgraph;
+  real_t lsum, *sidewgts, *rtpwgts;
+
+  nvtxs = graph->nvtxs;
+  if (nvtxs == 0) {
+    printf("\t***Cannot bisect a graph with 0 vertices!\n"
+           "\t***You are trying to partition a graph into too many parts!\n");
+    return 0;
+  }
+
+  ncon    = graph->ncon;
+  nleft   = (nparts>>1);        /* number of parts given to side 0 */
+  nright  = nparts-nleft;       /* number of parts given to side 1 */
+  rtpwgts = tpwgts+nleft*ncon;  /* first target weight of the side-1 parts */
+
+  /* per-constraint target of each side: side 0 gets the sum of the targets
+     of its parts, side 1 gets what is left */
+  WCOREPUSH;
+  sidewgts = rwspacemalloc(ctrl, 2*ncon);
+  for (c=0; c<ncon; c++) {
+    sidewgts[c]      = rsum(nleft, tpwgts+c, ncon);
+    sidewgts[ncon+c] = 1.0 - sidewgts[c];
+  }
+
+  cut = MultilevelBisect(ctrl, graph, sidewgts);
+
+  WCOREPOP;
+
+  /* record the side of every vertex under its original label */
+  for (v=0; v<nvtxs; v++)
+    part[graph->label[v]] = graph->where[v] + fpart;
+
+  /* the two sub-graphs are only built when a further split is required */
+  if (nparts > 2)
+    SplitGraphPart(ctrl, graph, &lgraph, &rgraph);
+
+  /* the input graph is not needed any more */
+  FreeGraph(&graph);
+
+  /* rescale the targets of each side by the inverse of that side's share */
+  for (c=0; c<ncon; c++) {
+    lsum = rsum(nleft, tpwgts+c, ncon);
+    rscale(nleft, 1.0/lsum, tpwgts+c, ncon);
+    rscale(nright, 1.0/(1.0-lsum), rtpwgts+c, ncon);
+  }
+
+  if (nparts > 2) {
+    /* with exactly three parts side 0 holds a single part and is final */
+    if (nparts > 3)
+      cut += MlevelRecursiveBisection(ctrl, lgraph, nleft, part, tpwgts, fpart);
+    else
+      FreeGraph(&lgraph);
+
+    cut += MlevelRecursiveBisection(ctrl, rgraph, nright, part, rtpwgts,
+               fpart+nleft);
+  }
+
+  return cut;
+}
+
+
+/*************************************************************************/
+/*! 
This function performs a multilevel bisection */ +/*************************************************************************/ +idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts) +{ + idx_t i, niparts, bestobj=0, curobj=0, *bestwhere=NULL; + graph_t *cgraph; + real_t bestbal=0.0, curbal=0.0; + + Setup2WayBalMultipliers(ctrl, graph, tpwgts); + + WCOREPUSH; + + if (ctrl->ncuts > 1) + bestwhere = iwspacemalloc(ctrl, graph->nvtxs); + + for (i=0; i<ctrl->ncuts; i++) { + cgraph = CoarsenGraph(ctrl, graph); + + niparts = (cgraph->nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); + Init2WayPartition(ctrl, cgraph, tpwgts, niparts); + + Refine2Way(ctrl, graph, cgraph, tpwgts); + + curobj = graph->mincut; + curbal = ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors); + + if (i == 0 + || (curbal <= 0.0005 && bestobj > curobj) + || (bestbal > 0.0005 && curbal < bestbal)) { + bestobj = curobj; + bestbal = curbal; + if (i < ctrl->ncuts-1) + icopy(graph->nvtxs, graph->where, bestwhere); + } + + if (bestobj == 0) + break; + + if (i < ctrl->ncuts-1) + FreeRData(graph); + } + + if (bestobj != curobj) { + icopy(graph->nvtxs, bestwhere, graph->where); + Compute2WayPartitionParams(ctrl, graph); + } + + WCOREPOP; + + return bestobj; +} + + +/*************************************************************************/ +/*! 
This function splits a graph into two based on its bisection.
+
+    Two graphs are created with SetupSplitGraph(), one for the vertices
+    with where[] == 0 and one for those with where[] == 1. The vertices
+    keep their relative order and are renumbered within each side. For a
+    boundary vertex only the edges to neighbours on its own side are
+    kept; the adjacency list of an interior vertex (bndptr[] == -1) is
+    copied unchanged.
+
+    \param ctrl is the control structure (workspace and timers).
+    \param graph is the bisected graph; its where[] and bndptr[] arrays
+           are read.
+    \param r_lgraph receives the graph of side 0.
+    \param r_rgraph receives the graph of side 1.
+*/
+/*************************************************************************/
+void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph,
+         graph_t **r_rgraph)
+{
+  idx_t i, j, k, l, istart, iend, mypart, nvtxs, ncon, snvtxs[2], snedges[2];
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr;
+  idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2];
+  idx_t *rename;
+  idx_t *auxadjncy, *auxadjwgt;
+  graph_t *lgraph, *rgraph;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr));
+
+  nvtxs = graph->nvtxs;
+  ncon = graph->ncon;
+  xadj = graph->xadj;
+  vwgt = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  label = graph->label;
+  where = graph->where;
+  bndptr = graph->bndptr;
+
+  ASSERT(bndptr != NULL);
+
+  rename = iwspacemalloc(ctrl, nvtxs); /* rename[i]: index of vertex i within its side */
+
+  snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0;
+  for (i=0; i<nvtxs; i++) {
+    k = where[i];
+    rename[i] = snvtxs[k]++;
+    snedges[k] += xadj[i+1]-xadj[i]; /* upper bound: counts the cut edges as well */
+  }
+
+  lgraph = SetupSplitGraph(graph, snvtxs[0], snedges[0]);
+  sxadj[0] = lgraph->xadj;
+  svwgt[0] = lgraph->vwgt;
+  sadjncy[0] = lgraph->adjncy;
+  sadjwgt[0] = lgraph->adjwgt;
+  slabel[0] = lgraph->label;
+
+  rgraph = SetupSplitGraph(graph, snvtxs[1], snedges[1]);
+  sxadj[1] = rgraph->xadj;
+  svwgt[1] = rgraph->vwgt;
+  sadjncy[1] = rgraph->adjncy;
+  sadjwgt[1] = rgraph->adjwgt;
+  slabel[1] = rgraph->label;
+
+  snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0; /* reused as fill counters */
+  sxadj[0][0] = sxadj[1][0] = 0;
+  for (i=0; i<nvtxs; i++) {
+    mypart = where[i];
+
+    istart = xadj[i];
+    iend = xadj[i+1];
+    if (bndptr[i] == -1) { /* This is an interior vertex: its whole adjacency list is copied
+                              without testing where[] (presumably because it has no neighbour
+                              on the other side) */
+      auxadjncy = sadjncy[mypart] + snedges[mypart] - istart; /* biased by -istart so that j indexes it directly */
+      auxadjwgt = sadjwgt[mypart] + snedges[mypart] - istart;
+      for(j=istart; j<iend; j++) {
+        auxadjncy[j] = adjncy[j];
+        auxadjwgt[j] = adjwgt[j];
+      }
+      snedges[mypart] += iend-istart;
+    }
+    else {
+      auxadjncy = sadjncy[mypart];
+      auxadjwgt = sadjwgt[mypart];
+      l = snedges[mypart];
+      for (j=istart; j<iend; j++) {
+        k = adjncy[j];
+        if (where[k] == mypart) { /* edges to the other side are dropped */
+          auxadjncy[l] = k;
+          auxadjwgt[l++] = adjwgt[j];
+        }
+      }
+      snedges[mypart] = l;
+    }
+
+    /* copy vertex weights (ncon per vertex) */
+    for (k=0; k<ncon; k++)
+      svwgt[mypart][snvtxs[mypart]*ncon+k] = vwgt[i*ncon+k];
+
+    slabel[mypart][snvtxs[mypart]] = label[i];
+    sxadj[mypart][++snvtxs[mypart]] = snedges[mypart]; /* close the row of the new vertex */
+  }
+
+  for (mypart=0; mypart<2; mypart++) { /* map the stored neighbour ids to their per-side indices */
+    iend = sxadj[mypart][snvtxs[mypart]];
+    auxadjncy = sadjncy[mypart];
+    for (i=0; i<iend; i++)
+      auxadjncy[i] = rename[auxadjncy[i]];
+  }
+
+  lgraph->nedges = snedges[0]; /* number of adjacency entries actually stored */
+  rgraph->nedges = snedges[1];
+
+  SetupGraph_tvwgt(lgraph);
+  SetupGraph_tvwgt(rgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr));
+
+  *r_lgraph = lgraph;
+  *r_rgraph = rgraph;
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/proto.h b/3rdParty/metis/metis-5.1.1/libmetis/proto.h
new file mode 100644
index 000000000..3a8bd80d5
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/proto.h
@@ -0,0 +1,357 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * proto.h
+ *
+ * This file contains the function prototypes of the libmetis source files
+ *
+ * Started 10/19/95
+ * George
+ *
+ * $Id: proto.h 20398 2016-11-22 17:17:12Z karypis $
+ *
+ */
+
+#ifndef _LIBMETIS_PROTO_H_
+#define _LIBMETIS_PROTO_H_
+
+/* auxapi.c */
+
+/* balance.c */
+void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+
+
+/* bucketsort.c */
+void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys,
+         idx_t *tperm, idx_t *perm);
+
+
+/* checkgraph.c */
+int CheckGraph(graph_t *graph, int numflag, int verbose);
+int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, 
idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); +graph_t *FixGraph(graph_t *graph); + + +/* coarsen.c */ +graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph); +graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels); +idx_t Match_RM(ctrl_t *ctrl, graph_t *graph); +idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph); +idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t nunmatched); +idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree); +idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, + idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree); +idx_t Match_JC(ctrl_t *ctrl, graph_t *graph); +void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph); +void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match); +void CreateCoarseGraphNoMask(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match); +void CreateCoarseGraphPerm(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, + idx_t *match, idx_t *perm); +graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, int dovsize); +void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph); + + + +/* compress.c */ +graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *cptr, idx_t *cind); +graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *iperm, real_t factor); + + +/* contig.c */ +idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, + idx_t *cptr, idx_t *cind); +void ComputeBFSOrdering(ctrl_t *ctrl, graph_t *graph, idx_t *bfsperm); +idx_t IsConnected(graph_t *graph, idx_t report); +idx_t IsConnectedSubdomain(ctrl_t *, graph_t *, idx_t, idx_t); +idx_t FindSepInducedComponents(ctrl_t *, graph_t *, idx_t *, idx_t *); +void EliminateComponents(ctrl_t *ctrl, graph_t *graph); +void MoveGroupContigForCut(ctrl_t *ctrl, graph_t *graph, 
idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind); +void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, + idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker, + idx_t *modind); + + +/* debug.c */ +idx_t ComputeCut(graph_t *graph, idx_t *where); +idx_t ComputeVolume(graph_t *, idx_t *); +idx_t ComputeMaxCut(graph_t *graph, idx_t nparts, idx_t *where); +idx_t CheckBnd(graph_t *); +idx_t CheckBnd2(graph_t *); +idx_t CheckNodeBnd(graph_t *, idx_t); +idx_t CheckRInfo(ctrl_t *ctrl, ckrinfo_t *rinfo); +idx_t CheckNodePartitionParams(graph_t *); +idx_t IsSeparable(graph_t *); +void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph); + + +/* fm.c */ +void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter); +void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, rpq_t **queues, + idx_t *from, idx_t *cnum); +void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, + real_t deltabal, idx_t mincutorder); + + +/* fortran.c */ +void Change2CNumbering(idx_t, idx_t *, idx_t *); +void Change2FNumbering(idx_t, idx_t *, idx_t *, idx_t *); +void Change2FNumbering2(idx_t, idx_t *, idx_t *); +void Change2FNumberingOrder(idx_t, idx_t *, idx_t *, idx_t *, idx_t *); +void ChangeMesh2CNumbering(idx_t n, idx_t *ptr, idx_t *ind); +void ChangeMesh2FNumbering(idx_t n, idx_t *ptr, idx_t *ind, idx_t nvtxs, + idx_t *xadj, idx_t *adjncy); +void ChangeMesh2FNumbering2(idx_t ne, idx_t nn, idx_t *ptr, idx_t *ind, + idx_t *epart, idx_t *npart); + + +/* graph.c */ +graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, + idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); +void SetupGraph_tvwgt(graph_t *graph); +void SetupGraph_label(graph_t *graph); +graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges); +graph_t 
*CreateGraph(void); +void InitGraph(graph_t *graph); +void FreeRData(graph_t *graph); +void FreeGraph(graph_t **graph); +void graph_WriteToDisk(ctrl_t *ctrl, graph_t *graph); +void graph_ReadFromDisk(ctrl_t *ctrl, graph_t *graph); + + +/* initpart.c */ +void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); +void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts); + + +/* kmetis.c */ +idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part); +void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph); +idx_t BlockKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part); +idx_t GrowMultisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where); +void BalanceAndRefineLP(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where); +void BalanceAndRefine(ctrl_t *ctrl, graph_t *graph, idx_t nparts, idx_t *where); + + +/* kwayfm.c */ +void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, + real_t ffactor, idx_t omode); +idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, 
idx_t *where, + idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk); +void KWayVolUpdate(ctrl_t *ctrl, graph_t *graph, idx_t v, idx_t from, + idx_t to, ipq_t *queue, idx_t *vstatus, idx_t *r_nupd, idx_t *updptr, + idx_t *updind, idx_t bndtype, idx_t *vmarker, idx_t *pmarker, + idx_t *modind); +void Greedy_KWayEdgeStats(ctrl_t *ctrl, graph_t *graph); +void Greedy_KWayEdgeCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter); + + +/* kwayrefine.c */ +void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph); +void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph); +void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph); +void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph); +void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype); +void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph); +int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor); + + +/* mcutil.c */ +int rvecle(idx_t n, real_t *x, real_t *y); +int rvecge(idx_t n, real_t *x, real_t *y); +int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y); +real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y); +int ivecle(idx_t n, idx_t *x, idx_t *z); +int ivecge(idx_t n, idx_t *x, idx_t *z); +int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z); +int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z); +int BetterVBalance(idx_t ncon, real_t *itvwgt, idx_t *v_vwgt, idx_t *u1_vwgt, + idx_t *u2_vwgt); +int BetterBalance2Way(idx_t n, real_t *x, real_t *y); +int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *itvwgt, idx_t a1, + idx_t *pt1, real_t *bm1, idx_t a2, idx_t *pt2, real_t *bm2); +real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm); +real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubvec); +real_t ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t *ubfactors, real_t *diffvec); +void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm, + real_t 
*lbvec); + + +/* mesh.c */ +void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, + idx_t **r_xadj, idx_t **r_adjncy); +idx_t FindCommonElements(idx_t qid, idx_t elen, idx_t *eind, idx_t *nptr, + idx_t *nind, idx_t *eptr, idx_t ncommon, idx_t *marker, idx_t *nbrs); +void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t **r_xadj, + idx_t **r_adjncy); +idx_t FindCommonNodes(idx_t qid, idx_t nelmnts, idx_t *elmntids, idx_t *eptr, + idx_t *eind, idx_t *marker, idx_t *nbrs); +mesh_t *CreateMesh(void); +void InitMesh(mesh_t *mesh); +void FreeMesh(mesh_t **mesh); + + +/* meshpart.c */ +void InduceRowPartFromColumnPart(idx_t nrows, idx_t *rowptr, idx_t *rowind, + idx_t *rpart, idx_t *cpart, idx_t nparts, real_t *tpwgts); + + +/* minconn.c */ +void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph); +void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, + idx_t *r_maxndoms); +void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where); +void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph); +void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind); +void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, + idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind); + + +/* mincover.o */ +void MinCover(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *); +idx_t MinCover_Augment(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t); +void MinCover_Decompose(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *, idx_t *); +void MinCover_ColDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t); +void MinCover_RowDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t); + + +/* mmd.c */ +void genmmd(idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t , idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *); +void mmdelm(idx_t, idx_t *xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t); +idx_t mmdint(idx_t, idx_t 
*xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *); +void mmdnum(idx_t, idx_t *, idx_t *, idx_t *); +void mmdupd(idx_t, idx_t, idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *tag); + + +/* ometis.c */ +void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx); +void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx); +void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph); +void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts); +void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, + graph_t **r_rgraph); +graph_t **SplitGraphOrderCC(ctrl_t *ctrl, graph_t *graph, idx_t ncmps, + idx_t *cptr, idx_t *cind); +void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx); + + +/* options.c */ +ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, + real_t *tpwgts, real_t *ubvec); +void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph); +void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts); +void PrintCtrl(ctrl_t *ctrl); +int CheckParams(ctrl_t *ctrl); +void FreeCtrl(ctrl_t **r_ctrl); + + +/* parmetis.c */ +void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order, + idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes); +void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, + real_t ubfactor, idx_t npasses); +void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, + real_t ubfactor, idx_t npasses); + + +/* pmetis.c */ +idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, + idx_t *part, real_t *tpwgts, idx_t fpart); +idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts); +void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, graph_t **r_rgraph); + + +/* 
refine.c */ +void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *rtpwgts); +void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph); +void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph); +void Project2WayPartition(ctrl_t *ctrl, graph_t *graph); + + +/* separator.c */ +void ConstructSeparator(ctrl_t *ctrl, graph_t *graph); +void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph); + + +/* sfm.c */ +void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter); +void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter); +void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph); + + +/* srefine.c */ +void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph); +void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph); +void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph); +void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph); + + +/* stat.c */ +void ComputePartitionInfoBipartite(graph_t *, idx_t, idx_t *); +void ComputePartitionBalance(graph_t *, idx_t, idx_t *, real_t *); +real_t ComputeElementBalance(idx_t, idx_t, idx_t *); + + +/* timing.c */ +void InitTimers(ctrl_t *); +void PrintTimers(ctrl_t *); + +/* util.c */ +idx_t iargmax_strd(size_t, idx_t *, idx_t); +idx_t iargmax_nrm(size_t n, idx_t *x, real_t *y); +idx_t iargmax2_nrm(size_t n, idx_t *x, real_t *y); +idx_t rargmax2(size_t, real_t *); +void InitRandom(idx_t); +int metis_rcode(int sigrval); + + + +/* wspace.c */ +void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph); +void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize); +void FreeWorkSpace(ctrl_t *ctrl); +void *wspacemalloc(ctrl_t *ctrl, size_t nbytes); +void wspacepush(ctrl_t *ctrl); +void wspacepop(ctrl_t *ctrl); +idx_t *iwspacemalloc(ctrl_t *, idx_t); +real_t *rwspacemalloc(ctrl_t *, idx_t); +ikv_t *ikvwspacemalloc(ctrl_t *, idx_t); +void cnbrpoolReset(ctrl_t *ctrl); +idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs); +void 
vnbrpoolReset(ctrl_t *ctrl); +idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs); + + +#endif diff --git a/3rdParty/metis/metis-5.1.1/libmetis/refine.c b/3rdParty/metis/metis-5.1.1/libmetis/refine.c new file mode 100644 index 000000000..9a9fc0e3c --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/refine.c @@ -0,0 +1,216 @@ +/* +\file +\brief This file contains the driving routines for multilevel refinement + +\date Started 7/24/1997 +\author George +\author Copyright 1997-2009, Regents of the University of Minnesota +\version\verbatim $Id: refine.c 14362 2013-05-21 21:35:23Z karypis $ \endverbatim +*/ + +#include "metislib.h" + + +/*************************************************************************/ +/*! This function is the entry point of refinement. Starting at graph it calls Balance2Way and FM_2WayRefine on the current level, then steps to graph->finer and projects the partition onto it; it stops once orggraph itself has been refined. tpwgts is passed through unchanged to both of those calls. */ +/*************************************************************************/ +void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *tpwgts) +{ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr)); + + /* Compute the parameters of the coarsest graph */ + Compute2WayPartitionParams(ctrl, graph); + + for (;;) { /* one iteration per level; the only exit is the break below */ + ASSERT(CheckBnd(graph)); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr)); + + Balance2Way(ctrl, graph, tpwgts); + + FM_2WayRefine(ctrl, graph, tpwgts, ctrl->niter); + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr)); + + if (graph == orggraph) /* the original graph has just been refined, so we are done */ + break; + + graph = graph->finer; /* move one level up the hierarchy */ + graph_ReadFromDisk(ctrl, graph); /* NOTE(review): presumably reloads graph data that was written out earlier; confirm in graph.c */ + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr)); + Project2WayPartition(ctrl, graph); /* carries where/id/ed/boundary over from graph->coarser and frees it */ + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr)); + } + + IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr)); +} + + +/*************************************************************************/ +/*! 
This function allocates memory for 2-way edge refinement */ +/*************************************************************************/ +void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs, ncon; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + + graph->pwgts = imalloc(2*ncon, "Allocate2WayPartitionMemory: pwgts"); /* ncon weights for each of the two parts */ + graph->where = imalloc(nvtxs, "Allocate2WayPartitionMemory: where"); /* the remaining arrays hold one entry per vertex */ + graph->bndptr = imalloc(nvtxs, "Allocate2WayPartitionMemory: bndptr"); + graph->bndind = imalloc(nvtxs, "Allocate2WayPartitionMemory: bndind"); + graph->id = imalloc(nvtxs, "Allocate2WayPartitionMemory: id"); + graph->ed = imalloc(nvtxs, "Allocate2WayPartitionMemory: ed"); +} + + +/*************************************************************************/ +/*! This function computes the initial id/ed, and from graph->where also the part weights pwgts, the boundary list (bndptr/bndind/nbnd) and mincut. The output arrays must already be allocated; they are reset here. */ +/*************************************************************************/ +void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, nvtxs, ncon, nbnd, mincut, istart, iend, tid, ted, me; + idx_t *xadj, *vwgt, *adjncy, *adjwgt, *pwgts; + idx_t *where, *bndptr, *bndind, *id, *ed; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + where = graph->where; + id = graph->id; + ed = graph->ed; + + pwgts = iset(2*ncon, 0, graph->pwgts); /* zero the part weights */ + bndptr = iset(nvtxs, -1, graph->bndptr); /* -1 means the vertex is not in the boundary list */ + bndind = graph->bndind; + + /* Compute pwgts */ + if (ncon == 1) { /* single constraint: pwgts is indexed directly by part */ + for (i=0; i<nvtxs; i++) { + ASSERT(where[i] >= 0 && where[i] <= 1); + pwgts[where[i]] += vwgt[i]; + } + ASSERT(pwgts[0]+pwgts[1] == graph->tvwgt[0]); + } + else { /* multiple constraints: pwgts and vwgt are laid out as ncon consecutive entries per part/vertex */ + for (i=0; i<nvtxs; i++) { + me = where[i]; + for (j=0; j<ncon; j++) + pwgts[me*ncon+j] += vwgt[i*ncon+j]; + } + } + + + /* Compute the required info for refinement */ + for (nbnd=0, mincut=0, i=0; i<nvtxs; i++) { + istart = xadj[i]; + iend = xadj[i+1]; + + me = where[i]; + tid = ted = 0; /* weight of edges to the same part (tid) and to the other part (ted) */ + + for (j=istart; j<iend; j++) { + if (me == 
where[adjncy[j]]) + tid += adjwgt[j]; + else + ted += adjwgt[j]; + } + id[i] = tid; + ed[i] = ted; + + if (ted > 0 || istart == iend) { /* boundary: has external edge weight, or has no edges at all */ + BNDInsert(nbnd, bndind, bndptr, i); + mincut += ted; + } + } + + graph->mincut = mincut/2; /* NOTE(review): presumably every cut edge is stored at both endpoints and was therefore added twice; confirm the adjacency is symmetric */ + graph->nbnd = nbnd; + +} + + +/*************************************************************************/ +/*! Projects a partition and computes the refinement params. The partition is taken from graph->coarser through graph->cmap; memory for the 2-way arrays of graph is allocated here and the coarser graph is freed before returning. */ +/*************************************************************************/ +void Project2WayPartition(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, istart, iend, nvtxs, nbnd, me, tid, ted; + idx_t *xadj, *adjncy, *adjwgt; + idx_t *cmap, *where, *bndptr, *bndind; + idx_t *cwhere, *cbndptr; + idx_t *id, *ed; + graph_t *cgraph; + int dropedges; + + Allocate2WayPartitionMemory(ctrl, graph); + + dropedges = ctrl->dropedges; + + cgraph = graph->coarser; + cwhere = cgraph->where; + cbndptr = cgraph->bndptr; + + nvtxs = graph->nvtxs; + cmap = graph->cmap; + xadj = graph->xadj; + adjncy = graph->adjncy; + adjwgt = graph->adjwgt; + + where = graph->where; + id = graph->id; + ed = graph->ed; + + bndptr = iset(nvtxs, -1, graph->bndptr); + bndind = graph->bndind; + + /* Project the partition and record which of these nodes came from the + coarser boundary */ + for (i=0; i<nvtxs; i++) { + j = cmap[i]; + where[i] = cwhere[j]; + cmap[i] = (dropedges ? 0 : cbndptr[j]); /* cmap is overwritten: with dropedges it is never -1, so every vertex takes the full id/ed computation below */ + } + + /* Compute the refinement information of the nodes */ + for (nbnd=0, i=0; i<nvtxs; i++) { + istart = xadj[i]; + iend = xadj[i+1]; + + tid = ted = 0; + if (cmap[i] == -1) { /* Interior node. Note that cmap[i] = cbndptr[cmap[i]] */ + for (j=istart; j<iend; j++) + tid += adjwgt[j]; + } + else { /* Potentially an interface node */ + me = where[i]; + for (j=istart; j<iend; j++) { + if (me == where[adjncy[j]]) + tid += adjwgt[j]; + else + ted += adjwgt[j]; + } + } + id[i] = tid; + ed[i] = ted; + + if (ted > 0 || istart == iend) /* same boundary rule as in Compute2WayPartitionParams */ + BNDInsert(nbnd, bndind, bndptr, i); + } + graph->mincut = (dropedges ? 
ComputeCut(graph, where) : cgraph->mincut); /* the coarse cut is reused unless dropedges is set, in which case it is recomputed on this graph */ + graph->nbnd = nbnd; + + /* copy pwgts */ + icopy(2*graph->ncon, cgraph->pwgts, graph->pwgts); + + FreeGraph(&graph->coarser); /* everything needed from the coarser graph has been copied by now */ + graph->coarser = NULL; +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/rename.h b/3rdParty/metis/metis-5.1.1/libmetis/rename.h new file mode 100644 index 000000000..59a5e762b --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/rename.h @@ -0,0 +1,269 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * rename.h + * + * This file contains header files + * + * Started 10/2/97 + * George + * + * $Id: rename.h 20398 2016-11-22 17:17:12Z karypis $ + * + */ + + +#ifndef _LIBMETIS_RENAME_H_ +#define _LIBMETIS_RENAME_H_ + + +/* balance.c */ +#define Balance2Way libmetis__Balance2Way +#define Bnd2WayBalance libmetis__Bnd2WayBalance +#define General2WayBalance libmetis__General2WayBalance +#define McGeneral2WayBalance libmetis__McGeneral2WayBalance + +/* bucketsort.c */ +#define BucketSortKeysInc libmetis__BucketSortKeysInc + +/* checkgraph.c */ +#define CheckGraph libmetis__CheckGraph +#define CheckInputGraphWeights libmetis__CheckInputGraphWeights +#define FixGraph libmetis__FixGraph + +/* coarsen.c */ +#define CoarsenGraph libmetis__CoarsenGraph +#define Match_RM libmetis__Match_RM +#define Match_SHEM libmetis__Match_SHEM +#define Match_2Hop libmetis__Match_2Hop +#define Match_2HopAny libmetis__Match_2HopAny +#define Match_2HopAll libmetis__Match_2HopAll +#define Match_JC libmetis__Match_JC +#define PrintCGraphStats libmetis__PrintCGraphStats +#define CreateCoarseGraph libmetis__CreateCoarseGraph +#define CreateCoarseGraphNoMask libmetis__CreateCoarseGraphNoMask +#define CreateCoarseGraphPerm libmetis__CreateCoarseGraphPerm +#define SetupCoarseGraph libmetis__SetupCoarseGraph +#define ReAdjustMemory libmetis__ReAdjustMemory + +/* compress.c */ +#define CompressGraph libmetis__CompressGraph +#define PruneGraph libmetis__PruneGraph + +/* contig.c */ +#define 
FindPartitionInducedComponents libmetis__FindPartitionInducedComponents +#define IsConnected libmetis__IsConnected +#define IsConnectedSubdomain libmetis__IsConnectedSubdomain +#define FindSepInducedComponents libmetis__FindSepInducedComponents +#define EliminateComponents libmetis__EliminateComponents +#define MoveGroupContigForCut libmetis__MoveGroupContigForCut +#define MoveGroupContigForVol libmetis__MoveGroupContigForVol + +/* debug.c */ +#define ComputeCut libmetis__ComputeCut +#define ComputeVolume libmetis__ComputeVolume +#define ComputeMaxCut libmetis__ComputeMaxCut +#define CheckBnd libmetis__CheckBnd +#define CheckBnd2 libmetis__CheckBnd2 +#define CheckNodeBnd libmetis__CheckNodeBnd +#define CheckRInfo libmetis__CheckRInfo +#define CheckNodePartitionParams libmetis__CheckNodePartitionParams +#define IsSeparable libmetis__IsSeparable +#define CheckKWayVolPartitionParams libmetis__CheckKWayVolPartitionParams + +/* fm.c */ +#define FM_2WayRefine libmetis__FM_2WayRefine +#define FM_2WayCutRefine libmetis__FM_2WayCutRefine +#define FM_Mc2WayCutRefine libmetis__FM_Mc2WayCutRefine +#define SelectQueue libmetis__SelectQueue +#define Print2WayRefineStats libmetis__Print2WayRefineStats + +/* fortran.c */ +#define Change2CNumbering libmetis__Change2CNumbering +#define Change2FNumbering libmetis__Change2FNumbering +#define Change2FNumbering2 libmetis__Change2FNumbering2 +#define Change2FNumberingOrder libmetis__Change2FNumberingOrder +#define ChangeMesh2CNumbering libmetis__ChangeMesh2CNumbering +#define ChangeMesh2FNumbering libmetis__ChangeMesh2FNumbering +#define ChangeMesh2FNumbering2 libmetis__ChangeMesh2FNumbering2 + +/* graph.c */ +#define SetupGraph libmetis__SetupGraph +#define SetupGraph_adjrsum libmetis__SetupGraph_adjrsum +#define SetupGraph_tvwgt libmetis__SetupGraph_tvwgt +#define SetupGraph_label libmetis__SetupGraph_label +#define SetupSplitGraph libmetis__SetupSplitGraph +#define CreateGraph libmetis__CreateGraph +#define InitGraph 
libmetis__InitGraph +#define FreeRData libmetis__FreeRData +#define FreeGraph libmetis__FreeGraph +#define graph_WriteToDisk libmetis__graph_WriteToDisk +#define graph_ReadFromDisk libmetis__graph_ReadFromDisk + +/* initpart.c */ +#define Init2WayPartition libmetis__Init2WayPartition +#define InitSeparator libmetis__InitSeparator +#define RandomBisection libmetis__RandomBisection +#define GrowBisection libmetis__GrowBisection +#define McRandomBisection libmetis__McRandomBisection +#define McGrowBisection libmetis__McGrowBisection +#define GrowBisectionNode libmetis__GrowBisectionNode + +/* kmetis.c */ +#define MlevelKWayPartitioning libmetis__MlevelKWayPartitioning +#define InitKWayPartitioning libmetis__InitKWayPartitioning + +/* kwayfm.c */ +#define Greedy_KWayOptimize libmetis__Greedy_KWayOptimize +#define Greedy_KWayCutOptimize libmetis__Greedy_KWayCutOptimize +#define Greedy_KWayVolOptimize libmetis__Greedy_KWayVolOptimize +#define Greedy_McKWayCutOptimize libmetis__Greedy_McKWayCutOptimize +#define Greedy_McKWayVolOptimize libmetis__Greedy_McKWayVolOptimize +#define IsArticulationNode libmetis__IsArticulationNode +#define KWayVolUpdate libmetis__KWayVolUpdate + +/* kwayrefine.c */ +#define RefineKWay libmetis__RefineKWay +#define AllocateKWayPartitionMemory libmetis__AllocateKWayPartitionMemory +#define ComputeKWayPartitionParams libmetis__ComputeKWayPartitionParams +#define ProjectKWayPartition libmetis__ProjectKWayPartition +#define ComputeKWayBoundary libmetis__ComputeKWayBoundary +#define ComputeKWayVolGains libmetis__ComputeKWayVolGains +#define IsBalanced libmetis__IsBalanced + +/* mcutil */ +#define rvecle libmetis__rvecle +#define rvecge libmetis__rvecge +#define rvecsumle libmetis__rvecsumle +#define rvecmaxdiff libmetis__rvecmaxdiff +#define ivecle libmetis__ivecle +#define ivecge libmetis__ivecge +#define ivecaxpylez libmetis__ivecaxpylez +#define ivecaxpygez libmetis__ivecaxpygez +#define BetterVBalance libmetis__BetterVBalance +#define 
BetterBalance2Way libmetis__BetterBalance2Way +#define BetterBalanceKWay libmetis__BetterBalanceKWay +#define ComputeLoadImbalance libmetis__ComputeLoadImbalance +#define ComputeLoadImbalanceDiff libmetis__ComputeLoadImbalanceDiff +#define ComputeLoadImbalanceDiffVec libmetis__ComputeLoadImbalanceDiffVec +#define ComputeLoadImbalanceVec libmetis__ComputeLoadImbalanceVec + +/* mesh.c */ +#define CreateGraphDual libmetis__CreateGraphDual +#define FindCommonElements libmetis__FindCommonElements +#define CreateGraphNodal libmetis__CreateGraphNodal +#define FindCommonNodes libmetis__FindCommonNodes +#define CreateMesh libmetis__CreateMesh +#define InitMesh libmetis__InitMesh +#define FreeMesh libmetis__FreeMesh + +/* meshpart.c */ +#define InduceRowPartFromColumnPart libmetis__InduceRowPartFromColumnPart + +/* minconn.c */ +#define ComputeSubDomainGraph libmetis__ComputeSubDomainGraph +#define UpdateEdgeSubDomainGraph libmetis__UpdateEdgeSubDomainGraph +#define PrintSubDomainGraph libmetis__PrintSubDomainGraph +#define EliminateSubDomainEdges libmetis__EliminateSubDomainEdges +#define MoveGroupMinConnForCut libmetis__MoveGroupMinConnForCut +#define MoveGroupMinConnForVol libmetis__MoveGroupMinConnForVol + +/* mincover.c */ +#define MinCover libmetis__MinCover +#define MinCover_Augment libmetis__MinCover_Augment +#define MinCover_Decompose libmetis__MinCover_Decompose +#define MinCover_ColDFS libmetis__MinCover_ColDFS +#define MinCover_RowDFS libmetis__MinCover_RowDFS + +/* mmd.c */ +#define genmmd libmetis__genmmd +#define mmdelm libmetis__mmdelm +#define mmdint libmetis__mmdint +#define mmdnum libmetis__mmdnum +#define mmdupd libmetis__mmdupd + + +/* ometis.c */ +#define MlevelNestedDissection libmetis__MlevelNestedDissection +#define MlevelNestedDissectionCC libmetis__MlevelNestedDissectionCC +#define MlevelNodeBisectionMultiple libmetis__MlevelNodeBisectionMultiple +#define MlevelNodeBisectionL2 libmetis__MlevelNodeBisectionL2 +#define MlevelNodeBisectionL1 
libmetis__MlevelNodeBisectionL1 +#define SplitGraphOrder libmetis__SplitGraphOrder +#define SplitGraphOrderCC libmetis__SplitGraphOrderCC +#define MMDOrder libmetis__MMDOrder + +/* options.c */ +#define SetupCtrl libmetis__SetupCtrl +#define SetupKWayBalMultipliers libmetis__SetupKWayBalMultipliers +#define Setup2WayBalMultipliers libmetis__Setup2WayBalMultipliers +#define PrintCtrl libmetis__PrintCtrl +#define FreeCtrl libmetis__FreeCtrl +#define CheckParams libmetis__CheckParams + +/* parmetis.c */ +#define MlevelNestedDissectionP libmetis__MlevelNestedDissectionP +#define FM_2WayNodeRefine1SidedP libmetis__FM_2WayNodeRefine1SidedP +#define FM_2WayNodeRefine2SidedP libmetis__FM_2WayNodeRefine2SidedP + +/* pmetis.c */ +#define MlevelRecursiveBisection libmetis__MlevelRecursiveBisection +#define MultilevelBisect libmetis__MultilevelBisect +#define SplitGraphPart libmetis__SplitGraphPart + +/* refine.c */ +#define Refine2Way libmetis__Refine2Way +#define Allocate2WayPartitionMemory libmetis__Allocate2WayPartitionMemory +#define Compute2WayPartitionParams libmetis__Compute2WayPartitionParams +#define Project2WayPartition libmetis__Project2WayPartition + +/* separator.c */ +#define ConstructSeparator libmetis__ConstructSeparator +#define ConstructMinCoverSeparator libmetis__ConstructMinCoverSeparator + +/* sfm.c */ +#define FM_2WayNodeRefine2Sided libmetis__FM_2WayNodeRefine2Sided +#define FM_2WayNodeRefine1Sided libmetis__FM_2WayNodeRefine1Sided +#define FM_2WayNodeBalance libmetis__FM_2WayNodeBalance + +/* srefine.c */ +#define Refine2WayNode libmetis__Refine2WayNode +#define Allocate2WayNodePartitionMemory libmetis__Allocate2WayNodePartitionMemory +#define Compute2WayNodePartitionParams libmetis__Compute2WayNodePartitionParams +#define Project2WayNodePartition libmetis__Project2WayNodePartition + +/* stat.c */ +#define ComputePartitionInfoBipartite libmetis__ComputePartitionInfoBipartite +#define ComputePartitionBalance libmetis__ComputePartitionBalance +#define 
ComputeElementBalance libmetis__ComputeElementBalance + +/* timing.c */ +#define InitTimers libmetis__InitTimers +#define PrintTimers libmetis__PrintTimers + +/* util.c */ +#define iargmax_strd libmetis__iargmax_strd +#define iargmax_nrm libmetis__iargmax_nrm +#define iargmax2_nrm libmetis__iargmax2_nrm +#define rargmax2 libmetis__rargmax2 +#define InitRandom libmetis__InitRandom +#define metis_rcode libmetis__metis_rcode + +/* wspace.c */ +#define AllocateWorkSpace libmetis__AllocateWorkSpace +#define AllocateRefinementWorkSpace libmetis__AllocateRefinementWorkSpace +#define FreeWorkSpace libmetis__FreeWorkSpace +#define wspacemalloc libmetis__wspacemalloc +#define wspacepush libmetis__wspacepush +#define wspacepop libmetis__wspacepop +#define iwspacemalloc libmetis__iwspacemalloc +#define rwspacemalloc libmetis__rwspacemalloc +#define ikvwspacemalloc libmetis__ikvwspacemalloc +#define cnbrpoolReset libmetis__cnbrpoolReset +#define cnbrpoolGetNext libmetis__cnbrpoolGetNext +#define vnbrpoolReset libmetis__vnbrpoolReset +#define vnbrpoolGetNext libmetis__vnbrpoolGetNext + +#endif + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/separator.c b/3rdParty/metis/metis-5.1.1/libmetis/separator.c new file mode 100644 index 000000000..72dae9b64 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/separator.c @@ -0,0 +1,176 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * separator.c + * + * This file contains code for separator extraction + * + * Started 8/1/97 + * George + * + * $Id: separator.c 10481 2011-07-05 18:01:23Z karypis $ + * + */ + +#include "metislib.h" + +/************************************************************************* +* This function takes a bisection and constructs a minimum weight vertex +* separator out of it. It uses the node-based separator refinement for it. 
+**************************************************************************/ +void ConstructSeparator(ctrl_t *ctrl, graph_t *graph) +{ /* Seeds the separator (where == 2) with every boundary vertex that has at least one edge, rebuilds the node-partition data of graph, and refines it with FM_2WayNodeRefine2Sided (niter 1) followed by FM_2WayNodeRefine1Sided (niter 4). */ + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *where, *bndind; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + nbnd = graph->nbnd; + bndind = graph->bndind; + + where = icopy(nvtxs, graph->where, iwspacemalloc(ctrl, nvtxs)); /* work on a workspace copy; NOTE(review): presumably FreeRData() below releases graph->where, confirm in graph.c */ + + /* Put the nodes in the boundary into the separator */ + for (i=0; i<nbnd; i++) { + j = bndind[i]; + if (xadj[j+1]-xadj[j] > 0) /* Ignore islands */ + where[j] = 2; + } + + FreeRData(graph); + + Allocate2WayNodePartitionMemory(ctrl, graph); + icopy(nvtxs, where, graph->where); /* install the seeded partition before the workspace copy is popped */ + + WCOREPOP; + + ASSERT(IsSeparable(graph)); + + Compute2WayNodePartitionParams(ctrl, graph); + + ASSERT(CheckNodePartitionParams(graph)); + + FM_2WayNodeRefine2Sided(ctrl, graph, 1); + FM_2WayNodeRefine1Sided(ctrl, graph, 4); + + ASSERT(IsSeparable(graph)); + +} + + + +/************************************************************************* +* This function takes a bisection and constructs a minimum weight vertex +* separator out of it. It uses an unweighted minimum-cover algorithm +* followed by node-based separator refinement. 
+**************************************************************************/ +void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, ii, j, jj, k, l, nvtxs, nbnd, bnvtxs[3], bnedges[2], csize; + idx_t *xadj, *adjncy, *bxadj, *badjncy; + idx_t *where, *bndind, *bndptr, *vmap, *ivmap, *cover; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + + nbnd = graph->nbnd; + bndind = graph->bndind; + bndptr = graph->bndptr; + where = graph->where; + + vmap = iwspacemalloc(ctrl, nvtxs); /* graph vertex -> bipartite vertex; reused at the end as a copy of where[] */ + ivmap = iwspacemalloc(ctrl, nbnd); /* bipartite vertex -> graph vertex */ + cover = iwspacemalloc(ctrl, nbnd); /* receives the cover computed by MinCover */ + + if (nbnd > 0) { + /* Go through the boundary and determine the sizes of the bipartite graph */ + bnvtxs[0] = bnvtxs[1] = bnedges[0] = bnedges[1] = 0; + for (i=0; i<nbnd; i++) { + j = bndind[i]; + k = where[j]; + if (xadj[j+1]-xadj[j] > 0) { /* boundary vertices without edges are left out */ + bnvtxs[k]++; + bnedges[k] += xadj[j+1]-xadj[j]; + } + } + + bnvtxs[2] = bnvtxs[0]+bnvtxs[1]; /* total number of bipartite vertices */ + bnvtxs[1] = bnvtxs[0]; /* side-1 vertices are numbered after all side-0 vertices */ + bnvtxs[0] = 0; + + bxadj = iwspacemalloc(ctrl, bnvtxs[2]+1); + badjncy = iwspacemalloc(ctrl, bnedges[0]+bnedges[1]+1); + + /* Construct the ivmap and vmap */ + ASSERT(iset(nvtxs, -1, vmap) == vmap); /* NOTE(review): this iset() is an argument of ASSERT, so it presumably disappears when assertions are compiled out; vmap[] is then only defined for the vertices assigned in the loop below. Confirm against the ASSERT macro. */ + for (i=0; i<nbnd; i++) { + j = bndind[i]; + k = where[j]; + if (xadj[j+1]-xadj[j] > 0) { + vmap[j] = bnvtxs[k]; + ivmap[bnvtxs[k]++] = j; + } + } + + /* OK, go through and put the vertices of each part starting from 0 */ + bnvtxs[1] = bnvtxs[0]; /* restore the two starting offsets that the loop above advanced */ + bnvtxs[0] = 0; + bxadj[0] = l = 0; + for (k=0; k<2; k++) { + for (ii=0; ii<nbnd; ii++) { + i = bndind[ii]; + if (where[i] == k && xadj[i] < xadj[i+1]) { + for (j=xadj[i]; j<xadj[i+1]; j++) { + jj = adjncy[j]; + if (where[jj] != k) { /* only edges that cross to the other side enter the bipartite graph */ + ASSERT(bndptr[jj] != -1); + ASSERTP(vmap[jj] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", jj, vmap[jj], graph->bndptr[jj])); + badjncy[l++] = vmap[jj]; + } + } + bxadj[++bnvtxs[k]] = l; + } + } + } + + ASSERT(l <= bnedges[0]+bnedges[1]); + + MinCover(bxadj, badjncy, bnvtxs[0], bnvtxs[1], cover, &csize); + + IFSET(ctrl->dbglvl, 
METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, bnvtxs[0], bnvtxs[1]-bnvtxs[0], csize)); + + for (i=0; i<csize; i++) { + j = ivmap[cover[i]]; + where[j] = 2; /* where aliases graph->where: the cover vertices become part 2 */ + } + } + else { + IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, + printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, (idx_t)0, (idx_t)0, (idx_t)0)); + } + + /* Prepare to refine the vertex separator */ + icopy(nvtxs, graph->where, vmap); /* keep where[] in the workspace array; NOTE(review): presumably FreeRData() releases graph->where, confirm in graph.c */ + + FreeRData(graph); + + Allocate2WayNodePartitionMemory(ctrl, graph); + icopy(nvtxs, vmap, graph->where); /* copy back before the workspace is popped */ + + WCOREPOP; + + Compute2WayNodePartitionParams(ctrl, graph); + + ASSERT(CheckNodePartitionParams(graph)); + + FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter); + + ASSERT(IsSeparable(graph)); +} + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/sfm.c b/3rdParty/metis/metis-5.1.1/libmetis/sfm.c new file mode 100644 index 000000000..d41817380 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/sfm.c @@ -0,0 +1,612 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * sfm.c + * + * This file contains code that implements an FM-based separator refinement + * + * Started 8/1/97 + * George + * + * $Id: sfm.c 10874 2011-10-17 23:13:00Z karypis $ + * + */ + +#include "metislib.h" + + +/*************************************************************************/ +/*! 
This function performs a node-based FM refinement */ +/**************************************************************************/ +void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter) +{ /* Runs at most niter passes. In each pass separator vertices (where == 2) are moved into side 0 or side 1, their neighbors on the opposite side are pulled into the separator, and the moves made after the best separator weight pwgts[2] was seen are rolled back. A pass that brings no improvement ends the refinement. */ + idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; + idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; + idx_t *mptr, *mind, *moved, *swaps; + rpq_t *queues[2]; + nrinfo_t *rinfo; + idx_t higain, oldgain, mincut, initcut, mincutorder; + idx_t pass, to, other, limit; + idx_t badmaxpwgt, mindiff, newdiff; + idx_t u[2], g[2] = {0, 0}; /* g is only assigned when both queues are non-empty but is always printed by the MOVEINFO trace, so give it a defined value */ + real_t mult; + + WCOREPUSH; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + + bndind = graph->bndind; + bndptr = graph->bndptr; + where = graph->where; + pwgts = graph->pwgts; + rinfo = graph->nrinfo; + + queues[0] = rpqCreate(nvtxs); /* queues[s] orders the candidates for a move into side s */ + queues[1] = rpqCreate(nvtxs); + + moved = iwspacemalloc(ctrl, nvtxs); + swaps = iwspacemalloc(ctrl, nvtxs); + mptr = iwspacemalloc(ctrl, nvtxs+1); + mind = iwspacemalloc(ctrl, 2*nvtxs); + + mult = 0.5*ctrl->ubfactors[0]; + badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2])); /* a move is rejected if it would push a side above this weight */ + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("Partitions-N2: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); + + for (pass=0; pass<niter; pass++) { + iset(nvtxs, -1, moved); /* -1 means not moved and not pulled into the separator during this pass */ + rpqReset(queues[0]); + rpqReset(queues[1]); + + mincutorder = -1; + initcut = mincut = graph->mincut; + nbnd = graph->nbnd; + + /* use the swaps array in place of the traditional perm array to save memory */ + irandArrayPermute(nbnd, swaps, nbnd, 1); + for (ii=0; ii<nbnd; ii++) { + i = bndind[swaps[ii]]; + ASSERT(where[i] == 2); + rpqInsert(queues[0], i, vwgt[i]-rinfo[i].edegrees[1]); /* key: own weight minus the weight this move pulls in from the other side */ + rpqInsert(queues[1], i, vwgt[i]-rinfo[i].edegrees[0]); + } + + ASSERT(CheckNodeBnd(graph, nbnd)); + ASSERT(CheckNodePartitionParams(graph)); + + limit = (ctrl->compress ? 
gk_min(5*nbnd, 400) : gk_min(2*nbnd, 300)); + + /****************************************************** + * Get into the FM loop + *******************************************************/ + mptr[0] = nmind = 0; + mindiff = iabs(pwgts[0]-pwgts[1]); + to = (pwgts[0] < pwgts[1] ? 0 : 1); + for (nswaps=0; nswaps<nvtxs; nswaps++) { + u[0] = rpqSeeTopVal(queues[0]); /* -1 is treated below as: this queue is empty */ + u[1] = rpqSeeTopVal(queues[1]); + if (u[0] != -1 && u[1] != -1) { + g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1]; + g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0]; + + to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); /* ties alternate with the pass number */ + + if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) + to = (to+1)%2; + } + else if (u[0] == -1 && u[1] == -1) { + break; + } + else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) { + to = 0; + } + else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) { + to = 1; + } + else + break; + + other = (to+1)%2; + + higain = rpqGetTop(queues[to]); + if (moved[higain] == -1) /* Delete if it was in the separator originally */ + rpqDelete(queues[other], higain); + + ASSERT(bndptr[higain] != -1); + + /* The following check is to ensure we break out if there is a possibility + of over-running the mind array. 
*/ + if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) + break; + + pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); + + newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); + if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { + mincut = pwgts[2]; + mincutorder = nswaps; + mindiff = newdiff; + } + else { + if (nswaps - mincutorder > 2*limit || + (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) { + pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); /* undo the tentative update made above */ + break; /* No further improvement, break out */ + } + } + + BNDDelete(nbnd, bndind, bndptr, higain); + pwgts[to] += vwgt[higain]; + where[higain] = to; + moved[higain] = nswaps; + swaps[nswaps] = higain; /* swaps now records the move order for the rollback */ + + + /********************************************************** + * Update the degrees of the affected nodes + ***********************************************************/ + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ + oldgain = vwgt[k]-rinfo[k].edegrees[to]; + rinfo[k].edegrees[to] += vwgt[higain]; + if (moved[k] == -1 || moved[k] == -(2+other)) + rpqUpdate(queues[other], k, oldgain-vwgt[higain]); + } + else if (where[k] == other) { /* This vertex is pulled into the separator */ + ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k])); + BNDInsert(nbnd, bndind, bndptr, k); + + mind[nmind++] = k; /* Keep track for rollback */ + where[k] = 2; + pwgts[other] -= vwgt[k]; + + edegrees = rinfo[k].edegrees; + edegrees[0] = edegrees[1] = 0; + for (jj=xadj[k]; jj<xadj[k+1]; jj++) { + kk = adjncy[jj]; + if (where[kk] != 2) + edegrees[where[kk]] += vwgt[kk]; + else { + oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; + rinfo[kk].edegrees[other] -= vwgt[k]; + if (moved[kk] == -1 || moved[kk] == -(2+to)) + rpqUpdate(queues[to], kk, oldgain+vwgt[k]); + } + } + + /* Insert the new vertex into the priority queue. 
Only one side! */ + if (moved[k] == -1) { + rpqInsert(queues[to], k, vwgt[k]-edegrees[other]); + moved[k] = -(2+to); /* -(2+s) marks a vertex pulled in during this pass that sits only in queues[s] */ + } + } + } + mptr[nswaps+1] = nmind; /* the vertices pulled in by move nswaps are mind[mptr[nswaps]] up to mind[mptr[nswaps+1]-1] */ + + IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, /* u[other] is -1 when that queue was empty, so the vwgt lookup for it is guarded */ + printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] [%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, g[to], g[other], vwgt[u[to]], (u[other] != -1 ? vwgt[u[other]] : (idx_t)0), pwgts[0], pwgts[1], pwgts[2])); + + } + + + /**************************************************************** + * Roll back computation + *****************************************************************/ + for (nswaps--; nswaps>mincutorder; nswaps--) { + higain = swaps[nswaps]; + + ASSERT(CheckNodePartitionParams(graph)); + + to = where[higain]; + other = (to+1)%2; + INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); + where[higain] = 2; /* the moved vertex goes back into the separator */ + BNDInsert(nbnd, bndind, bndptr, higain); + + edegrees = rinfo[higain].edegrees; + edegrees[0] = edegrees[1] = 0; + for (j=xadj[higain]; j<xadj[higain+1]; j++) { + k = adjncy[j]; + if (where[k] == 2) + rinfo[k].edegrees[to] -= vwgt[higain]; + else + edegrees[where[k]] += vwgt[k]; + } + + /* Push nodes out of the separator */ + for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { + k = mind[j]; + ASSERT(where[k] == 2); + where[k] = other; + INC_DEC(pwgts[other], pwgts[2], vwgt[k]); + BNDDelete(nbnd, bndind, bndptr, k); + for (jj=xadj[k]; jj<xadj[k+1]; jj++) { + kk = adjncy[jj]; + if (where[kk] == 2) + rinfo[kk].edegrees[other] += vwgt[k]; + } + } + } + + ASSERT(mincut == pwgts[2]); + + IFSET(ctrl->dbglvl, METIS_DBG_REFINE, + printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); + + graph->mincut = mincut; + graph->nbnd = nbnd; + + if (mincutorder == -1 || mincut >= initcut) /* no improving move in this pass, so stop */ + break; + } + + rpqDestroy(queues[0]); + rpqDestroy(queues[1]); + + WCOREPOP; +} + + +/*************************************************************************/ +/*! 
This function performs a node-based FM refinement.
+    Each refinement iteration is split into two sub-iterations.
+    In each sub-iteration only moves to one of the left/right partitions
+    is allowed; hence, it is one-sided.
+
+    \param ctrl  control structure; ubfactors[0], compress and dbglvl are
+                 read, and it is handed to iwspacemalloc() for the
+                 swaps/mptr/mind scratch arrays.
+    \param graph graph whose where[], pwgts[], bndind[]/bndptr[] and
+                 nrinfo[].edegrees are updated in place; graph->mincut and
+                 graph->nbnd are written at the end of every pass.
+    \param niter iteration budget; the pass loop runs at most 2*niter times
+                 (one pass per side).
+
+    Every pass moves separator vertices (where[] == 2) into side 'to', pulls
+    their neighbors that sit on side 'other' into the separator, and finally
+    rolls the moves back to the swap (mincutorder) that gave the smallest
+    separator weight.
+*/
+/**************************************************************************/
+void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, iend;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *swaps;
+  rpq_t *queue;
+  nrinfo_t *rinfo;
+  idx_t higain, mincut, initcut, mincutorder;
+  idx_t pass, to, other, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+  real_t mult;
+
+  WCOREPUSH; /* paired with the WCOREPOP at the end of the function */
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where = graph->where;
+  pwgts = graph->pwgts;
+  rinfo = graph->nrinfo;
+
+  queue = rpqCreate(nvtxs);
+
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  mptr = iwspacemalloc(ctrl, nvtxs+1);
+  mind = iwspacemalloc(ctrl, 2*nvtxs);
+
+  mult = 0.5*ctrl->ubfactors[0];
+  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2])); /* weight cap for the receiving side */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+
+  to = (pwgts[0] < pwgts[1] ? 1 : 0); /* flipped at the top of each pass, so the first pass moves into the lighter (or equal) side */
+  for (pass=0; pass<2*niter; pass++) { /* the 2*niter is for the two sides */
+    other = to;
+    to = (to+1)%2;
+
+    rpqReset(queue);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[other]);
+    }
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+
+    limit = (ctrl->compress ? gk_min(5*nbnd, 500) : gk_min(3*nbnd, 300)); /* budget of non-improving swaps used by the early-exit test below */
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));
+    mptr[0] = nmind = 0;
+    mindiff = iabs(pwgts[0]-pwgts[1]);
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      if ((higain = rpqGetTop(queue)) == -1)
+        break;
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a possibility
+         of over-running the mind array. */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1)
+        break;
+
+      if (pwgts[to]+vwgt[higain] > badmaxpwgt)
+        break; /* No point going any further. Balance will be bad */
+
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);
+
+      newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]));
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut = pwgts[2];
+        mincutorder = nswaps;
+        mindiff = newdiff;
+      }
+      else {
+        if (nswaps - mincutorder > 3*limit ||
+            (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); /* undo the tentative update made above */
+          break; /* No further improvement, break out */
+        }
+      }
+
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to] += vwgt[higain];
+      where[higain] = to;
+      swaps[nswaps] = higain;
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux1Tmr));
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          rinfo[k].edegrees[to] += vwgt[higain];
+        }
+        else if (where[k] == other) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k; /* Keep track for rollback */
+          where[k] = 2;
+          pwgts[other] -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;
+          for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2)
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              rinfo[kk].edegrees[other] -= vwgt[k];
+
+              /* Since the moves are one-sided this vertex has not been moved yet */
+              rpqUpdate(queue, kk, vwgt[kk]-rinfo[kk].edegrees[other]);
+            }
+          }
+
+          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
+          rpqInsert(queue, k, vwgt[k]-edegrees[other]);
+        }
+      }
+      mptr[nswaps+1] = nmind; /* mind[mptr[s]] .. mind[mptr[s+1]-1] are the vertices pulled in by swap s */
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux1Tmr));
+
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n",
+                higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain],
+                pwgts[0], pwgts[1], pwgts[2], nswaps, limit));
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));
+
+
+    /****************************************************************
+    * Roll back computation
+    *****************************************************************/
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux2Tmr));
+    for (nswaps--; nswaps>mincutorder; nswaps--) {
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+      ASSERT(where[higain] == to);
+
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2)
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = other;
+        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2)
+            rinfo[kk].edegrees[other] += vwgt[k];
+        }
+      }
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux2Tmr));
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));
+
+    graph->mincut = mincut;
+    graph->nbnd = nbnd;
+
+    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut)) /* only give up after both sides have been tried */
+      break;
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function balances the left/right partitions of a separator
+    tri-section.
+
+    It returns without touching the graph when the heavier side is already
+    below badmaxpwgt = (idx_t)(0.5*ubfactors[0]*(pwgts[0]+pwgts[1])), or when
+    the difference between the two sides is smaller than 3*tvwgt[0]/nvtxs.
+    Otherwise separator vertices are moved into the lighter side ('to') and
+    their neighbors on the heavier side ('other') are pulled into the
+    separator. Moves are never rolled back; graph->mincut and graph->nbnd
+    are updated on exit. */
+/*************************************************************************/
+void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, gain;
+  idx_t badmaxpwgt, higain, oldgain, pass, to, other;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *perm, *moved;
+  rpq_t *queue;
+  nrinfo_t *rinfo;
+  real_t mult;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where = graph->where;
+  pwgts = graph->pwgts;
+  rinfo = graph->nrinfo;
+
+  mult = 0.5*ctrl->ubfactors[0];
+
+  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]));
+  if (gk_max(pwgts[0], pwgts[1]) < badmaxpwgt)
+    return;
+  if (iabs(pwgts[0]-pwgts[1]) < 3*graph->tvwgt[0]/nvtxs)
+    return;
+
+  WCOREPUSH; /* pushed only after the early returns above; popped at the end */
+
+  to = (pwgts[0] < pwgts[1] ? 0 : 1); /* the lighter side receives vertices */
+  other = (to+1)%2;
+
+  queue = rpqCreate(nvtxs);
+
+  perm = iwspacemalloc(ctrl, nvtxs);
+  moved = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs)); /* -1: not yet taken from the queue */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX" [B]\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+
+  nbnd = graph->nbnd;
+  irandArrayPermute(nbnd, perm, nbnd, 1);
+  for (ii=0; ii<nbnd; ii++) {
+    i = bndind[perm[ii]];
+    ASSERT(where[i] == 2);
+    rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[other]);
+  }
+
+  ASSERT(CheckNodeBnd(graph, nbnd));
+  ASSERT(CheckNodePartitionParams(graph));
+
+  /******************************************************
+  * Get into the FM loop
+  *******************************************************/
+  for (nswaps=0; nswaps<nvtxs; nswaps++) {
+    if ((higain = rpqGetTop(queue)) == -1)
+      break;
+
+    moved[higain] = 1;
+
+    gain = vwgt[higain]-rinfo[higain].edegrees[other];
+    badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1])); /* recomputed because pwgts[] change with every move */
+
+    /* break if other is now underweight */
+    if (pwgts[to] > pwgts[other])
+      break;
+
+    /* break if balance is achieved and no +ve or zero gain */
+    if (gain < 0 && pwgts[other] < badmaxpwgt)
+      break;
+
+    /* skip this vertex if it will violate balance on the other side */
+    if (pwgts[to]+vwgt[higain] > badmaxpwgt)
+      continue;
+
+    ASSERT(bndptr[higain] != -1);
+
+    pwgts[2] -= gain;
+
+    BNDDelete(nbnd, bndind, bndptr, higain);
+    pwgts[to] += vwgt[higain];
+    where[higain] = to;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+      printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %3"PRIDX", \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, vwgt[higain]-rinfo[higain].edegrees[other], pwgts[0], pwgts[1], pwgts[2]));
+
+
+    /**********************************************************
+    * Update the degrees of the affected nodes
+    ***********************************************************/
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+      if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+        rinfo[k].edegrees[to] += vwgt[higain];
+      }
+      else if (where[k] == other) { /* This vertex is pulled into the separator */
+        ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+        BNDInsert(nbnd, bndind, bndptr, k);
+
+        where[k] = 2;
+        pwgts[other] -= vwgt[k];
+
+        edegrees = rinfo[k].edegrees;
+        edegrees[0] = edegrees[1] = 0;
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] != 2)
+            edegrees[where[kk]] += vwgt[kk];
+          else {
+            ASSERT(bndptr[kk] != -1);
+            oldgain = vwgt[kk]-rinfo[kk].edegrees[other];
+            rinfo[kk].edegrees[other] -= vwgt[k];
+
+            if (moved[kk] == -1) /* vertices already taken from the queue are not re-keyed */
+              rpqUpdate(queue, kk, oldgain+vwgt[k]);
+          }
+        }
+
+        /* Insert the new vertex into the priority queue */
+        rpqInsert(queue, k, vwgt[k]-edegrees[other]);
+      }
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("\tBalanced sep: %6"PRIDX" at %4"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", pwgts[2], nswaps, pwgts[0], pwgts[1], nbnd));
+
+  graph->mincut = pwgts[2];
+  graph->nbnd = nbnd;
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/srefine.c b/3rdParty/metis/metis-5.1.1/libmetis/srefine.c
new file mode 100644
index 000000000..5d3560bde
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/srefine.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * srefine.c
+ *
+ * This file contains code for the separator refinement algorithms
+ *
+ * Started 8/1/97
+ * George
+ *
+ * $Id: srefine.c 14362 2013-05-21 21:35:23Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function is the entry point of the separator refinement.
+    It does not perform any refinement on graph, but it starts by first
+    projecting it to the next level finer graph and proceeds from there. 
*/
+/*************************************************************************/
+void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph)
+{
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr));
+
+  /* Nothing to project when the partitioned graph is the original one:
+     only the partition parameters have to be derived. */
+  if (graph == orggraph)
+    Compute2WayNodePartitionParams(ctrl, graph);
+
+  /* Otherwise climb the hierarchy one level at a time; every level is
+     projected, balanced and then refined with the configured scheme. */
+  while (graph != orggraph) {
+    graph = graph->finer;
+    graph_ReadFromDisk(ctrl, graph);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr));
+    Project2WayNodePartition(ctrl, graph);
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr));
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr));
+    FM_2WayNodeBalance(ctrl, graph);
+
+    ASSERT(CheckNodePartitionParams(graph));
+
+    if (ctrl->rtype == METIS_RTYPE_SEP2SIDED) {
+      FM_2WayNodeRefine2Sided(ctrl, graph, ctrl->niter);
+    }
+    else if (ctrl->rtype == METIS_RTYPE_SEP1SIDED) {
+      FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter);
+    }
+    else {
+      gk_errexit(SIGERR, "Unknown rtype of %d\n", ctrl->rtype);
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr));
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr));
+}
+
+
+/*************************************************************************/
+/*! 
This function allocates memory for 2-way node-based refinement */ +/**************************************************************************/ +void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph) +{ + idx_t nvtxs; + + nvtxs = graph->nvtxs; + + graph->pwgts = imalloc(3, "Allocate2WayNodePartitionMemory: pwgts"); + graph->where = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: where"); + graph->bndptr = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: bndptr"); + graph->bndind = imalloc(nvtxs, "Allocate2WayNodePartitionMemory: bndind"); + graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "Allocate2WayNodePartitionMemory: nrinfo"); +} + + +/*************************************************************************/ +/*! This function computes the edegrees[] to the left & right sides */ +/*************************************************************************/ +void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, nvtxs, nbnd; + idx_t *xadj, *adjncy, *vwgt; + idx_t *where, *pwgts, *bndind, *bndptr, *edegrees; + nrinfo_t *rinfo; + idx_t me, other; + + nvtxs = graph->nvtxs; + xadj = graph->xadj; + vwgt = graph->vwgt; + adjncy = graph->adjncy; + + where = graph->where; + rinfo = graph->nrinfo; + pwgts = iset(3, 0, graph->pwgts); + bndind = graph->bndind; + bndptr = iset(nvtxs, -1, graph->bndptr); + + + /*------------------------------------------------------------ + / Compute now the separator external degrees + /------------------------------------------------------------*/ + nbnd = 0; + for (i=0; i<nvtxs; i++) { + me = where[i]; + pwgts[me] += vwgt[i]; + + ASSERT(me >=0 && me <= 2); + + if (me == 2) { /* If it is on the separator do some computations */ + BNDInsert(nbnd, bndind, bndptr, i); + + edegrees = rinfo[i].edegrees; + edegrees[0] = edegrees[1] = 0; + + for (j=xadj[i]; j<xadj[i+1]; j++) { + other = where[adjncy[j]]; + if (other != 2) + edegrees[other] += vwgt[adjncy[j]]; + } + } + } + + 
ASSERT(CheckNodeBnd(graph, nbnd)); + + graph->mincut = pwgts[2]; + graph->nbnd = nbnd; +} + + +/*************************************************************************/ +/*! This function projects the node-based bisection */ +/*************************************************************************/ +void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph) +{ + idx_t i, j, nvtxs; + idx_t *cmap, *where, *cwhere; + graph_t *cgraph; + + cgraph = graph->coarser; + cwhere = cgraph->where; + + nvtxs = graph->nvtxs; + cmap = graph->cmap; + + Allocate2WayNodePartitionMemory(ctrl, graph); + where = graph->where; + + /* Project the partition */ + for (i=0; i<nvtxs; i++) { + where[i] = cwhere[cmap[i]]; + ASSERTP(where[i] >= 0 && where[i] <= 2, ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", + i, cmap[i], where[i], cwhere[cmap[i]])); + } + + FreeGraph(&graph->coarser); + graph->coarser = NULL; + + Compute2WayNodePartitionParams(ctrl, graph); +} diff --git a/3rdParty/metis/metis-5.1.1/libmetis/stat.c b/3rdParty/metis/metis-5.1.1/libmetis/stat.c new file mode 100644 index 000000000..686009431 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/stat.c @@ -0,0 +1,179 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * stat.c + * + * This file computes various statistics + * + * Started 7/25/97 + * George + * + * $Id: stat.c 17513 2014-08-05 16:20:50Z dominique $ + * + */ + +#include "metislib.h" + + +/************************************************************************* +* This function computes cuts and balance information +**************************************************************************/ +void ComputePartitionInfoBipartite(graph_t *graph, idx_t nparts, idx_t *where) +{ + idx_t i, j, k, nvtxs, ncon, mustfree=0; + idx_t *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts, *tmpptr; + idx_t *padjncy, *padjwgt, *padjcut; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + xadj = graph->xadj; + adjncy = graph->adjncy; + vwgt = graph->vwgt; + vsize = 
graph->vsize; + adjwgt = graph->adjwgt; + + if (vwgt == NULL) { + vwgt = graph->vwgt = ismalloc(nvtxs, 1, "vwgt"); + mustfree = 1; + } + if (adjwgt == NULL) { + adjwgt = graph->adjwgt = ismalloc(xadj[nvtxs], 1, "adjwgt"); + mustfree += 2; + } + + printf("%"PRIDX"-way Cut: %5"PRIDX", Vol: %5"PRIDX", ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where)); + + /* Compute balance information */ + kpwgts = ismalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts"); + + for (i=0; i<nvtxs; i++) { + for (j=0; j<ncon; j++) + kpwgts[where[i]*ncon+j] += vwgt[i*ncon+j]; + } + + if (ncon == 1) { + printf("\tBalance: %5.3"PRREAL" out of %5.3"PRREAL"\n", + 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1)), + 1.0*nparts*vwgt[iargmax(nvtxs, vwgt,1)]/(1.0*isum(nparts, kpwgts, 1))); + } + else { + printf("\tBalance:"); + for (j=0; j<ncon; j++) + printf(" (%5.3"PRREAL" out of %5.3"PRREAL")", + 1.0*nparts*kpwgts[ncon*iargmax_strd(nparts, kpwgts+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)), + 1.0*nparts*vwgt[ncon*iargmax_strd(nvtxs, vwgt+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon))); + printf("\n"); + } + + + /* Compute p-adjncy information */ + padjncy = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjncy"); + padjwgt = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt"); + padjcut = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt"); + + iset(nparts, 0, kpwgts); + for (i=0; i<nvtxs; i++) { + for (j=xadj[i]; j<xadj[i+1]; j++) { + if (where[i] != where[adjncy[j]]) { + padjncy[where[i]*nparts+where[adjncy[j]]] = 1; + padjcut[where[i]*nparts+where[adjncy[j]]] += adjwgt[j]; + if (kpwgts[where[adjncy[j]]] == 0) { + padjwgt[where[i]*nparts+where[adjncy[j]]] += vsize[i]; + kpwgts[where[adjncy[j]]] = 1; + } + } + } + for (j=xadj[i]; j<xadj[i+1]; j++) + kpwgts[where[adjncy[j]]] = 0; + } + + for (i=0; i<nparts; i++) + kpwgts[i] = isum(nparts, padjncy+i*nparts, 1); + printf("Min/Max/Avg/Bal # of adjacent subdomains: %5"PRIDX" %5"PRIDX" 
%5"PRIDX" %7.3"PRREAL"\n", + kpwgts[iargmin(nparts, kpwgts,1)], kpwgts[iargmax(nparts, kpwgts,1)], isum(nparts, kpwgts, 1)/nparts, + 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1))); + + for (i=0; i<nparts; i++) + kpwgts[i] = isum(nparts, padjcut+i*nparts, 1); + printf("Min/Max/Avg/Bal # of adjacent subdomain cuts: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n", + kpwgts[iargmin(nparts, kpwgts,1)], kpwgts[iargmax(nparts, kpwgts,1)], isum(nparts, kpwgts, 1)/nparts, + 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1))); + + for (i=0; i<nparts; i++) + kpwgts[i] = isum(nparts, padjwgt+i*nparts, 1); + printf("Min/Max/Avg/Bal/Frac # of interface nodes: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL" %7.3"PRREAL"\n", + kpwgts[iargmin(nparts, kpwgts,1)], kpwgts[iargmax(nparts, kpwgts,1)], isum(nparts, kpwgts, 1)/nparts, + 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1)), 1.0*isum(nparts, kpwgts, 1)/(1.0*nvtxs)); + + + if (mustfree == 1 || mustfree == 3) { + gk_free((void **)&vwgt, LTERM); + graph->vwgt = NULL; + } + if (mustfree == 2 || mustfree == 3) { + gk_free((void **)&adjwgt, LTERM); + graph->adjwgt = NULL; + } + + gk_free((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM); +} + + +/************************************************************************* +* This function computes the balance of the partitioning +**************************************************************************/ +void ComputePartitionBalance(graph_t *graph, idx_t nparts, idx_t *where, real_t *ubvec) +{ + idx_t i, j, nvtxs, ncon; + idx_t *kpwgts, *vwgt; + real_t balance; + + nvtxs = graph->nvtxs; + ncon = graph->ncon; + vwgt = graph->vwgt; + + kpwgts = ismalloc(nparts, 0, "ComputePartitionInfo: kpwgts"); + + if (vwgt == NULL) { + for (i=0; i<nvtxs; i++) + kpwgts[where[i]]++; + ubvec[0] = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*nvtxs); + } + else { + for (j=0; j<ncon; j++) { + iset(nparts, 0, kpwgts); + for 
(i=0; i<graph->nvtxs; i++) + kpwgts[where[i]] += vwgt[i*ncon+j]; + + ubvec[j] = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1)); + } + } + + gk_free((void **)&kpwgts, LTERM); + +} + + +/************************************************************************* +* This function computes the balance of the element partitioning +**************************************************************************/ +real_t ComputeElementBalance(idx_t ne, idx_t nparts, idx_t *where) +{ + idx_t i; + idx_t *kpwgts; + real_t balance; + + kpwgts = ismalloc(nparts, 0, "ComputeElementBalance: kpwgts"); + + for (i=0; i<ne; i++) + kpwgts[where[i]]++; + + balance = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts,1)]/(1.0*isum(nparts, kpwgts, 1)); + + gk_free((void **)&kpwgts, LTERM); + + return balance; + +} + + diff --git a/3rdParty/metis/metis-5.1.1/libmetis/stdheaders.h b/3rdParty/metis/metis-5.1.1/libmetis/stdheaders.h new file mode 100644 index 000000000..148f88d48 --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/stdheaders.h @@ -0,0 +1,29 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * stdheaders.h + * + * This file includes all necessary header files + * + * Started 8/27/94 + * George + * + * $Id: stdheaders.h 5993 2009-01-07 02:09:57Z karypis $ + */ + +#ifndef _LIBMETIS_STDHEADERS_H_ +#define _LIBMETIS_STDHEADERS_H_ + +#include <stdio.h> +#ifdef __STDC__ +#include <stdlib.h> +#else +#include <malloc.h> +#endif +#include <string.h> +#include <ctype.h> +#include <math.h> +#include <stdarg.h> +#include <time.h> + +#endif diff --git a/3rdParty/metis/metis-5.1.1/libmetis/struct.h b/3rdParty/metis/metis-5.1.1/libmetis/struct.h new file mode 100644 index 000000000..afd8c24fe --- /dev/null +++ b/3rdParty/metis/metis-5.1.1/libmetis/struct.h @@ -0,0 +1,219 @@ +/* + * Copyright 1997, Regents of the University of Minnesota + * + * struct.h + * + * This file contains data structures for ILU routines. 
+ *
+ * Started 9/26/95
+ * George
+ *
+ * $Id: struct.h 14362 2013-05-21 21:35:23Z karypis $
+ */
+
+#ifndef _LIBMETIS_STRUCT_H_
+#define _LIBMETIS_STRUCT_H_
+
+
+
+/*************************************************************************/
+/*! This data structure stores cut-based k-way refinement info about an
+    adjacent subdomain for a given vertex. */
+/*************************************************************************/
+typedef struct cnbr_t {
+  idx_t pid;            /*!< The partition ID */
+  idx_t ed;             /*!< The sum of the weights of the adjacent edges
+                             that are incident on pid */
+} cnbr_t;
+
+
+/*************************************************************************/
+/*! The following data structure holds information on degrees for cut-based
+    k-way partition. The per-subdomain details of a vertex live in the
+    cnbr_t array, starting at index inbr. */
+/*************************************************************************/
+typedef struct ckrinfo_t {
+  idx_t id;              /*!< The internal degree of a vertex (sum of weights) */
+  idx_t ed;              /*!< The total external degree of a vertex */
+  idx_t nnbrs;           /*!< The number of neighboring subdomains */
+  idx_t inbr;            /*!< The index in the cnbr_t array where the nnbrs list
+                             of neighbors is stored */
+} ckrinfo_t;
+
+
+/*************************************************************************/
+/*! This data structure stores volume-based k-way refinement info about an
+    adjacent subdomain for a given vertex. */
+/*************************************************************************/
+typedef struct vnbr_t {
+  idx_t pid;            /*!< The partition ID */
+  idx_t ned;            /*!< The number of the adjacent edges
+                             that are incident on pid */
+  idx_t gv;             /*!< The gain in volume achieved by moving the
+                             vertex to pid */
+} vnbr_t;
+
+
+/*************************************************************************/
+/*! 
The following data structure holds information on degrees for k-way
+    vol-based partition. The per-subdomain details of a vertex live in the
+    vnbr_t array, starting at index inbr. */
+/*************************************************************************/
+typedef struct vkrinfo_t {
+  idx_t nid;             /*!< The internal degree of a vertex (count of edges) */
+  idx_t ned;             /*!< The total external degree of a vertex (count of edges) */
+  idx_t gv;              /*!< The volume gain of moving that vertex */
+  idx_t nnbrs;           /*!< The number of neighboring subdomains */
+  idx_t inbr;            /*!< The index in the vnbr_t array where the nnbrs list
+                             of neighbors is stored */
+} vkrinfo_t;
+
+
+/*************************************************************************/
+/*! The following data structure holds the per-vertex degree information
+    used by the node-based (separator) refinement */
+/*************************************************************************/
+typedef struct nrinfo_t {
+  idx_t edegrees[2];     /*!< For a separator vertex, edegrees[p] is the total
+                              weight of its neighbors in partition p (0 or 1) */
+} nrinfo_t;
+
+
+/*************************************************************************/
+/*! This data structure holds a graph */
+/*************************************************************************/
+typedef struct graph_t {
+  idx_t nvtxs, nedges;  /* The # of vertices and edges in the graph */
+  idx_t ncon;           /* The # of constraints */
+  idx_t *xadj;          /* Pointers to the locally stored vertices */
+  idx_t *vwgt;          /* Vertex weights */
+  idx_t *vsize;         /* Vertex sizes for min-volume formulation */
+  idx_t *adjncy;        /* Array that stores the adjacency lists of nvtxs */
+  idx_t *adjwgt;        /* Array that stores the weights of the adjacency lists */
+
+  idx_t *tvwgt;         /* The sum of the vertex weights in the graph */
+  real_t *invtvwgt;     /* The inverse of the sum of the vertex weights in the graph */
+
+
+  /* These keep track of whether the corresponding fields point to
+     application or library memory */
+  int free_xadj, free_vwgt, free_vsize, free_adjncy, free_adjwgt;
+
+  idx_t *cmap;  /* The contraction/coarsening map */
+
+  idx_t *label; /* The labels of the vertices for recursive bisection (pmetis/ometis) */
+
+  /* Partition parameters */
+  idx_t mincut, minvol;
+  idx_t *where, *pwgts; /* where[i]: partition of vertex i; pwgts[p]: weight of partition p */
+  idx_t nbnd;           /* The number of entries in use in bndind */
+  idx_t *bndptr, *bndind; /* Boundary list; bndptr[i] is -1 for vertices that are not on it */
+
+  /* Bisection refinement parameters */
+  idx_t *id, *ed;
+
+  /* K-way refinement parameters */
+  ckrinfo_t *ckrinfo;   /*!< The per-vertex cut-based refinement info */
+  vkrinfo_t *vkrinfo;   /*!< The per-vertex volume-based refinement info */
+
+  /* Node refinement information */
+  nrinfo_t *nrinfo;
+
+  /* various fields for out-of-core processing */
+  int gID;
+  int ondisk;
+
+  /* keep track of the dropped edgewgt */
+  idx_t droppedewgt;
+
+  /* the linked-list structure of the sequence of graphs */
+  struct graph_t *coarser, *finer;
+
+} graph_t;
+
+
+/*************************************************************************/
+/*! This data structure holds a mesh */
+/*************************************************************************/
+typedef struct mesh_t {
+  idx_t ne, nn;         /*!< The # of elements and nodes in the mesh */
+  idx_t ncon;           /*!< The number of element balancing constraints (element weights) */
+
+  idx_t *eptr, *eind;   /*!< The CSR-structure storing the nodes in the elements */
+  idx_t *ewgt;          /*!< The weights of the elements */
+} mesh_t;
+
+
+
+/*************************************************************************/
+/*! 
The following structure stores information used by Metis */
+/*************************************************************************/
+typedef struct ctrl_t {
+  moptype_et  optype;           /* Type of operation */
+  mobjtype_et objtype;          /* Type of refinement objective */
+  mdbglvl_et  dbglvl;           /* Controls the debugging output of the program */
+  mctype_et   ctype;            /* The type of coarsening */
+  miptype_et  iptype;           /* The type of initial partitioning */
+  mrtype_et   rtype;            /* The type of refinement */
+
+  idx_t CoarsenTo;              /* The # of vertices in the coarsest graph */
+  idx_t nIparts;                /* The number of initial partitions to compute */
+  idx_t no2hop;                 /* Indicates if 2-hop matching will be used */
+  idx_t ondisk;                 /* Indicates out-of-core execution */
+  idx_t minconn;                /* Indicates if the subdomain connectivity will be minimized */
+  idx_t contig;                 /* Indicates if contiguous partitions are required */
+  idx_t nseps;                  /* The number of separators to be found during multiple bisections */
+  idx_t ufactor;                /* The user-supplied load imbalance factor */
+  idx_t compress;               /* If the graph will be compressed prior to ordering */
+  idx_t ccorder;                /* If connected components will be ordered separately */
+  idx_t seed;                   /* The seed for the random number generator */
+  idx_t ncuts;                  /* The number of different partitionings to compute */
+  idx_t niter;                  /* The number of iterations during each refinement */
+  idx_t numflag;                /* The user-supplied numflag for the graph */
+  idx_t dropedges;              /* Indicates if edges will be randomly dropped during coarsening */
+  idx_t *maxvwgt;               /* The maximum allowed weight for a vertex */
+
+  idx_t ncon;                   /*!< The number of balancing constraints */
+  idx_t nparts;                 /*!< The number of partitions */
+
+  real_t pfactor;               /* .1*(user-supplied pruning factor) */
+
+  real_t *ubfactors;            /*!< The per-constraint ubfactors */
+
+  real_t *tpwgts;               /*!< The target partition weights */
+  real_t *pijbm;                /*!< The nparts*ncon multiplies for the ith partition
+                                     and jth constraint for obtaining the balance */
+
+  real_t cfactor;               /*!< The achieved compression factor */
+
+  /* Various Timers */
+  double TotalTmr, InitPartTmr, MatchTmr, ContractTmr, CoarsenTmr, UncoarsenTmr,
+         RefTmr, ProjectTmr, SplitTmr, Aux1Tmr, Aux2Tmr, Aux3Tmr;
+
+  /* Workspace information */
+  gk_mcore_t *mcore;    /*!< The persistent memory core for within function
+                             mallocs/frees */
+
+  /* These are for use by the k-way refinement routines */
+  size_t nbrpoolsize;      /*!< The number of {c,v}nbr_t entries that have been allocated */
+  size_t nbrpoolcpos;      /*!< The position of the first free entry in the array */
+  size_t nbrpoolreallocs;  /*!< The number of times the pool was resized */
+
+  cnbr_t *cnbrpool;     /*!< The pool of cnbr_t entries to be used during refinement.
+                             The size and current position of the pool is controlled
+                             by nnbrs & cnbrs (NOTE(review): no such fields exist
+                             in this struct; presumably nbrpoolsize & nbrpoolcpos
+                             are meant - confirm) */
+  vnbr_t *vnbrpool;     /*!< The pool of vnbr_t entries to be used during refinement.
+                             The size and current position of the pool is controlled
+                             by nnbrs & cnbrs (NOTE(review): presumably nbrpoolsize
+                             & nbrpoolcpos - confirm) */
+
+  /* The subdomain graph, in sparse format */
+  idx_t *maxnads;               /* The maximum allocated number of adjacent domains */
+  idx_t *nads;                  /* The number of adjacent domains */
+  idx_t **adids;                /* The IDs of the adjacent domains */
+  idx_t **adwgts;               /* The edge-weight to the adjacent domains */
+  idx_t *pvec1, *pvec2;         /* Auxiliary nparts-size vectors for efficiency */
+
+  /* ondisk related info */
+  pid_t pid;            /*!< The pid of the running process */
+} ctrl_t;
+
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/timing.c b/3rdParty/metis/metis-5.1.1/libmetis/timing.c
new file mode 100644
index 000000000..9d6e05cf1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/timing.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * timing.c
+ *
+ * This file contains routines that deal with timing Metis
+ *
+ * Started 7/24/97
+ * George
+ *
+ * $Id: timing.c 13936 2013-03-30 03:59:09Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+* This function clears the timers. Each of the twelve timer fields of
+* ctrl (TotalTmr through Aux3Tmr) is passed to gk_clearcputimer().
+**************************************************************************/
+void InitTimers(ctrl_t *ctrl)
+{
+  gk_clearcputimer(ctrl->TotalTmr);
+  gk_clearcputimer(ctrl->InitPartTmr);
+  gk_clearcputimer(ctrl->MatchTmr);
+  gk_clearcputimer(ctrl->ContractTmr);
+  gk_clearcputimer(ctrl->CoarsenTmr);
+  gk_clearcputimer(ctrl->UncoarsenTmr);
+  gk_clearcputimer(ctrl->RefTmr);
+  gk_clearcputimer(ctrl->ProjectTmr);
+  gk_clearcputimer(ctrl->SplitTmr);
+  gk_clearcputimer(ctrl->Aux1Tmr);
+  gk_clearcputimer(ctrl->Aux2Tmr);
+  gk_clearcputimer(ctrl->Aux3Tmr);
+}
+
+
+/*************************************************************************
+* This function prints the various timers to stdout. The Aux1Tmr, Aux2Tmr
+* and Aux3Tmr printouts are commented out below and therefore not printed.
+**************************************************************************/
+void PrintTimers(ctrl_t *ctrl)
+{
+  printf("\nTiming Information -------------------------------------------------");
+  printf("\n Multilevel: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->TotalTmr));
+  printf("\n Coarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->CoarsenTmr));
+  printf("\n Matching: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->MatchTmr));
+  printf("\n Contract: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ContractTmr));
+  printf("\n Initial Partition: \t %7.3"PRREAL"", gk_getcputimer(ctrl->InitPartTmr));
+  printf("\n Uncoarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->UncoarsenTmr));
+  printf("\n Refinement: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->RefTmr));
+  printf("\n Projection: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ProjectTmr));
+  printf("\n Splitting: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->SplitTmr));
+/*
+  printf("\n Aux1Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux1Tmr));
+  printf("\n Aux2Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux2Tmr));
+  printf("\n Aux3Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux3Tmr));
+*/
+  printf("\n********************************************************************\n");
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/util.c b/3rdParty/metis/metis-5.1.1/libmetis/util.c
new file mode 100644
index 000000000..7fbc46726
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/util.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * util.c
+ *
+ * This file contains various utility routines
+ *
+ * Started 9/28/95
+ * George
+ *
+ * $Id: util.c 10495 2011-07-06 16:04:45Z karypis $
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function initializes the random number generator by calling
+    isrand(); a seed of -1 is replaced by the fixed seed 4321 */
+/*************************************************************************/
+void InitRandom(idx_t seed)
+{
+  isrand((seed == -1 ? 4321 : seed));
+}
+
+
+/*************************************************************************/
+/*! Returns the index i in [0, n) with the highest x[i]*y[i]; the first
+    such index wins on ties and 0 is returned when n <= 1 */
+/*************************************************************************/
+idx_t iargmax_nrm(size_t n, idx_t *x, real_t *y)
+{
+  size_t i, max=0;
+
+  for (i=1; i<n; i++)
+    max = (x[i]*y[i] > x[max]*y[max] ? i : max);
+
+  return max;
+}
+
+
+/*************************************************************************/
+/*! Returns the index, counted in units of incx, of the maximum element
+    among x[0], x[incx], ..., x[(n-1)*incx]; the first one wins on ties */
+/*************************************************************************/
+idx_t iargmax_strd(size_t n, idx_t *x, idx_t incx)
+{
+  size_t i, max=0;
+
+  n *= incx;
+  for (i=incx; i<n; i+=incx)
+    max = (x[i] > x[max] ? i : max);
+
+  return max/incx;
+}
+
+
+/*************************************************************************/
+/*! 
These functions return the index of the almost maximum element in a
+    vector, i.e., of the second largest element of x[0..n-1]. When n < 2
+    no such element exists and 0 is returned without reading x[1].
+ */
+/*************************************************************************/
+idx_t rargmax2(size_t n, real_t *x)
+{
+  size_t i, max1, max2;
+
+  if (n < 2)
+    return 0;
+
+  /* seed the largest (max1) and second largest (max2) from x[0] and x[1] */
+  max1 = (x[0] > x[1] ? 0 : 1);
+  max2 = 1 - max1;
+
+  for (i=2; i<n; i++) {
+    if (x[i] > x[max1]) {
+      /* new largest; the previous largest becomes the second largest */
+      max2 = max1;
+      max1 = i;
+    }
+    else if (x[i] > x[max2])
+      max2 = i;
+  }
+
+  return max2;
+}
+
+
+/*************************************************************************/
+/*! These functions return the index of the second largest elements in the
+    vector formed by x.y where '.' is element-wise multiplication. When
+    n < 2 no such element exists and 0 is returned without reading x[1]. */
+/*************************************************************************/
+idx_t iargmax2_nrm(size_t n, idx_t *x, real_t *y)
+{
+  size_t i, max1, max2;
+
+  if (n < 2)
+    return 0;
+
+  /* seed the largest (max1) and second largest (max2) from entries 0 and 1 */
+  max1 = (x[0]*y[0] > x[1]*y[1] ? 0 : 1);
+  max2 = 1 - max1;
+
+  for (i=2; i<n; i++) {
+    if (x[i]*y[i] > x[max1]*y[max1]) {
+      /* new largest; the previous largest becomes the second largest */
+      max2 = max1;
+      max1 = i;
+    }
+    else if (x[i]*y[i] > x[max2]*y[max2])
+      max2 = i;
+  }
+
+  return max2;
+}
+
+
+/*************************************************************************/
+/*! converts a signal code into a Metis return code:
+      0      -> METIS_OK
+      SIGMEM -> METIS_ERROR_MEMORY
+      other  -> METIS_ERROR
+ */
+/*************************************************************************/
+int metis_rcode(int sigrval)
+{
+  switch (sigrval) {
+    case 0:
+      return METIS_OK;
+    case SIGMEM:
+      return METIS_ERROR_MEMORY;
+    default:
+      return METIS_ERROR;
+  }
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.1/libmetis/wspace.c b/3rdParty/metis/metis-5.1.1/libmetis/wspace.c
new file mode 100644
index 000000000..a474c3cb6
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.1/libmetis/wspace.c
@@ -0,0 +1,214 @@
+/*!
+\file
+\brief Functions dealing with memory allocation and workspace management
+
+\date Started 2/24/96
+\author George
+\author Copyright 1997-2009, Regents of the University of Minnesota
+\version $Id: wspace.c 10492 2011-07-06 09:28:42Z karypis $
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function allocates memory for the workspace: it creates ctrl->mcore
+    and zeroes the nbrpool size, position and realloc counters */
+/*************************************************************************/
+void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph)
+{
+  size_t coresize;
+
+  switch (ctrl->optype) {
+    case METIS_OP_PMETIS:
+      coresize = 3*(graph->nvtxs+1)*sizeof(idx_t) +
+                 5*(ctrl->nparts+1)*graph->ncon*sizeof(idx_t) +
+                 5*(ctrl->nparts+1)*graph->ncon*sizeof(real_t);
+      break;
+    default:
+      coresize = 4*(graph->nvtxs+1)*sizeof(idx_t) +
+                 5*(ctrl->nparts+1)*graph->ncon*sizeof(idx_t) +
+                 5*(ctrl->nparts+1)*graph->ncon*sizeof(real_t);
+  }
+  ctrl->mcore = gk_mcoreCreate(coresize);
+
+  ctrl->nbrpoolsize     = 0;
+  ctrl->nbrpoolcpos     = 0;
+  ctrl->nbrpoolreallocs = 0;
+}
+
+/*************************************************************************/
+/*! 
This function allocates refinement-specific memory for the workspace:
+    the cnbr_t or vnbr_t pool (chosen by ctrl->objtype) with nbrpoolsize
+    entries and, if ctrl->minconn is set, the subdomain-graph arrays */
+/*************************************************************************/
+void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize)
+{
+  ctrl->nbrpoolreallocs = 0;
+  ctrl->nbrpoolcpos     = 0;
+  ctrl->nbrpoolsize     = nbrpoolsize;
+
+  /* only the pool that matches the objective type is allocated */
+  if (ctrl->objtype == METIS_OBJTYPE_CUT) {
+    ctrl->cnbrpool = (cnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(cnbr_t),
+                         "AllocateRefinementWorkSpace: cnbrpool");
+  }
+  else if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+    ctrl->vnbrpool = (vnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(vnbr_t),
+                         "AllocateRefinementWorkSpace: vnbrpool");
+  }
+  else {
+    gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  /* the sparse subdomain graph is allocated only when minconn is set */
+  if (!ctrl->minconn)
+    return;
+
+  ctrl->pvec1 = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec1");
+  ctrl->pvec2 = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec2");
+  ctrl->maxnads = ismalloc(ctrl->nparts, INIT_MAXNAD, "AllocateRefinementWorkSpace: maxnads");
+  ctrl->nads = imalloc(ctrl->nparts, "AllocateRefinementWorkSpace: nads");
+  ctrl->adids = iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adids");
+  ctrl->adwgts = iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adwgts");
+}
+
+
+/*************************************************************************/
+/*! 
This function frees the workspace: the memory core is destroyed, the
+    cnbr/vnbr pools are freed and their size/position counters zeroed and,
+    if ctrl->minconn is set, the subdomain-graph arrays are freed too */
+/*************************************************************************/
+void FreeWorkSpace(ctrl_t *ctrl)
+{
+  gk_mcoreDestroy(&ctrl->mcore, ctrl->dbglvl&METIS_DBG_INFO);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO,
+      printf(" nbrpool statistics\n"
+             " nbrpoolsize: %12zu nbrpoolcpos: %12zu\n"
+             " nbrpoolreallocs: %12zu\n\n",
+             ctrl->nbrpoolsize, ctrl->nbrpoolcpos,
+             ctrl->nbrpoolreallocs));
+
+  gk_free((void **)&ctrl->cnbrpool, &ctrl->vnbrpool, LTERM);
+  ctrl->nbrpoolsize = 0;
+  ctrl->nbrpoolcpos = 0;
+
+  if (ctrl->minconn) {
+    iFreeMatrix(&(ctrl->adids), ctrl->nparts, INIT_MAXNAD);
+    iFreeMatrix(&(ctrl->adwgts), ctrl->nparts, INIT_MAXNAD);
+
+    gk_free((void **)&ctrl->pvec1, &ctrl->pvec2,
+        &ctrl->maxnads, &ctrl->nads, LTERM);
+  }
+}
+
+/*************************************************************************/
+/*! This function allocates nbytes of space from the workspace/heap by
+    forwarding the request to gk_mcoreMalloc() on ctrl->mcore */
+/*************************************************************************/
+void *wspacemalloc(ctrl_t *ctrl, size_t nbytes)
+{
+  return gk_mcoreMalloc(ctrl->mcore, nbytes);
+}
+
+/*************************************************************************/
+/*! This function sets a marker in the stack of malloc ops to be used
+    subsequently for freeing purposes */
+/*************************************************************************/
+void wspacepush(ctrl_t *ctrl)
+{
+  gk_mcorePush(ctrl->mcore);
+}
+
+/*************************************************************************/
+/*! This function frees all mops (presumably "malloc ops") since the last
+    push by calling gk_mcorePop() on ctrl->mcore */
+/*************************************************************************/
+void wspacepop(ctrl_t *ctrl)
+{
+  gk_mcorePop(ctrl->mcore);
+}
+
+/*************************************************************************/
+/*! 
This function allocates space for n idx_t entries from the core */
+/*************************************************************************/
+idx_t *iwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  size_t nbytes = n*sizeof(idx_t);
+  return (idx_t *)wspacemalloc(ctrl, nbytes);
+}
+
+/*************************************************************************/
+/*! This function allocates space for n real_t entries from the core */
+/*************************************************************************/
+real_t *rwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  size_t nbytes = n*sizeof(real_t);
+  return (real_t *)wspacemalloc(ctrl, nbytes);
+}
+
+/*************************************************************************/
+/*! This function allocates space for n ikv_t entries from the core */
+/*************************************************************************/
+ikv_t *ikvwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  size_t nbytes = n*sizeof(ikv_t);
+  return (ikv_t *)wspacemalloc(ctrl, nbytes);
+}
+
+/*************************************************************************/
+/*! This function rewinds the cnbrpool position to its first entry */
+/*************************************************************************/
+void cnbrpoolReset(ctrl_t *ctrl)
+{
+  ctrl->nbrpoolcpos = 0;
+}
+
+
+/*************************************************************************/
+/*! This function reserves nnbrs entries of cnbrpool and returns the index
+    of the first one, growing the pool when it is exhausted */
+/*************************************************************************/
+idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs)
+{
+  size_t first = ctrl->nbrpoolcpos;   /* index handed back to the caller */
+
+  ctrl->nbrpoolcpos = first + nnbrs;
+  if (ctrl->nbrpoolsize < ctrl->nbrpoolcpos) {  /* pool exhausted: grow it */
+    ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2);
+    ctrl->cnbrpool = (cnbr_t *)gk_realloc(ctrl->cnbrpool,
+                          ctrl->nbrpoolsize*sizeof(cnbr_t), "cnbrpoolGet: cnbrpool");
+    ctrl->nbrpoolreallocs++;
+  }
+
+  return first;
+}
+
+
+/*************************************************************************/
+/*! 
This function rewinds the vnbrpool position to its first entry */
+/*************************************************************************/
+void vnbrpoolReset(ctrl_t *ctrl)
+{
+  ctrl->nbrpoolcpos = 0;
+}
+
+
+/*************************************************************************/
+/*! This function reserves nnbrs entries of vnbrpool; returns the first index */
+/*************************************************************************/
+idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs)
+{
+  size_t first = ctrl->nbrpoolcpos;   /* index handed back to the caller */
+
+  ctrl->nbrpoolcpos = first + nnbrs;
+  if (ctrl->nbrpoolsize < ctrl->nbrpoolcpos) {  /* pool exhausted: grow it */
+    ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2);
+    ctrl->vnbrpool = (vnbr_t *)gk_realloc(ctrl->vnbrpool,
+                          ctrl->nbrpoolsize*sizeof(vnbr_t), "vnbrpoolGet: vnbrpool");
+    ctrl->nbrpoolreallocs++;
+  }
+
+  return first;
+}
+
diff --git a/CMake/cmake_config_files/BILBO.config.cmake b/CMake/cmake_config_files/BILBO.config.cmake
index 3c7f0f728..093fe3200 100644
--- a/CMake/cmake_config_files/BILBO.config.cmake
+++ b/CMake/cmake_config_files/BILBO.config.cmake
@@ -3,11 +3,3 @@
 # Responsible: Soeren Peters
 # OS: MacOS X
 #################################################################################
-
-#################################################################################
-# METIS
-#################################################################################
-SET(METIS_INCLUDEDIR "/usr/local/include")
-SET(METIS_DEBUG_LIBRARY "/usr/local/lib/libmetis.a")
-SET(METIS_RELEASE_LIBRARY "/usr/local/lib/libmetis.a")
-
diff --git a/CMake/cmake_config_files/ELLADAN.config.cmake b/CMake/cmake_config_files/ELLADAN.config.cmake
index 80291455e..ff87d1b9d 100644
--- a/CMake/cmake_config_files/ELLADAN.config.cmake
+++ b/CMake/cmake_config_files/ELLADAN.config.cmake
@@ -5,10 +5,5 @@
 #################################################################################
 set(NVCUDASAMPLES_ROOT "~/cuda-samples/Common")
 
-#################################################################################
-# METIS
-#################################################################################
-set(METIS_INCLUDEDIR "/usr/include")
-set(METIS_DEBUG_LIBRARY "/usr/lib/x86_64-linux-gnu/libmetis.so")
-set(METIS_RELEASE_LIBRARY "/usr/lib/x86_64-linux-gnu/libmetis.so")
+
+
diff --git a/cpu.cmake b/cpu.cmake
index 0f8c9a5ac..fd33baa91 100644
--- a/cpu.cmake
+++ b/cpu.cmake
@@ -75,6 +75,11 @@ IF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ia64")
     LIST(APPEND VF_COMPILER_DEFINITION _M_IA64)
 ENDIF()
 
+# Build the bundled METIS only when METIS is enabled and no external one is configured.
+if(USE_METIS AND NOT METIS_INCLUDEDIR)
+    add_subdirectory(${VF_THIRD_DIR}/metis/metis-5.1.1)
+endif()
+
 add_subdirectory(${VF_THIRD_DIR}/MuParser)
 
 
diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt
index a05143324..3121ce925 100644
--- a/src/cpu/VirtualFluidsCore/CMakeLists.txt
+++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt
@@ -1,7 +1,11 @@
 
-IF(${USE_METIS})
+# Link the external METIS when METIS_INCLUDEDIR is configured; otherwise link the
+# bundled "metis" target. Nothing METIS-related is linked when USE_METIS is off.
+IF(USE_METIS AND METIS_INCLUDEDIR)
    SET(LINK_LIBRARY optimized ${METIS_RELEASE_LIBRARY} debug ${METIS_DEBUG_LIBRARY})
    SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
+ELSEIF(USE_METIS)
+   SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} metis)
 ENDIF()
 
 IF(${USE_VTK})
@@ -37,8 +41,9 @@ target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/Co
 
 target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/Utilities)
 
-target_include_directories(${library_name} PUBLIC ${METIS_INCLUDEDIR})
-
+IF(USE_METIS AND METIS_INCLUDEDIR)
+   target_include_directories(${library_name} PUBLIC ${METIS_INCLUDEDIR})
+ENDIF()
 
 IF(${USE_BOOST})
    target_include_directories(${library_name} PRIVATE ${Boost_INCLUDE_DIR})
-- 
GitLab