From d3ff7a4a853e9492d31ff891f745c26cbbf3b147 Mon Sep 17 00:00:00 2001
From: Soeren Peters <peters@irmb.tu-bs.de>
Date: Tue, 6 Apr 2021 10:36:22 +0200
Subject: [PATCH] Added metis 5.1.0 and use it in cpu module instead of 5.1.1.

---
 3rdParty/metis/metis-5.1.0/CMakeLists.txt     |    24 +
 3rdParty/metis/metis-5.1.0/Changelog          |   286 +
 3rdParty/metis/metis-5.1.0/GKlib/BUILD.txt    |    25 +
 .../metis/metis-5.1.0/GKlib/CMakeLists.txt    |    21 +
 3rdParty/metis/metis-5.1.0/GKlib/GKlib.h      |    84 +
 .../metis/metis-5.1.0/GKlib/GKlibSystem.cmake |   129 +
 3rdParty/metis/metis-5.1.0/GKlib/Makefile     |    76 +
 3rdParty/metis/metis-5.1.0/GKlib/b64.c        |    95 +
 3rdParty/metis/metis-5.1.0/GKlib/blas.c       |    36 +
 .../GKlib/conf/check_thread_storage.c         |     5 +
 3rdParty/metis/metis-5.1.0/GKlib/csr.c        |  2010 +++
 3rdParty/metis/metis-5.1.0/GKlib/error.c      |   214 +
 3rdParty/metis/metis-5.1.0/GKlib/evaluate.c   |   132 +
 3rdParty/metis/metis-5.1.0/GKlib/fkvkselect.c |   142 +
 3rdParty/metis/metis-5.1.0/GKlib/fs.c         |   225 +
 3rdParty/metis/metis-5.1.0/GKlib/getopt.c     |   854 ++
 3rdParty/metis/metis-5.1.0/GKlib/gk_arch.h    |    71 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_defs.h    |    69 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_externs.h |    25 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_getopt.h  |    64 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_macros.h  |   153 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_mkblas.h  |   201 +
 .../metis/metis-5.1.0/GKlib/gk_mkmemory.h     |   142 +
 .../metis/metis-5.1.0/GKlib/gk_mkpqueue.h     |   437 +
 .../metis/metis-5.1.0/GKlib/gk_mkpqueue2.h    |   215 +
 .../metis/metis-5.1.0/GKlib/gk_mkrandom.h     |   123 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_mksort.h  |   273 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_mkutils.h |    40 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_proto.h   |   381 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_struct.h  |   268 +
 3rdParty/metis/metis-5.1.0/GKlib/gk_types.h   |    38 +
 3rdParty/metis/metis-5.1.0/GKlib/gkregex.c    | 10704 ++++++++++++++++
 3rdParty/metis/metis-5.1.0/GKlib/gkregex.h    |   556 +
 3rdParty/metis/metis-5.1.0/GKlib/graph.c      |  1574 +++
 3rdParty/metis/metis-5.1.0/GKlib/htable.c     |   247 +
 3rdParty/metis/metis-5.1.0/GKlib/io.c         |   384 +
 3rdParty/metis/metis-5.1.0/GKlib/itemsets.c   |   210 +
 3rdParty/metis/metis-5.1.0/GKlib/mcore.c      |   393 +
 3rdParty/metis/metis-5.1.0/GKlib/memory.c     |   252 +
 .../metis/metis-5.1.0/GKlib/ms_inttypes.h     |   301 +
 3rdParty/metis/metis-5.1.0/GKlib/ms_stat.h    |    22 +
 3rdParty/metis/metis-5.1.0/GKlib/ms_stdint.h  |   222 +
 3rdParty/metis/metis-5.1.0/GKlib/omp.c        |    27 +
 3rdParty/metis/metis-5.1.0/GKlib/pdb.c        |   460 +
 3rdParty/metis/metis-5.1.0/GKlib/pqueue.c     |    25 +
 3rdParty/metis/metis-5.1.0/GKlib/random.c     |   134 +
 3rdParty/metis/metis-5.1.0/GKlib/rw.c         |   103 +
 3rdParty/metis/metis-5.1.0/GKlib/seq.c        |   174 +
 3rdParty/metis/metis-5.1.0/GKlib/sort.c       |   327 +
 3rdParty/metis/metis-5.1.0/GKlib/string.c     |   529 +
 .../metis-5.1.0/GKlib/test/CMakeLists.txt     |    13 +
 .../metis-5.1.0/GKlib/test/Makefile.in.old    |   258 +
 .../metis/metis-5.1.0/GKlib/test/Makefile.old |    39 +
 3rdParty/metis/metis-5.1.0/GKlib/test/fis.c   |   286 +
 .../metis/metis-5.1.0/GKlib/test/gkgraph.c    |   351 +
 .../metis/metis-5.1.0/GKlib/test/gksort.c     |   346 +
 3rdParty/metis/metis-5.1.0/GKlib/test/rw.c    |   307 +
 .../metis/metis-5.1.0/GKlib/test/strings.c    |    82 +
 3rdParty/metis/metis-5.1.0/GKlib/timers.c     |    52 +
 3rdParty/metis/metis-5.1.0/GKlib/tokenizer.c  |    77 +
 3rdParty/metis/metis-5.1.0/GKlib/util.c       |   108 +
 3rdParty/metis/metis-5.1.0/LICENSE.txt        |    18 +
 .../metis/metis-5.1.0/include/CMakeLists.txt  |     3 +
 3rdParty/metis/metis-5.1.0/include/metis.h    |   354 +
 .../metis/metis-5.1.0/libmetis/CMakeLists.txt |    22 +
 3rdParty/metis/metis-5.1.0/libmetis/auxapi.c  |    43 +
 3rdParty/metis/metis-5.1.0/libmetis/balance.c |   498 +
 .../metis/metis-5.1.0/libmetis/bucketsort.c   |    44 +
 .../metis/metis-5.1.0/libmetis/checkgraph.c   |   263 +
 3rdParty/metis/metis-5.1.0/libmetis/coarsen.c |  1132 ++
 .../metis/metis-5.1.0/libmetis/compress.c     |   229 +
 3rdParty/metis/metis-5.1.0/libmetis/contig.c  |   699 +
 3rdParty/metis/metis-5.1.0/libmetis/debug.c   |   461 +
 3rdParty/metis/metis-5.1.0/libmetis/defs.h    |    60 +
 3rdParty/metis/metis-5.1.0/libmetis/fm.c      |   543 +
 3rdParty/metis/metis-5.1.0/libmetis/fortran.c |   142 +
 3rdParty/metis/metis-5.1.0/libmetis/frename.c |   136 +
 3rdParty/metis/metis-5.1.0/libmetis/gklib.c   |   120 +
 .../metis/metis-5.1.0/libmetis/gklib_defs.h   |    53 +
 .../metis/metis-5.1.0/libmetis/gklib_rename.h |   122 +
 3rdParty/metis/metis-5.1.0/libmetis/graph.c   |   274 +
 .../metis/metis-5.1.0/libmetis/initpart.c     |   630 +
 3rdParty/metis/metis-5.1.0/libmetis/kmetis.c  |   243 +
 3rdParty/metis/metis-5.1.0/libmetis/kwayfm.c  |  1852 +++
 .../metis/metis-5.1.0/libmetis/kwayrefine.c   |   672 +
 3rdParty/metis/metis-5.1.0/libmetis/macros.h  |   258 +
 3rdParty/metis/metis-5.1.0/libmetis/mcutil.c  |   330 +
 3rdParty/metis/metis-5.1.0/libmetis/mesh.c    |   412 +
 .../metis/metis-5.1.0/libmetis/meshpart.c     |   262 +
 .../metis/metis-5.1.0/libmetis/metislib.h     |    41 +
 3rdParty/metis/metis-5.1.0/libmetis/minconn.c |   729 ++
 .../metis/metis-5.1.0/libmetis/mincover.c     |   259 +
 3rdParty/metis/metis-5.1.0/libmetis/mmd.c     |   593 +
 3rdParty/metis/metis-5.1.0/libmetis/ometis.c  |   701 +
 3rdParty/metis/metis-5.1.0/libmetis/options.c |   532 +
 .../metis/metis-5.1.0/libmetis/parmetis.c     |   723 ++
 3rdParty/metis/metis-5.1.0/libmetis/pmetis.c  |   387 +
 3rdParty/metis/metis-5.1.0/libmetis/proto.h   |   348 +
 3rdParty/metis/metis-5.1.0/libmetis/refine.c  |   211 +
 3rdParty/metis/metis-5.1.0/libmetis/rename.h  |   266 +
 .../metis/metis-5.1.0/libmetis/separator.c    |   176 +
 3rdParty/metis/metis-5.1.0/libmetis/sfm.c     |   612 +
 3rdParty/metis/metis-5.1.0/libmetis/srefine.c |   163 +
 3rdParty/metis/metis-5.1.0/libmetis/stat.c    |   179 +
 .../metis/metis-5.1.0/libmetis/stdheaders.h   |    29 +
 3rdParty/metis/metis-5.1.0/libmetis/struct.h  |   206 +
 3rdParty/metis/metis-5.1.0/libmetis/timing.c  |    63 +
 3rdParty/metis/metis-5.1.0/libmetis/util.c    |   138 +
 3rdParty/metis/metis-5.1.0/libmetis/wspace.c  |   214 +
 cpu.cmake                                     |     2 +-
 110 files changed, 41867 insertions(+), 1 deletion(-)
 create mode 100644 3rdParty/metis/metis-5.1.0/CMakeLists.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/Changelog
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/BUILD.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/CMakeLists.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/GKlib.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/GKlibSystem.cmake
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/Makefile
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/b64.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/blas.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/conf/check_thread_storage.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/csr.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/error.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/evaluate.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/fkvkselect.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/fs.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/getopt.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_arch.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_defs.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_externs.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_getopt.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_macros.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkblas.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkmemory.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue2.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkrandom.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mksort.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_mkutils.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_proto.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_struct.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gk_types.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gkregex.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/gkregex.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/graph.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/htable.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/io.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/itemsets.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/mcore.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/memory.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/ms_inttypes.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/ms_stat.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/ms_stdint.h
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/omp.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/pdb.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/pqueue.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/random.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/rw.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/seq.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/sort.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/string.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/CMakeLists.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.in.old
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.old
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/fis.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/gkgraph.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/gksort.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/rw.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/test/strings.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/timers.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/tokenizer.c
 create mode 100644 3rdParty/metis/metis-5.1.0/GKlib/util.c
 create mode 100644 3rdParty/metis/metis-5.1.0/LICENSE.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/include/CMakeLists.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/include/metis.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/CMakeLists.txt
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/auxapi.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/balance.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/bucketsort.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/checkgraph.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/coarsen.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/compress.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/contig.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/debug.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/defs.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/fm.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/fortran.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/frename.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/gklib.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/gklib_defs.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/gklib_rename.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/graph.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/initpart.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/kmetis.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/kwayfm.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/kwayrefine.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/macros.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/mcutil.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/mesh.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/meshpart.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/metislib.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/minconn.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/mincover.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/mmd.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/ometis.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/options.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/parmetis.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/pmetis.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/proto.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/refine.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/rename.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/separator.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/sfm.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/srefine.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/stat.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/stdheaders.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/struct.h
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/timing.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/util.c
 create mode 100644 3rdParty/metis/metis-5.1.0/libmetis/wspace.c

diff --git a/3rdParty/metis/metis-5.1.0/CMakeLists.txt b/3rdParty/metis/metis-5.1.0/CMakeLists.txt
new file mode 100644
index 000000000..6528e7941
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.0)
+project(METIS)
+# GKLIB_PATH is a cache entry: a value already present in the cache is kept.
+set(GKLIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}/GKlib" CACHE PATH "path to GKlib")
+
+# Choose the libmetis library type (SHARED or STATIC) from BUILD_SHARED_LIBS.
+if(BUILD_SHARED_LIBS)
+  set(METIS_LIBRARY_TYPE SHARED)
+else()
+  set(METIS_LIBRARY_TYPE STATIC)
+endif()
+
+include(${GKLIB_PATH}/GKlibSystem.cmake)
+
+# NOTE(review): the "metis" target used below is presumably created in libmetis/ - confirm.
+add_subdirectory("libmetis")
+
+target_include_directories(metis PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libmetis)
+target_include_directories(metis PRIVATE ${GKLIB_PATH})
+
+target_include_directories(metis PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+# NOTE(review): groupTarget() and thirdFolder are not defined in this file; presumably provided by the including project - confirm.
+groupTarget(metis ${thirdFolder})
\ No newline at end of file
diff --git a/3rdParty/metis/metis-5.1.0/Changelog b/3rdParty/metis/metis-5.1.0/Changelog
new file mode 100644
index 000000000..21308801d
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/Changelog
@@ -0,0 +1,286 @@
+
+metis-5.1.0
+------------------------------------------------------------------------
+r13937 | karypis | 2013-03-29 23:08:21 -0500 (Fri, 29 Mar 2013) 
+
+- Further extended the 2-hop coarsening scheme introduced in 5.0.2 for
+  graphs with highly variable degree distribution (e.g., power-law).
+  This coarsening scheme is automatically used when the standard 
+  1-hop-based scheme leaves a large fraction of the vertices of the 
+  graph unmatched. It leads to better quality partitionings, lower 
+  memory utilization, and faster execution time. In principle, this 
+  scheme will never be triggered for graphs/matrices appearing in 
+  scientific computations derived from FE meshes. However, if you 
+  notice that the quality of the solutions is significantly worse, 
+  this 2-hop matching can be turned off by using the '-no2hop' command 
+  line option and the associated options[] parameter (as described 
+  in the manual).
+- Fixed 0/1 numbering issue with mesh partitioning routines (flyspray 
+   issue #109)
+
+
+metis-5.0.3
+------------------------------------------------------------------------
+r13822 | karypis | 2013-03-11 14:40:11 -0500 (Mon, 11 Mar 2013)
+
+- Fixed the bug that was introduced in 5.x for creating nodal graphs
+  from meshes (flyspray issue #107).
+- Changed the license to Apache Version 2.
+
+
+metis-5.0.2
+------------------------------------------------------------------------
+r10974 | karypis | 2011-10-29 18:24:32 -0500 (Sat, 29 Oct 2011)
+
+- Fixed issue with high-degree vertices and mask-based compression.
+- Fixed issue with wrong COARSENING_FRACTION.
+- Modified coarsening schemes to better support non FE graphs.
+
+
+metis-5.0.1
+------------------------------------------------------------------------
+r10709 | karypis | 2011-08-31 16:07:57 -0500 (Wed, 31 Aug 2011)
+
+- Fixed critical bug in the mesh partitioning routines.
+
+
+metis-5.0
+------------------------------------------------------------------------
+r10667 | karypis | 2011-08-04 00:35:30 -0500 (Thu, 04 Aug 2011) 
+
+- Updated/corrected error messages.
+- Addressed some build issues.
+
+
+metis-5.0rc3
+------------------------------------------------------------------------
+r10560 | karypis | 2011-07-13 08:19:10 -0500 (Wed, 13 Jul 2011)
+
+- Fixed various bugs that were identified by testers.
+- Some minor performance and quality improvements.
+- Addressed some build issues.
+
+
+metis-5.0rc2
+------------------------------------------------------------------------
+r10496 | karypis | 2011-07-06 11:04:45 -0500 (Wed, 06 Jul 2011)
+
+- Various run-time and quality optimizations.
+- Option error-checking.
+- Signal-based heap cleanup on error. Metis API routines will now
+  return nicely and clean up all memory that they may have allocated.
+- Reduced memory requirements.
+- Fixed various bugs identified in rc1.
+- Added back Fortran support in the form of alternate API names
+  (see libmetis/frename.h).
+- Minor code changes to accommodate ParMetis 4.0.
+
+
+metis-5.0rc1
+------------------------------------------------------------------------
+r10227 | karypis | 2011-06-13 23:35:05 -0500 (Mon, 13 Jun 2011)
+
+- A nearly complete re-write of Metis' code base that changed and expanded
+  the functionality of the command-line programs and API routines.
+- Multi-constraint partitioning can be used in conjunction with
+  minimization of the total communication volume.
+- All graph and mesh partitioning routines take as input the target
+  sizes of the partitions, which among others, allow them to compute
+  partitioning solutions that are well-suited for parallel architectures
+  with heterogeneous computing capabilities.
+- When multi-constraint partitioning is used, the target sizes of the
+  partitions are specified on a per partition-constraint pair.
+- The multilevel k-way partitioning algorithms can compute a
+  partitioning solution in which each partition is contiguous.
+- All partitioning and ordering routines can compute multiple different
+  solutions and select the best as the final solution.
+- The mesh partitioning and mesh-to-graph conversion routines can
+  operate on mixed element meshes.
+- The command-line programs provide full access to the entire set of
+  capabilities provided by Metis' API.
+- Re-written the memory management subsystem to reduce overall memory
+  requirements.
+
+
+
+metis-5.0pre2
+------------------------------------------------------------------------
+r1437 | karypis | 2007-04-07 23:16:16 -0500 (Sat, 07 Apr 2007)  
+
+- Added installation instructions and change-logs.
+- Tested 32bit & 64bit on 64bit architectures and passed tests.
+- Tested 32bit on 32bit architectures and passed tests.
+- strtoidx() addition for portable input file parsing
+- Restructured the internal memory allocation schemes for graph and
+  refinement data. This should enhance portability and make the code
+  easier to maintain.
+- Fixed some bad memory allocation calls (i.e., sizeof(x)/sizeof(idxtype)).
+  However, there are tons of those and need to be corrected once and for
+  all by eliminating workspace and the associated mallocs.
+- Added mprint/mscanf family of functions for portable formatted I/O
+  of the idxtype datatype. The specifier for this datatype is %D.
+  All library routines use this function for printing. 
+  The implementation of these routines is not very efficient, but
+  that should do for now (in principle these routines should not be
+  used unless debugging).
+- Incorporated GKlib into METIS, which replaced many of its internal
+  functions. GKlib's malloc interface will enable graceful and clean
+  aborts (i.e., free all internally allocated memory) on fatal errors.
+  This will probably be available in the next pre-release.
+- Fixed the problems associated with metis.h that were identified by
+  David (flyspray Issue #9).
+
+
+METIS 4.0.2, 3/10/04
+------------------------------------------------------------------------------
+- Fixed a problem with weighted graphs and ometis.c
+
+
+METIS 4.0.1, 11/29/98
+------------------------------------------------------------------------------
+This is mostly a bug-fix release
+
+  - Fixed some bugs in the multi-constraint partitioning routines
+  - Fixed some bugs in the volume-minimization routines
+
+
+
+METIS 4.0.0, 9/20/98
+------------------------------------------------------------------------------
+METIS 4.0 contains a number of changes over the previous major release (ver 
+3.0.x). Most of these changes are concentrated on the graph and mesh 
+partitioning routines and they do not affect the sparse matrix re-ordering 
+routines. Here is a list of the major changes:
+
+  Multi-Constraint Partitioning
+  -----------------------------
+  METIS now includes partitioning routines that can be used to partition
+  a graph in the presence of multiple balancing constraints.
+
+  Minimizing the Total Communication Volume
+  -----------------------------------------
+  METIS now includes partitioning routines whose objective is to minimize
+  the total communication volume (as opposed to minimizing the edge-cut).
+
+  Minimizing the Maximum Connectivity of the Subdomains
+  -----------------------------------------------------
+  The k-way partitioning routines in METIS can now directly minimize the number
+  of adjacent subdomains. For most graphs corresponding to finite element 
+  meshes, METIS is able to significantly reduce the maximum (and total) number of 
+  adjacent subdomains.
+
+
+
+
+METIS 3.0.6, 1/28/98
+-------------------------------------------------------------------------------
+  - Fixed some problems when too many partitions were asked, and each partition
+    ended up having 0 vertices
+  - Fixed some bugs in the I/O routines
+  - Added support for the g77 compiler under Linux
+
+
+METIS 3.0.5, 12/22/97
+-------------------------------------------------------------------------------
+  - Fixed problems on 64-bit architectures (eg., -64 option on SGIs).
+  - Added some options in Makefile.in
+
+
+METIS 3.0.4, 12/1/97
+-------------------------------------------------------------------------------
+  Fixed a memory leak in the ordering code.
+
+
+METIS 3.0.3, 11/5/97
+-------------------------------------------------------------------------------
+  This is mostly a bug-fix release with just a few additions
+
+  Added functionality
+    - Added support for quadrilateral elements.
+    - Added a routine METIS_EstimateMemory that estimates the amount of
+      memory that will be allocated by METIS. This is useful in determining
+      if a problem can run on your system.
+    - Added hooks to allow PARMETIS to use the orderings produced by METIS.
+      This is hidden from the user but it will be used in the next release
+      of PARMETIS.
+
+  Bug-fixes
+    - Fixed a bug related to memory allocation. This should somewhat reduce the 
+      overall memory used by METIS.
+    - Fixed some bugs in the 'graphchk' program in the case of weighted graphs.
+    - Removed some code corresponding to unused options.
+    - Fixed some minor bugs in the node-refinement code
+      
+
+
+-------------------------------------------------------------------------------
+METIS 3.0 contains a number of changes over METIS 2.0.
+The major changes are the following:
+
+  General Changes
+  ---------------
+    1.  Added code to directly partition finite element meshes.
+
+    2.  Added code to convert finite element meshes into graphs so they
+        can be used by METIS.
+
+    3.  The names, calling sequences, and options of the routines in
+        METISlib have been changed.
+
+    4.  Better support has been added for Fortran programs.
+
+    5.  Eliminated the 'metis' program. The only way to tune METIS's
+        behavior is to use METISlib.
+
+    6.  Improved memory management. METIS should now only abort if truly
+        there is no more memory left in the system.
+
+
+  Graph Partitioning
+  ------------------
+    1.  Added partitioning routines that can be used to compute a partition 
+        with prescribed partition weights. For example, they can be used to 
+        compute a 3-way partition such that partition 1 has 50% of the weight, 
+        partition 2 has 20% of the weight, and partition 3 has 30% of the weight.
+
+    2.  Improved the speed of the k-way partitioning algorithm (kmetis). The
+        new code has better cache locality which dramatically improves the 
+        speed for large graphs. A factor of 4 speedup can be obtained for
+        certain graphs. METIS can now partition a 4 million node graph
+        in well under a minute on a MIPS R10000.
+
+    3.  Eliminated some of the options that were seldom used. 
+  
+
+  Fill-Reducing Orderings
+  ----------------------
+    1.  Added a node based ordering code `onmetis' that greatly improves 
+        ordering quality.
+
+    2.  Improved the quality of the orderings produced by the original
+        edge-based ordering code (it is now called 'oemetis').
+
+    3.  METIS can now analyze the graph and try to compress together 
+        nodes with identical sparsity pattern. For some problems, this 
+        significantly reduces ordering time 
+
+    4.  METIS can now prune dense columns prior to ordering. This can be
+        helpful for LP matrices.
+        
+
+  Mesh Partitioning
+  -----------------
+    1.  METIS can now directly partition the element node array of finite
+        element meshes. It produces two partitioning vectors. One for the
+        elements and one for the nodes. METIS supports the following 
+        elements: triangles, tetrahedra, hexahedra
+
+
+  Mesh-To-Graph Conversion Routines
+  ---------------------------------
+    1.  METIS now includes a number of mesh conversion functions that can 
+        be used to create the dual and nodal graphs directly from the 
+        element connectivity arrays. These are highly optimized routines. 
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/BUILD.txt b/3rdParty/metis/metis-5.1.0/GKlib/BUILD.txt
new file mode 100644
index 000000000..cdb9987a9
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/BUILD.txt
@@ -0,0 +1,25 @@
+Building GKlib requires CMake 2.8. Once you've installed CMake run
+
+    $ make
+
+This will build the GKlib library in build/<arch>/. Options can be tweaked by
+running make config. For example,
+
+    $ make config openmp=ON
+    $ make
+
+will build GKlib with OpenMP support if it is available.
+
+GKlib can be installed with
+
+    $ make install
+
+and uninstalled with
+
+    $ make uninstall
+
+You can choose the installation prefix with make config:
+
+    $ make config prefix=~/local
+
+will cause GKlib to be installed in the ~/local tree.
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/CMakeLists.txt b/3rdParty/metis/metis-5.1.0/GKlib/CMakeLists.txt
new file mode 100644
index 000000000..67b600aa6
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 2.8)
+project(GKlib)
+# Set GKLIB_PATH to the absolute form of "." before including GKlibSystem.cmake.
+get_filename_component(abs "." ABSOLUTE)
+set(GKLIB_PATH ${abs})
+unset(abs)
+include(GKlibSystem.cmake)
+# Static library from GKlib_sources (not set in this file; presumably by GKlibSystem.cmake - confirm).
+include_directories(".")
+add_library(GKlib STATIC ${GKlib_sources})
+if(UNIX)
+  target_link_libraries(GKlib m)
+endif(UNIX)
+# Also process the test/ subdirectory.
+include_directories("test")
+add_subdirectory("test")
+# Install the library under lib and the files listed in GKlib_includes under include.
+install(TARGETS GKlib
+  ARCHIVE DESTINATION lib
+  LIBRARY DESTINATION lib)
+install(FILES ${GKlib_includes} DESTINATION include)
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/GKlib.h b/3rdParty/metis/metis-5.1.0/GKlib/GKlib.h
new file mode 100644
index 000000000..492c90f2e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/GKlib.h
@@ -0,0 +1,84 @@
+/*
+ * GKlib.h
+ * 
+ * George's library of most frequently used routines
+ *
+ * $Id: GKlib.h 13005 2012-10-23 22:34:36Z karypis $
+ * Umbrella header: includes the system headers and the gk_*.h headers below.
+ */
+
+#ifndef _GKLIB_H_
+#define _GKLIB_H_ 1
+
+#define GKMSPACE
+/* Map compiler-predefined macros (_MSC_VER, __ICC) to GKlib's __MSC__ / __ICC__ */
+#if defined(_MSC_VER)
+#define __MSC__
+#endif
+#if defined(__ICC)
+#define __ICC__
+#endif
+
+
+#include "gk_arch.h" /*!< This should be here, prior to the includes */
+
+
+/*************************************************************************
+* Header file inclusion section
+**************************************************************************/
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+#include <time.h>
+#include <string.h>
+#include <limits.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <sys/stat.h>
+/* Regular expressions: <pcreposix.h>, GKlib's "gkregex.h", or the system <regex.h> */
+#if defined(__WITHPCRE__)
+  #include <pcreposix.h>
+#else
+  #if defined(USE_GKREGEX)
+    #include "gkregex.h"
+  #else
+    #include <regex.h>
+  #endif /* defined(USE_GKREGEX) */
+#endif /* defined(__WITHPCRE__) */
+
+
+/* <omp.h> is included only when __OPENMP__ is defined */
+#if defined(__OPENMP__) 
+#include <omp.h>
+#endif
+
+
+
+/* GKlib's own headers; angle-bracket form, so the GKlib directory must be on the include path */
+#include <gk_types.h>
+#include <gk_struct.h>
+#include <gk_externs.h>
+#include <gk_defs.h>
+#include <gk_macros.h>
+#include <gk_getopt.h>
+
+#include <gk_mksort.h>
+#include <gk_mkblas.h>
+#include <gk_mkmemory.h>
+#include <gk_mkpqueue.h>
+#include <gk_mkpqueue2.h>
+#include <gk_mkrandom.h>
+#include <gk_mkutils.h>
+
+#include <gk_proto.h>
+
+
+#endif  /* GKlib.h */
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/GKlibSystem.cmake b/3rdParty/metis/metis-5.1.0/GKlib/GKlibSystem.cmake
new file mode 100644
index 000000000..3fcc29108
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/GKlibSystem.cmake
@@ -0,0 +1,129 @@
+# Helper modules.
+include(CheckFunctionExists)
+include(CheckIncludeFile)
+
+# Setup options.
+option(GDB "enable use of GDB" OFF)
+option(ASSERT "turn asserts on" OFF)
+option(ASSERT2 "additional assertions" OFF)
+option(DEBUG "add debugging support" OFF)
+option(GPROF "add gprof support" OFF)
+option(OPENMP "enable OpenMP support" OFF)
+option(PCRE "enable PCRE support" OFF)
+option(GKREGEX "enable GKREGEX support" OFF)
+option(GKRAND "enable GKRAND support" OFF)
+
+# Add compiler flags (defines go to GKlib_COPTIONS; GKlib_COPTS may be replaced by DEBUG/GPROF below).
+if(MSVC)
+  set(GKlib_COPTS "/Ox")
+  set(GKlib_COPTIONS "-DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE -DUSE_GKREGEX")
+elseif(MINGW)
+  set(GKlib_COPTIONS "-DUSE_GKREGEX")
+else()
+  set(GKlib_COPTS "-O3")
+  set(GKlib_COPTIONS "-DLINUX -D_FILE_OFFSET_BITS=64")
+endif(MSVC)
+if(CYGWIN)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DCYGWIN")
+endif(CYGWIN)
+if(CMAKE_COMPILER_IS_GNUCC)
+# GCC opts.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -std=c99 -fno-strict-aliasing")
+  if(NOT MINGW)
+      set(GKlib_COPTIONS "${GKlib_COPTIONS} -fPIC")
+  endif(NOT MINGW)
+# GCC warnings.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Wall -pedantic -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unknown-pragmas")
+elseif(${CMAKE_C_COMPILER_ID} MATCHES "Sun")
+# Sun insists on -xc99.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xc99")
+endif(CMAKE_COMPILER_IS_GNUCC)
+
+# Find OpenMP if it is requested.
+if(OPENMP)
+  include(FindOpenMP)
+  if(OPENMP_FOUND)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__OPENMP__ ${OpenMP_C_FLAGS}")
+  else()
+    message(WARNING "OpenMP was requested but support was not found")
+  endif(OPENMP_FOUND)
+endif(OPENMP)
+
+
+# Add various definitions.
+if(GDB)
+  set(GKlib_COPTS "${GKlib_COPTS} -g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror")
+endif(GDB)
+
+
+if(DEBUG)
+  set(GKlib_COPTS "-g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DDEBUG")
+endif(DEBUG)
+
+if(GPROF)
+  set(GKlib_COPTS "-pg")
+endif(GPROF)
+
+if(NOT ASSERT)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG")
+endif(NOT ASSERT)
+
+if(NOT ASSERT2)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG2")
+endif(NOT ASSERT2)
+
+
+# Add various options
+if(PCRE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__WITHPCRE__")
+endif(PCRE)
+
+if(GKREGEX)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKREGEX")
+endif(GKREGEX)
+
+if(GKRAND)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKRAND")
+endif(GKRAND)
+
+
+# Check for features.
+check_include_file(execinfo.h HAVE_EXECINFO_H)
+if(HAVE_EXECINFO_H)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_EXECINFO_H")
+endif(HAVE_EXECINFO_H)
+
+check_function_exists(getline HAVE_GETLINE)
+if(HAVE_GETLINE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_GETLINE")
+endif(HAVE_GETLINE)
+
+
+# Custom check for TLS.
+if(MSVC)
+   set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=__declspec(thread)")
+else()
+  # Run the compile test only when no earlier configure run has stored a result.
+  if(NOT DEFINED HAVE_THREADLOCALSTORAGE)
+    try_compile(HAVE_THREADLOCALSTORAGE
+      ${CMAKE_BINARY_DIR}
+      ${GKLIB_PATH}/conf/check_thread_storage.c)
+    if(HAVE_THREADLOCALSTORAGE)
+      message(STATUS "checking for thread-local storage - found")
+    else()
+      message(STATUS "checking for thread-local storage - not found")
+    endif()
+  endif()
+  if(NOT HAVE_THREADLOCALSTORAGE)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=")
+  endif()
+endif()
+
+# Finally set the official C flags.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GKlib_COPTIONS} ${GKlib_COPTS}")
+
+# Find GKlib sources.
+file(GLOB GKlib_sources ${GKLIB_PATH}/*.c)
+file(GLOB GKlib_includes ${GKLIB_PATH}/*.h)
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/Makefile b/3rdParty/metis/metis-5.1.0/GKlib/Makefile
new file mode 100644
index 000000000..d17b4f44c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/Makefile
@@ -0,0 +1,76 @@
+# Configuration options.
+gdb      = not-set
+assert   = not-set
+assert2  = not-set
+debug    = not-set
+gprof    = not-set
+openmp   = not-set
+prefix   = not-set
+pcre     = not-set
+gkregex  = not-set
+gkrand   = not-set
+
+
+# Basically proxies everything to the builddir cmake.
+cputype = $(shell uname -m | sed "s/\\ /_/g")
+systype = $(shell uname -s)
+
+BUILDDIR = build/$(systype)-$(cputype)
+
+# Process configuration options: every option that was set is forwarded under its own value.
+CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+ifneq ($(gdb), not-set)
+    CONFIG_FLAGS += -DGDB=$(gdb)
+endif
+ifneq ($(assert), not-set)
+    CONFIG_FLAGS += -DASSERT=$(assert)
+endif
+ifneq ($(assert2), not-set)
+    CONFIG_FLAGS += -DASSERT2=$(assert2)
+endif
+ifneq ($(debug), not-set)
+    CONFIG_FLAGS += -DDEBUG=$(debug)
+endif
+ifneq ($(gprof), not-set)
+    CONFIG_FLAGS += -DGPROF=$(gprof)
+endif
+ifneq ($(openmp), not-set)
+    CONFIG_FLAGS += -DOPENMP=$(openmp)
+endif
+ifneq ($(pcre), not-set)
+    CONFIG_FLAGS += -DPCRE=$(pcre)
+endif
+ifneq ($(gkregex), not-set)
+    CONFIG_FLAGS += -DGKREGEX=$(gkregex)
+endif
+ifneq ($(gkrand), not-set)
+    CONFIG_FLAGS += -DGKRAND=$(gkrand)
+endif
+ifneq ($(prefix), not-set)
+    CONFIG_FLAGS += -DCMAKE_INSTALL_PREFIX=$(prefix)
+endif
+
+define run-config
+mkdir -p $(BUILDDIR)
+cd $(BUILDDIR) && cmake $(CURDIR) $(CONFIG_FLAGS)
+endef
+
+all clean install: $(BUILDDIR)
+	$(MAKE) -C $(BUILDDIR) $@
+
+uninstall:
+	 xargs rm < $(BUILDDIR)/install_manifest.txt
+
+$(BUILDDIR):
+	$(run-config)
+
+config: distclean
+	$(run-config)
+
+distclean:
+	rm -rf $(BUILDDIR)
+
+remake:
+	find . -name CMakeLists.txt -exec touch {} ';'
+
+.PHONY: config distclean all clean install uninstall remake
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/b64.c b/3rdParty/metis/metis-5.1.0/GKlib/b64.c
new file mode 100644
index 000000000..afacd68a1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/b64.c
@@ -0,0 +1,95 @@
+/*! 
+\file  b64.c
+\brief This file contains some simple 8bit-to-6bit encoding/decoding routines
+
+Most of these routines are outdated and should be converted using glibc's equivalent
+routines.
+
+\date   Started 2/22/05
+\author George
+\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+
+\verbatim 
+$Copyright$ 
+$License$
+\endverbatim
+
+*/
+
+
+#include "GKlib.h"
+
+#define B64OFFSET       48      /* This is the '0' number */
+
+
+/******************************************************************************
+* Pack 3 input bytes into 4 output characters that carry 6 bits each
+*******************************************************************************/
+void encodeblock(unsigned char *in, unsigned char *out)
+{
+  int k;
+
+  /* the stores stay in this order: out[] is written while in[] is still being read */
+  out[0] = in[0] >> 2;
+  out[1] = ((in[0] & 0x03) << 4) | (in[1] >> 4);
+  out[2] = ((in[1] & 0x0f) << 2) | (in[2] >> 6);
+  out[3] = in[2] & 0x3f;
+
+  /* move each 6-bit value up by B64OFFSET, i.e., into the range that starts at '0' */
+  for (k=0; k<4; k++)
+    out[k] += B64OFFSET;
+}
+
+/******************************************************************************
+* Decode 4 '6-bit' characters into 3 '8-bit' bytes; in[] is read, never modified
+*******************************************************************************/
+void decodeblock(unsigned char *in, unsigned char *out)
+{   
+  /* remove the B64OFFSET on local copies instead of overwriting in[0..3] */
+  unsigned char c0 = in[0] - B64OFFSET, c1 = in[1] - B64OFFSET;
+  unsigned char c2 = in[2] - B64OFFSET, c3 = in[3] - B64OFFSET;
+
+  /* storing into an unsigned char drops the bits shifted above bit 7 */
+  out[0] = (c0 << 2 | c1 >> 4);
+  out[1] = (c1 << 4 | c2 >> 2);
+  out[2] = (c2 << 6 | c3);
+}
+
+
+/******************************************************************************
+* Encodes nbytes input bytes (nbytes must be a multiple of 3) by passing them
+* to encodeblock() three at a time; every group produces 4 output characters.
+* The caller provides an output buffer that is large enough, including room
+* for the terminating '\0' that is appended after the last group.
+*******************************************************************************/
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src=0, dst=0;
+
+  if (nbytes%3 != 0)
+    gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes);
+
+  while (src < nbytes) {
+    encodeblock(inbuffer+src, outbuffer+dst);
+    src += 3;  dst += 4;
+  }
+  outbuffer[dst] = '\0';
+}
+
+
+
+/******************************************************************************
+* Decodes nbytes encoded characters (nbytes must be a multiple of 4) by passing
+* them to decodeblock() four at a time; every group produces 3 output bytes.
+* The caller provides an output buffer that is large enough; nothing is appended.
+*******************************************************************************/
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src=0, dst=0;
+
+  if (nbytes%4 != 0)
+    gk_errexit(SIGERR, "GKDecodeBase64: Input buffer size should be a multiple of 4! (%d)\n", nbytes);
+
+  for (; src<nbytes; src+=4, dst+=3)
+    decodeblock(inbuffer+src, outbuffer+dst);
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/blas.c b/3rdParty/metis/metis-5.1.0/GKlib/blas.c
new file mode 100644
index 000000000..b65cd0264
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/blas.c
@@ -0,0 +1,36 @@
+/*!
+\file blas.c
+\brief This file contains GKlib's implementation of BLAS-like routines
+
+The BLAS routines that are currently implemented are mostly level-one.
+They follow a naming convention of the type gk_[type][name], where
+[type] is one of c, i, i32, i64, z, f, d, and idx, one for each of the
+scalar datatypes that are instantiated at the end of this file.
+
+These routines are implemented using a generic macro template,
+which is used for code generation.
+
+\date   Started 9/28/95
+\author George
+\version\verbatim $Id: blas.c 11848 2012-04-20 13:47:37Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! Instantiate the GK_MKBLAS template per scalar type: name prefix, element type, and a third type (presumably the reduction result type; confirm in gk_mkblas.h) */
+/*************************************************************************/
+GK_MKBLAS(gk_c,   char,     int)
+GK_MKBLAS(gk_i,   int,      int)
+GK_MKBLAS(gk_i32, int32_t,  int32_t)
+GK_MKBLAS(gk_i64, int64_t,  int64_t)
+GK_MKBLAS(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS(gk_f,   float,    float)
+GK_MKBLAS(gk_d,   double,   double)
+GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/conf/check_thread_storage.c b/3rdParty/metis/metis-5.1.0/GKlib/conf/check_thread_storage.c
new file mode 100644
index 000000000..e6e1e980e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/conf/check_thread_storage.c
@@ -0,0 +1,5 @@
+extern __thread int x;  /* the construct under test: does the compiler accept the __thread qualifier? */
+
+int main(int argc, char **argv) {
+  return 0;  /* GKlibSystem.cmake passes this file to try_compile(); only whether it builds matters */
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/csr.c b/3rdParty/metis/metis-5.1.0/GKlib/csr.c
new file mode 100644
index 000000000..a19d793bd
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/csr.c
@@ -0,0 +1,2010 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines for dealing with CSR matrices
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: csr.c 13437 2013-01-11 21:54:10Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Allocates a new CSR matrix object and puts it in its initial state.
+    \returns the allocated matrix, as initialized by gk_csr_Init().
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Create()
+{
+  gk_csr_t *newmat;
+
+  newmat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat");
+  /* gk_csr_Init() zeroes the object and marks both dimensions as unset (-1) */
+  gk_csr_Init(newmat);
+
+  return newmat;
+}
+
+
+/*************************************************************************/
+/*! Resets a matrix object: every byte is zeroed and nrows/ncols become -1.
+    \param mat is the matrix to be initialized (its old arrays are not freed).
+*/
+/*************************************************************************/
+void gk_csr_Init(gk_csr_t *mat)
+{
+  memset(mat, 0, sizeof(*mat));
+  mat->ncols = mat->nrows = -1;
+}
+
+
+/*************************************************************************/
+/*! Releases a matrix object together with its contents; a NULL *mat is ignored.
+    \param mat is the address of the pointer to the matrix to be freed.
+*/
+/*************************************************************************/
+void gk_csr_Free(gk_csr_t **mat)
+{
+  if (*mat != NULL) {
+    gk_csr_FreeContents(*mat);
+    gk_free((void **)mat, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Hands the row- and column-oriented arrays of the matrix to gk_free().
+    The gk_csr_t object itself is not released by this routine.
+    \param mat is the matrix whose contents will be freed.
+*/    
+/*************************************************************************/
+void gk_csr_FreeContents(gk_csr_t *mat)
+{
+  /* row-oriented arrays */
+  gk_free((void *)&mat->rowptr, &mat->rowind, &mat->rowval, &mat->rowids,
+          &mat->rnorms, &mat->rsums, &mat->rsizes, &mat->rvols, &mat->rwgts, LTERM);
+  /* column-oriented arrays */
+  gk_free((void *)&mat->colptr, &mat->colind, &mat->colval, &mat->colids,
+          &mat->cnorms, &mat->csums, &mat->csizes, &mat->cvols, &mat->cwgts, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Duplicates the dimensions and the ptr/ind/val/ids/norms arrays of a matrix.
+    \param mat is the matrix to be duplicated.
+    \returns the newly created copy; arrays that mat lacks are not created.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat)
+{
+  int nr = mat->nrows, nc = mat->ncols;
+  ssize_t nnz;
+  gk_csr_t *copy = gk_csr_Create();
+
+  copy->nrows = nr;
+  copy->ncols = nc;
+
+  /* row view: the offsets plus the per-row ids and norms */
+  if (mat->rowptr)
+    copy->rowptr = gk_zcopy(nr+1, mat->rowptr, gk_zmalloc(nr+1, "gk_csr_Dup: rowptr"));
+  if (mat->rowids)
+    copy->rowids = gk_icopy(nr, mat->rowids, gk_imalloc(nr, "gk_csr_Dup: rowids"));
+  if (mat->rnorms)
+    copy->rnorms = gk_fcopy(nr, mat->rnorms, gk_fmalloc(nr, "gk_csr_Dup: rnorms"));
+  /* row view: indices and values; their count is the last rowptr entry */
+  if (mat->rowind) {
+    nnz = mat->rowptr[nr];
+    copy->rowind = gk_icopy(nnz, mat->rowind, gk_imalloc(nnz, "gk_csr_Dup: rowind"));
+  }
+  if (mat->rowval) {
+    nnz = mat->rowptr[nr];
+    copy->rowval = gk_fcopy(nnz, mat->rowval, gk_fmalloc(nnz, "gk_csr_Dup: rowval"));
+  }
+
+  /* column view: the offsets plus the per-column ids and norms */
+  if (mat->colptr)
+    copy->colptr = gk_zcopy(nc+1, mat->colptr, gk_zmalloc(nc+1, "gk_csr_Dup: colptr"));
+  if (mat->colids)
+    copy->colids = gk_icopy(nc, mat->colids, gk_imalloc(nc, "gk_csr_Dup: colids"));
+  if (mat->cnorms)
+    copy->cnorms = gk_fcopy(nc, mat->cnorms, gk_fmalloc(nc, "gk_csr_Dup: cnorms"));
+  /* column view: indices and values; their count is the last colptr entry */
+  if (mat->colind) {
+    nnz = mat->colptr[nc];
+    copy->colind = gk_icopy(nnz, mat->colind, gk_imalloc(nnz, "gk_csr_Dup: colind"));
+  }
+  if (mat->colval) {
+    nnz = mat->colptr[nc];
+    copy->colval = gk_fcopy(nnz, mat->colval, gk_fmalloc(nnz, "gk_csr_Dup: colval"));
+  }
+
+  return copy;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a set of consecutive rows.
+    \param mat is the original matrix.
+    \param rstart is the starting row.
+    \param nrows is the number of rows from rstart to extract.
+    \returns the row structure of the new submatrix, or NULL for a bad range.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
+{
+  ssize_t i, nnz;
+  gk_csr_t *nmat;
+
+  /* reject negative arguments and row ranges that run past the last row;
+     the subtraction form avoids forming the sum rstart+nrows */
+  if (rstart < 0 || nrows < 0 || nrows > mat->nrows-rstart)
+    return NULL;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows  = nrows;
+  nmat->ncols  = mat->ncols;
+
+  /* rowptr, rowind and rowval are copied only when the source has a rowptr,
+     because the nonzero range of the extracted rows is read from it */
+  if (mat->rowptr) {
+    nmat->rowptr = gk_zcopy(nrows+1, mat->rowptr+rstart, 
+                            gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr"));
+
+    /* rebase the offsets to start at 0; rowptr[0] itself is adjusted last */
+    for (i=nrows; i>=0; i--)
+      nmat->rowptr[i] -= nmat->rowptr[0];
+    ASSERT(nmat->rowptr[0] == 0);
+
+    nnz = mat->rowptr[rstart+nrows]-mat->rowptr[rstart];
+    ASSERT(nmat->rowptr[nrows] == nnz);
+    if (mat->rowind)
+      nmat->rowind = gk_icopy(nnz, mat->rowind+mat->rowptr[rstart], 
+                              gk_imalloc(nnz, "gk_csr_ExtractSubmatrix: rowind"));
+    if (mat->rowval)
+      nmat->rowval = gk_fcopy(nnz, mat->rowval+mat->rowptr[rstart], 
+                              gk_fmalloc(nnz, "gk_csr_ExtractSubmatrix: rowval"));
+  }
+
+  /* the per-row arrays are sliced directly */
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nrows, mat->rowids+rstart, gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart, gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart, gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a certain set of rows.
+    \param mat is the original matrix; it must have rowptr and rowind.
+    \param nrows is the number of rows to extract.
+    \param rind holds the nrows row numbers to extract (not range-checked).
+    \returns the row structure of the newly created submatrix.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
+{
+  ssize_t i, ii, len, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<nrows; i++)  /* pass 1: total nonzeros of the selected rows */
+    nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]];
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractRows: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractRows: rowind");
+  if (mat->rowval)  /* a source without values gives a submatrix without values */
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractRows: rowval");
+
+  nmat->rowptr[0] = 0;
+  for (nnz=0, ii=0; ii<nrows; ii++) {  /* pass 2: append the rows in rind order */
+    i   = rind[ii];
+    len = mat->rowptr[i+1]-mat->rowptr[i];
+    gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)
+      gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+    nnz += len;
+    nmat->rowptr[ii+1] = nnz;
+  }
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix corresponding to a specified partitioning of rows.
+    \param mat is the original matrix; it must have rowptr and rowind.
+    \param part is the partitioning vector of the rows (one entry per row).
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created submatrix.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid)
+{
+  ssize_t i, j, len, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = 0;  /* counted in the first pass */
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<mat->nrows; i++) {  /* pass 1: size the submatrix */
+    if (part[i] == pid) {
+      nmat->nrows++;
+      nnz += mat->rowptr[i+1]-mat->rowptr[i];
+    }
+  }
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
+  if (mat->rowval)  /* a source without values gives a submatrix without values */
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval");
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<mat->nrows; i++) {  /* pass 2: append the matching rows */
+    if (part[i] != pid)
+      continue;
+    len = mat->rowptr[i+1]-mat->rowptr[i];
+    gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)
+      gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+    nnz += len;
+    nmat->rowptr[++j] = nnz;
+  }
+  ASSERT(j == nmat->nrows);
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Distributes the nonzeros of a matrix over several matrices according
+    to a per-nonzero color.
+    \param mat is the original matrix.
+    \param color is an array with one entry per non-zero of the matrix
+           (row-wise structure). One matrix is created for every value
+           from 0 up to the largest color. For meaningful results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array with one matrix per color; its length is not returned.
+*/
+/**************************************************************************/
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color)
+{
+  ssize_t c, r, e, *rowptr;
+  int nrows, ncolors, *rowind;
+  float *rowval;
+  gk_csr_t **smats, *sub;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color)+1;
+  smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats");
+  for (c=0; c<ncolors; c++) {
+    sub = smats[c] = gk_csr_Create();
+    sub->nrows  = mat->nrows;
+    sub->ncols  = mat->ncols;
+    sub->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr"); 
+  }
+
+  /* count, per color, how many nonzeros every row contributes */
+  for (r=0; r<nrows; r++)
+    for (e=rowptr[r]; e<rowptr[r+1]; e++) 
+      smats[color[e]]->rowptr[r]++;
+
+  /* MAKECSR (defined elsewhere) presumably turns the counts into offsets; then size the arrays */
+  for (c=0; c<ncolors; c++) {
+    sub = smats[c];
+    MAKECSR(e, nrows, sub->rowptr);
+    sub->rowind = gk_imalloc(sub->rowptr[nrows], "gk_csr_Split: smats[i]->rowind"); 
+    sub->rowval = gk_fmalloc(sub->rowptr[nrows], "gk_csr_Split: smats[i]->rowval"); 
+  }
+
+  /* scatter every nonzero into the matrix of its color; rowptr[r] is advanced as the write position */
+  for (r=0; r<nrows; r++) {
+    for (e=rowptr[r]; e<rowptr[r+1]; e++) {
+      sub = smats[color[e]];
+      sub->rowind[sub->rowptr[r]] = rowind[e];
+      sub->rowval[sub->rowptr[r]] = rowval[e];
+      sub->rowptr[r]++;
+    }
+  }
+
+  for (c=0; c<ncolors; c++) 
+    SHIFTCSR(e, nrows, smats[c]->rowptr);
+
+  return smats;
+}
+
+
+/**************************************************************************/
+/*! Reads a CSR matrix from the supplied file and stores it in the matrix's 
+    row structure (column structure for GK_CSR_FMT_BINCOL).
+    \param filename is the file that stores the data.
+    \param format is either GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO, 
+           GK_CSR_FMT_CSR, GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL 
+           specifying the type of the input format. 
+           The GK_CSR_FMT_CSR does not contain a header
+           line, whereas the GK_CSR_FMT_BINROW is a binary format written 
+           by gk_csr_Write() using the same format specifier.
+    \param readvals is either 1 or 0, indicating if the file contains
+           values or it does not. The GK_CSR_FMT_METIS and
+           GK_CSR_FMT_CLUTO formats override it from their header.
+    \param numbering is either 1 or 0, indicating if the numbering of the 
+           indices start from 1 or 0, respectively. If they start from 1, 
+           they are automatically decremented during input so that they
+           will start from 0. GK_CSR_FMT_METIS and GK_CSR_FMT_CLUTO
+           always use 1.
+    \returns the matrix that was read; input errors go to gk_errexit().
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering)
+{
+  ssize_t i, k, l;
+  size_t nrows, ncols, nnz, fmt, ncon;
+  size_t lnlen;
+  ssize_t *rowptr;
+  int *rowind, ival;
+  float *rowval=NULL, fval;
+  int readsizes, readwgts, nfields;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin;
+  gk_csr_t *mat=NULL;
+
+  /* nfields is an int so that an EOF (-1) from sscanf() is not mistaken for a large field count */
+  if (!gk_fexists(filename)) 
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  if (format == GK_CSR_FMT_BINROW) {
+    mat = gk_csr_Create();
+
+    fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+    if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+      gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+    if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+      gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+    mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr");
+    if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1)
+      gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename);
+    mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind");
+    if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+      gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename);
+    if (readvals == 1) {
+      mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval");
+      if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+        gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename);
+    }
+
+    gk_fclose(fpin);
+    return mat;
+  }
+
+  if (format == GK_CSR_FMT_BINCOL) {
+    mat = gk_csr_Create();
+
+    fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+    if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+      gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+    if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+      gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+    mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr");
+    if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1)
+      gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename);
+    mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind");
+    if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+      gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename);
+    if (readvals) {
+      mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval");
+      if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+        gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename);
+    }
+
+    gk_fclose(fpin);
+    return mat;
+  }
+
+  /* text formats: determine nrows and nnz first; header lines starting with '%' are skipped */
+  if (format == GK_CSR_FMT_CLUTO) {
+    fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+    do {
+      if (gk_getline(&line, &lnlen, fpin) <= 0)
+        gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+    } while (line[0] == '%');
+
+    if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3)
+      gk_errexit(SIGERR, "Header line must contain 3 integers.\n");
+
+    readsizes = 0;
+    readwgts  = 0;
+    readvals  = 1;
+    numbering = 1;
+  }
+  else if (format == GK_CSR_FMT_METIS) {
+    fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+    do {
+      if (gk_getline(&line, &lnlen, fpin) <= 0)
+        gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+    } while (line[0] == '%');
+
+    fmt = ncon = 0;
+    nfields = sscanf(line, "%zu %zu %zu %zu", &nrows, &nnz, &fmt, &ncon);
+    if (nfields < 2)
+      gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+    ncols = nrows;
+    nnz *= 2;
+
+    if (fmt > 111)
+      gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+    sprintf(fmtstr, "%03zu", fmt%1000);
+    readsizes = (fmtstr[0] == '1');
+    readwgts  = (fmtstr[1] == '1');
+    readvals  = (fmtstr[2] == '1');
+    numbering = 1;
+    ncon      = (ncon == 0 ? 1 : ncon);
+  }
+  else {
+    readsizes = 0;
+    readwgts  = 0;
+
+    gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
+
+    if (readvals == 1 && nnz%2 == 1)
+      gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not even.\n", nnz, readvals);
+    if (readvals == 1)
+      nnz = nnz/2;
+    fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+  }
+
+  mat = gk_csr_Create();
+
+  mat->nrows = nrows;
+
+  rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr");
+  rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind");
+  if (readvals != 2)
+    rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval");
+
+  if (readsizes)
+    mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes");
+
+  if (readwgts)
+    mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts");
+
+  /*----------------------------------------------------------------------
+   * Read the sparse matrix file
+   *---------------------------------------------------------------------*/
+  numbering = (numbering ? - 1 : 0);
+  for (ncols=0, rowptr[0]=0, k=0, i=0; i<nrows; i++) {
+    do {
+      if (gk_getline(&line, &lnlen, fpin) == -1)
+        gk_errexit(SIGERR, "Premature end of input file: file while reading row %zd\n", i);
+    } while (line[0] == '%');
+
+    head = line;
+    tail = NULL;
+
+    /* Read vertex sizes */
+    if (readsizes) {
+#ifdef __MSC__
+      mat->rsizes[i] = (float)strtod(head, &tail);
+#else
+      mat->rsizes[i] = strtof(head, &tail);
+#endif
+      if (tail == head)
+        gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+      if (mat->rsizes[i] < 0)
+        gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+      head = tail;
+    }
+
+    /* Read vertex weights */
+    if (readwgts) {
+      for (l=0; l<ncon; l++) {
+#ifdef __MSC__
+        mat->rwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+        mat->rwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+        if (tail == head)
+          gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                  "for the %zu constraints.\n", i+1, ncon);
+        if (mat->rwgts[i*ncon+l] < 0)
+          gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+        head = tail;
+      }
+    }
+
+    /* Read the rest of the row */
+    while (1) {
+      ival = (int)strtol(head, &tail, 0);
+      if (tail == head) 
+        break;
+      head = tail;
+      if ((size_t)k >= nnz)  /* rowind/rowval were allocated with room for nnz entries only */
+        gk_errexit(SIGERR, "gk_csr_Read: The input file has more than the expected %zu nonzeros (row %zd).\n", nnz, i);
+      if ((rowind[k] = ival + numbering) < 0)
+        gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+
+      ncols = gk_max(rowind[k], ncols);
+
+      if (readvals == 1) {
+#ifdef __MSC__
+        fval = (float)strtod(head, &tail);
+#else
+        fval = strtof(head, &tail);
+#endif
+        if (tail == head)
+          gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k);
+        head = tail;
+
+        rowval[k] = fval;
+      }
+      k++;
+    }
+    rowptr[i+1] = k;
+  }
+
+  if (format == GK_CSR_FMT_METIS) {
+    ASSERT(ncols+1 == mat->nrows);
+    mat->ncols = mat->nrows;
+  }
+  else {
+    mat->ncols = ncols+1;
+  }
+
+  if (k != nnz)
+    gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in "
+                       "the input file. NNZ=%zu, ActualNNZ=%zd.\n", nnz, k);
+
+  gk_fclose(fpin);
+
+  gk_free((void **)&line, LTERM);
+
+  return mat;
+}
+
+
+/**************************************************************************/
+/*! Writes the row (bycol == 0) or column (bycol != 0) structure of a matrix to a binary file as
+    nrows, ncols, the offsets, the indices and, if writevals is set, the values. Short writes raise an error. */
+/**************************************************************************/
+static void csr_WriteBinary(gk_csr_t *mat, char *filename, int bycol, int writevals)
+{
+  ssize_t n    = (bycol ? mat->ncols : mat->nrows);
+  ssize_t *ptr = (bycol ? mat->colptr : mat->rowptr);
+  void *ind    = (bycol ? (void *)mat->colind : (void *)mat->rowind);
+  void *val    = (bycol ? (void *)mat->colval : (void *)mat->rowval);
+  size_t nnz;
+  FILE *fpout;
+
+  if (filename == NULL)
+    gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+  fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+  nnz = ptr[n];
+  if (fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout) != 1 ||
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout) != 1 ||
+      fwrite(ptr, sizeof(ssize_t), n+1, fpout) != (size_t)(n+1) ||
+      fwrite(ind, sizeof(int32_t), nnz, fpout) != nnz ||
+      (writevals && fwrite(val, sizeof(float), nnz, fpout) != nnz))
+    gk_errexit(SIGERR, "Failed to write the matrix to file %s!\n", filename);
+  gk_fclose(fpout);
+}
+
+/**************************************************************************/
+/*! Writes the row-based structure of a matrix into a file (the column-based
+    structure when GK_CSR_FMT_BINCOL is used).
+    \param mat is the matrix to be written,
+    \param filename is the name of the output file. For the text formats a
+           NULL filename sends the output to stdout; the binary formats
+           require a filename.
+    \param format is one of: GK_CSR_FMT_CLUTO, GK_CSR_FMT_CSR, 
+           GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL.
+    \param writevals is either 1 or 0 indicating if the values will be 
+           written or not. GK_CSR_FMT_CLUTO always writes them.
+    \param numbering is either 1 or 0 indicating if the internal 0-based 
+           numbering will be shifted by one or not during output.
+           GK_CSR_FMT_CLUTO always shifts; the binary formats never do.
+*/
+/**************************************************************************/
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering)
+{
+  ssize_t i, j;
+  FILE *fpout;
+
+  if (format == GK_CSR_FMT_BINROW || format == GK_CSR_FMT_BINCOL) {
+    csr_WriteBinary(mat, filename, format == GK_CSR_FMT_BINCOL, writevals);
+    return;
+  }
+
+  if (filename)
+    fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout");
+  else
+    fpout = stdout; 
+
+  if (format == GK_CSR_FMT_CLUTO) {
+    fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+    writevals = 1;
+    numbering = 1;
+  }
+
+  for (i=0; i<mat->nrows; i++) {
+    for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+      fprintf(fpout, " %d", mat->rowind[j]+(numbering ? 1 : 0));
+      if (writevals) 
+        fprintf(fpout, " %f", mat->rowval[j]);
+    }
+    fprintf(fpout, "\n");
+  }
+  if (filename)
+    gk_fclose(fpout);
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the matrix. The pruning takes place 
+    by analyzing the row structure of the matrix. The pruning takes place
+    by emptying rows/columns but it does not affect the numbering of the
+    remaining rows/columns (nrows and ncols are carried over unchanged).
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param minf is the minimum number of rows (columns) that a column (row) must
+           be present in order to be kept (the bound is inclusive),
+    \param maxf is the maximum number of rows (columns) that a column (row) must
+          be present at in order to be kept (the bound is inclusive).
+    \returns the pruned matrix consisting only of its row-based structure. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval; /* read unconditionally below */
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind"); /* sized for the case where nothing is pruned */
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen"); /* presumably filled with the 0 passed in; used as counters below */
+
+      for (i=0; i<nrows; i++) { /* count in how many rows each column appears */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      for (i=0; i<ncols; i++) /* collen[] is reused as a keep (1) / drop (0) flag */
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) { /* copy only the entries of kept columns */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) { /* a row is kept when its length lies in [minf, maxf] */
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz; /* a pruned row stays in place as an empty row */
+      }
+      break;
+
+    default:
+      gk_csr_Free(&nmat); /* release the partial result before reporting */
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL; /* reached only if gk_errexit() returns */
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight entries whose
+    sum accounts for a certain fraction of the overall weight of the 
+    row/column.
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param norm indicates the norm that will be used to aggregate the weights
+           and possible values are 1 or 2,
+    \param fraction is the fraction of the overall norm that will be retained
+           by the kept entries.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols, ncand, maxlen=0;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval, rsum, tsum;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  /* The row view sizes the output in both modes: check it before it is used */
+  if (mat->rowptr == NULL) 
+    gk_errexit(SIGERR, "Cannot filter a matrix whose row-based structure has not been created.\n");
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval");
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      gk_zcopy(nrows+1, rowptr, nrowptr); /* nrowptr[r] starts as the write cursor of row r */
+
+      for (i=0; i<ncols; i++) 
+        maxlen = gk_max(maxlen, colptr[i+1]-colptr[i]);
+
+      /* Serial on purpose: all columns advance the shared cursors in nrowptr[] */
+      {
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+
+        /* an "omp for" over the columns would race on nrowptr[]/nrowind[] */
+        for (i=0; i<ncols; i++) {
+          for (tsum=0.0, ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+            cand[ncand].val = colind[j];
+            cand[ncand].key = colval[j];
+            tsum += (norm == 1 ? colval[j] : colval[j]*colval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) { /* stops once the kept sum exceeds the target */
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+            nrowind[nrowptr[cand[j].val]] = i;
+            nrowval[nrowptr[cand[j].val]] = cand[j].key;
+            nrowptr[cand[j].val]++;
+          }
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact the nrowind/nrowval */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) { /* row i holds entries [rowptr[i], nrowptr[i]) */
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz; /* now the end offset of row i */
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      break;
+
+    case GK_CSR_ROW:
+      for (i=0; i<nrows; i++) 
+        maxlen = gk_max(maxlen, rowptr[i+1]-rowptr[i]);
+
+      #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
+      {
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+
+        #pragma omp for schedule(static)
+        for (i=0; i<nrows; i++) { /* each row only writes its own slots, so rows are independent */
+          for (tsum=0.0, ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+            cand[ncand].val = rowind[j];
+            cand[ncand].key = rowval[j];
+            tsum += (norm == 1 ? rowval[j] : rowval[j]*rowval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+            nrowind[rowptr[i]+j] = cand[j].val;
+            nrowval[rowptr[i]+j] = cand[j].key;
+          }
+          nrowptr[i+1] = rowptr[i]+j; /* end of the kept entries of row i, before compaction */
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact nrowind/nrowval */
+      nrowptr[0] = nnz = 0;
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i+1]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight top-K entries 
+    along each row/column and those entries whose weight is at least
+    a specified value.
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param topk is the number of the highest weight entries to keep.
+    \param keepval is the weight at or above which a term will be kept. This
+           is used to select additional terms past the first topk.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
+{
+  ssize_t i, j, k, nnz;
+  int nrows, ncols, ncand;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  /* The row view sizes the output in both modes: check it before it is used */
+  if (mat->rowptr == NULL) 
+    gk_errexit(SIGERR, "Cannot filter a matrix whose row-based structure has not been created.\n");
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_TopKPlusFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowval");
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      cand = gk_fkvmalloc(nrows, "gk_csr_TopKPlusFilter: cand"); /* assumes a column has at most nrows entries */
+
+      gk_zcopy(nrows+1, rowptr, nrowptr); /* nrowptr[r] starts as the write cursor of row r */
+      for (i=0; i<ncols; i++) {
+        for (ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+          cand[ncand].val = colind[j];
+          cand[ncand].key = colval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        k = gk_min(topk, ncand);
+        for (j=0; j<k; j++) { /* the first topk candidates are always kept */
+          nrowind[nrowptr[cand[j].val]] = i;
+          nrowval[nrowptr[cand[j].val]] = cand[j].key;
+          nrowptr[cand[j].val]++;
+        }
+        for (; j<ncand; j++) { /* then keep going while the weight is >= keepval */
+          if (cand[j].key < keepval) 
+            break;
+
+          nrowind[nrowptr[cand[j].val]] = i;
+          nrowval[nrowptr[cand[j].val]] = cand[j].key;
+          nrowptr[cand[j].val]++;
+        }
+      }
+
+      /* compact the nrowind/nrowval */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) { /* row i holds entries [rowptr[i], nrowptr[i]) */
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz; /* now the end offset of row i */
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      cand = gk_fkvmalloc(ncols, "gk_csr_TopKPlusFilter: cand"); /* assumes a row has at most ncols entries */
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+          cand[ncand].val = rowind[j];
+          cand[ncand].key = rowval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        k = gk_min(topk, ncand);
+        for (j=0; j<k; j++, nnz++) { /* the first topk candidates are always kept */
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        for (; j<ncand; j++, nnz++) { /* then keep going while the weight is >= keepval */
+          if (cand[j].key < keepval) 
+            break;
+
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the terms whose contribution to
+    the total length of the document is greater than a user-supplied multiple
+    over the average.
+
+    This routine assumes that the vectors are normalized to be unit length.
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned; only GK_CSR_ROW is implemented,
+    \param zscore is the multiplicative factor over the average contribution 
+           to the length of the document.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
+{
+  ssize_t i, j, nnz;
+  int nrows;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  float *rowval, *nrowval, avgwgt;
+  gk_csr_t *nmat;
+
+  if (mat->rowptr == NULL) /* checked before rowptr[nrows] sizes the output */
+    gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+  nmat = gk_csr_Create();
+  
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+
+  nrows  = mat->nrows; 
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval");
+
+  switch (what) {
+    case GK_CSR_COL:
+      gk_csr_Free(&nmat); /* same cleanup as the default path: never hand back a half-built matrix */
+      gk_errexit(SIGERR, "This has not been implemented yet.\n");
+      return NULL;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        avgwgt = zscore/(rowptr[i+1]-rowptr[i]); /* threshold = zscore / row length; unused when the row is empty */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (rowval[j] > avgwgt) { /* strictly above the threshold */
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Compacts the column-space of the matrix by removing empty columns.
+    As a result of the compaction, the column numbers are renumbered. 
+    The compaction operation is done in place and only affects the row-based
+    representation of the matrix (rowind and ncols; rowptr is only read).
+    The new columns are ordered in decreasing frequency.
+   
+    \param mat the matrix whose empty columns will be removed.
+*/
+/**************************************************************************/
+void gk_csr_CompactColumns(gk_csr_t *mat)
+{
+  ssize_t i;
+  int nrows, ncols, nncols;
+  ssize_t *rowptr;
+  int *rowind, *colmap;
+  gk_ikv_t *clens;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+  colmap = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap"); /* old column id -> new column id */
+
+  clens = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens"); /* (key = count, val = old column id) */
+  for (i=0; i<ncols; i++) {
+    clens[i].key = 0;
+    clens[i].val = i;
+  }
+
+  for (i=0; i<rowptr[nrows]; i++) /* count how many nonzeros fall in each column */
+    clens[rowind[i]].key++;
+  gk_ikvsortd(ncols, clens); /* presumably sorts by decreasing key, matching the header comment */
+
+  for (nncols=0, i=0; i<ncols; i++) { /* hand out new ids in sorted order */
+    if (clens[i].key > 0) 
+      colmap[clens[i].val] = nncols++;
+    else
+      break; /* stops at the first empty column; relies on the sort placing all empty ones last */
+  }
+
+  for (i=0; i<rowptr[nrows]; i++) /* colmap[] is only read for columns that occur, which were all assigned */
+    rowind[i] = colmap[rowind[i]];
+
+  mat->ncols = nncols;
+
+  gk_free((void **)&colmap, &clens, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Sorts the indices in increasing order; the values, if present, follow.
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which set of
+           indices to sort.
+*/
+/**************************************************************************/
+void gk_csr_SortIndices(gk_csr_t *mat, int what)
+{
+  int n, nn=0;
+  ssize_t *ptr;
+  int *ind;
+  float *val;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr)
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      ind = mat->rowind;
+      val = mat->rowval; /* may be NULL when the matrix carries no values */
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr)
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      ind = mat->colind;
+      val = mat->colval; /* may be NULL when the matrix carries no values */
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+  #pragma omp parallel if (n > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;
+    float *tval;
+
+    #pragma omp single
+    for (i=0; i<n; i++) /* nn = longest row/column; sizes the per-thread scratch buffers */
+      nn = gk_max(nn, ptr[i+1]-ptr[i]);
+  
+    cand = gk_ikvmalloc(nn, "gk_csr_SortIndices: cand");
+    tval = gk_fmalloc(nn, "gk_csr_SortIndices: tval");
+  
+    #pragma omp for schedule(static)
+    for (i=0; i<n; i++) {
+      for (k=0, j=ptr[i]; j<ptr[i+1]; j++) {
+        if (j > ptr[i] && ind[j] < ind[j-1])
+          k = 1; /* an inversion */
+        cand[j-ptr[i]].val = j-ptr[i]; /* remember the original position */
+        cand[j-ptr[i]].key = ind[j];
+        if (val) tval[j-ptr[i]] = val[j]; /* nothing to save without values */
+      }
+      if (k) { /* already-sorted rows/columns are left untouched */
+        gk_ikvsorti(ptr[i+1]-ptr[i], cand);
+        for (j=ptr[i]; j<ptr[i+1]; j++) {
+          ind[j] = cand[j-ptr[i]].key;
+          if (val) val[j] = tval[cand[j-ptr[i]].val]; /* move each value with its index */
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &tval, LTERM);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Creates a row/column index from the column/row data.
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which index
+           will be created. Any existing index of that kind is freed first.
+*/
+/**************************************************************************/
+void gk_csr_CreateIndex(gk_csr_t *mat, int what)
+{
+  /* 'f' stands for forward, 'r' stands for reverse */
+  ssize_t i, j, k, nf, nr;
+  ssize_t *fptr, *rptr;
+  int *find, *rind;
+  float *fval, *rval;
+
+  switch (what) {
+    case GK_CSR_COL: /* forward = rows (read), reverse = columns (built) */
+      nf   = mat->nrows;
+      fptr = mat->rowptr;
+      find = mat->rowind;
+      fval = mat->rowval;
+
+      if (mat->colptr) gk_free((void **)&mat->colptr, LTERM);
+      if (mat->colind) gk_free((void **)&mat->colind, LTERM);
+      if (mat->colval) gk_free((void **)&mat->colval, LTERM);
+
+      nr   = mat->ncols;
+      rptr = mat->colptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr"); /* presumably filled with the 0 passed in; used as counters below */
+      rind = mat->colind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      rval = mat->colval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL); /* values are optional */
+      break;
+    case GK_CSR_ROW: /* forward = columns (read), reverse = rows (built) */
+      nf   = mat->ncols;
+      fptr = mat->colptr;
+      find = mat->colind;
+      fval = mat->colval;
+
+      if (mat->rowptr) gk_free((void **)&mat->rowptr, LTERM);
+      if (mat->rowind) gk_free((void **)&mat->rowind, LTERM);
+      if (mat->rowval) gk_free((void **)&mat->rowval, LTERM);
+
+      nr   = mat->nrows;
+      rptr = mat->rowptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");
+      rind = mat->rowind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      rval = mat->rowval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+
+  for (i=0; i<nf; i++) { /* count the entries of every reverse row/column; find[j] is not range-checked */
+    for (j=fptr[i]; j<fptr[i+1]; j++)
+      rptr[find[j]]++;
+  }
+  MAKECSR(i, nr, rptr); /* macro defined elsewhere; presumably turns the counts into start offsets */
+  
+  if (rptr[nr] > 6*nr) { /* more than 6 entries per reverse row/column on average: scatter indices and values in separate passes */
+    for (i=0; i<nf; i++) {
+      for (j=fptr[i]; j<fptr[i+1]; j++) 
+        rind[rptr[find[j]]++] = i;
+    }
+    SHIFTCSR(i, nr, rptr); /* macro defined elsewhere; presumably restores the offsets advanced by the ++ above */
+
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rval[rptr[find[j]]++] = fval[j];
+      }
+      SHIFTCSR(i, nr, rptr);
+    }
+  }
+  else { /* otherwise a single pass fills indices and values together */
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) {
+          k = find[j];
+          rind[rptr[k]]   = i;
+          rval[rptr[k]++] = fval[j];
+        }
+      }
+    }
+    else {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rind[rptr[find[j]]++] = i;
+      }
+    }
+    SHIFTCSR(i, nr, rptr);
+  }
+}
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the matrix to be unit 
+    length. A row/column whose accumulated sum is not > 0 is left unchanged.
+    \param mat the matrix itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm)
+{
+  ssize_t i, j;
+  int n;
+  ssize_t *ptr;
+  float *val, sum;
+
+  if (what&GK_CSR_ROW && mat->rowval) { /* silently skipped when the row view has no values */
+    n   = mat->nrows;
+    ptr = mat->rowptr;
+    val = mat->rowval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS) 
+    {
+      #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++){
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; /* assume val[j] > 0 */ 
+        }
+        if (sum > 0) { /* any other norm value leaves sum at 0, so nothing is scaled */
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++) /* sum now holds the scaling factor */
+            val[j] *= sum;
+  	
+        }
+      }
+    }
+  }
+
+  if (what&GK_CSR_COL && mat->colval) { /* the column view is scaled from its own values, independently of the rows */
+    n   = mat->ncols;
+    ptr = mat->colptr;
+    val = mat->colval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS)
+    {
+    #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++)
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; 
+        if (sum > 0) {
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++) /* sum now holds the scaling factor */
+            val[j] *= sum;
+        }
+      }
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Applies different scaling methods to the values of the row-based view.
+    \param mat the matrix itself,
+    \param type is one of GK_CSR_MAXTF, GK_CSR_MAXTF2, GK_CSR_SQRT, GK_CSR_LOG,
+           GK_CSR_POW25, GK_CSR_POW65, GK_CSR_POW75, GK_CSR_POW85, GK_CSR_IDF, GK_CSR_IDF2.
+*/
+/**************************************************************************/
+void gk_csr_Scale(gk_csr_t *mat, int type)
+{
+  ssize_t i, j;
+  int nrows, ncols, nnzcols, bgfreq;
+  ssize_t *rowptr;
+  int *rowind, *collen;
+  float *rowval, *cscale, maxtf;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  switch (type) {
+    case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j, maxtf) schedule(static)
+        for (i=0; i<nrows; i++) {
+          maxtf = (rowptr[i] < rowptr[i+1] ? fabs(rowval[rowptr[i]]) : 0.0); /* an empty row has no first entry to read */
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+            maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
+  
+          for (j=rowptr[i]; j<rowptr[i+1]; j++)
+            rowval[j] = .5 + .5*rowval[j]/maxtf;
+        }
+      }
+      break;
+
+    case GK_CSR_MAXTF2: /* TF' = .1 + .9*TF/MAX(TF) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j, maxtf) schedule(static)
+        for (i=0; i<nrows; i++) {
+          maxtf = (rowptr[i] < rowptr[i+1] ? fabs(rowval[rowptr[i]]) : 0.0); /* an empty row has no first entry to read */
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+            maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
+  
+          for (j=rowptr[i]; j<rowptr[i+1]; j++)
+            rowval[j] = .1 + .9*rowval[j]/maxtf;
+        }
+      }
+      break;
+
+    case GK_CSR_SQRT: /* TF' = .1+SQRT(TF) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = .1+sign(rowval[j], sqrt(fabs(rowval[j])));
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_POW25: /* TF' = .1+POW(TF,.25) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = .1+sign(rowval[j], sqrt(sqrt(fabs(rowval[j]))));
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_POW65: /* TF' = .1+POW(TF,.65) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .65));
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_POW75: /* TF' = .1+POW(TF,.75) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .75));
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_POW85: /* TF' = .1+POW(TF,.85) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .85));
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_LOG: /* TF' = 1+log_2(TF) */
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS)
+      {
+        double logscale = 1.0/log(2.0);
+        #pragma omp for schedule(static,32)
+        for (i=0; i<rowptr[nrows]; i++) { /* runs over all nonzeros directly; rows do not matter here */
+          if (rowval[i] != 0.0)
+            rowval[i] = 1+(rowval[i]>0.0 ? log(rowval[i]) : -log(-rowval[i]))*logscale;
+        }
+#ifdef XXX
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+            if (rowval[j] != 0.0)
+              rowval[j] = 1+(rowval[j]>0.0 ? log(rowval[j]) : -log(-rowval[j]))*logscale;
+              //rowval[j] = 1+sign(rowval[j], log(fabs(rowval[j]))*logscale);
+          }
+        }
+#endif
+      }
+      break;
+
+    case GK_CSR_IDF: /* TF' = TF*IDF */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      for (i=0; i<nrows; i++) { /* collen[c] = number of nonzeros in column c */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      #pragma omp parallel if (ncols > OMPMINOPS) 
+      {
+        #pragma omp for schedule(static)
+        for (i=0; i<ncols; i++)
+          cscale[i] = (collen[i] > 0 ? log(1.0*nrows/collen[i]) : 0.0);
+      }
+
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS) 
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++)
+            rowval[j] *= cscale[rowind[j]];
+        }
+      }
+
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    case GK_CSR_IDF2: /* TF' = TF*log((N+2*BG)/(BG+DF)) */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      for (i=0; i<nrows; i++) { /* collen[c] = number of nonzeros in column c */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      nnzcols = 0;
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) reduction(+:nnzcols)
+      for (i=0; i<ncols; i++)
+        nnzcols += (collen[i] > 0 ? 1 : 0);
+
+      /* Computed once, outside any parallel region: inside one, every thread
+         would store to the shared bgfreq and print the line below. A matrix
+         without nonzero columns falls back to the minimum of 10. */
+      bgfreq = (nnzcols > 0 ? gk_max(10, (ssize_t)(.5*rowptr[nrows]/nnzcols)) : 10);
+      printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq);
+
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static)
+      for (i=0; i<ncols; i++)
+        cscale[i] = (collen[i] > 0 ? log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0);
+
+      #pragma omp parallel if (rowptr[nrows] > OMPMINOPS) 
+      {
+        #pragma omp for private(j) schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++)
+            rowval[j] *= cscale[rowind[j]];
+        }
+      }
+
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown scaling type of %d\n", type);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Computes the sums of the rows/columns and stores them in the matrix
+    \param mat the matrix itself; receives the result in rsums or csums,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           sums to compute.
+*/
+/**************************************************************************/
+void gk_csr_ComputeSums(gk_csr_t *mat, int what)
+{
+  ssize_t i;
+  int n;
+  ssize_t *ptr;
+  float *val, *sums;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+
+      if (mat->rsums) /* drop a previously computed result */
+        gk_free((void **)&mat->rsums, LTERM);
+
+      sums = mat->rsums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums");
+      break;
+    case GK_CSR_COL:
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+
+      if (mat->csums) /* drop a previously computed result */
+        gk_free((void **)&mat->csums, LTERM);
+
+      sums = mat->csums = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums");
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid sum type of %d.\n", what);
+      return;
+  }
+
+  #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+  for (i=0; i<n; i++) /* ptr and val are used without a NULL check */
+    sums[i] = gk_fsum(ptr[i+1]-ptr[i], val+ptr[i], 1); /* presumably the sum of that row/column's values, stride 1 */
+}
+
+
+/*************************************************************************/
+/*! Computes the squared norms of the rows/columns into mat->rnorms or
+    mat->cnorms (the previous array, if any, is freed). Raises SIGERR via
+    gk_errexit() when what is invalid or the requested view does not exist.
+    \param mat the matrix itself,
+    \param what is GK_CSR_ROW or GK_CSR_COL indicating which norms to compute. */
+/**************************************************************************/
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what)
+{
+  ssize_t i, *ptr;
+  int n;
+  float *val, *norms, **pnorms;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      n      = mat->nrows;
+      ptr    = mat->rowptr;
+      val    = mat->rowval;
+      pnorms = &mat->rnorms;
+      break;
+    case GK_CSR_COL:
+      n      = mat->ncols;
+      ptr    = mat->colptr;
+      val    = mat->colval;
+      pnorms = &mat->cnorms;
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
+      return;
+  }
+
+  if (ptr == NULL) {  /* checked before mat is touched: ptr[n] is read below */
+    gk_errexit(SIGERR, "The requested view of the matrix does not exist.\n");
+    return;
+  }
+  if (*pnorms) gk_free((void **)pnorms, LTERM);
+  norms = *pnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSquaredNorms: norms");
+
+  #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+  for (i=0; i<n; i++) 
+    norms[i] = gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1);
+}
+
+
+/*************************************************************************/
+/*! Computes the similarity between two rows/columns. The two index lists
+    are merged in one pass; entries present in only one of them still count
+    towards that row's/column's own norm or sum.
+    \param mat the matrix itself. The routine assumes that the indices
+           are sorted in increasing order.
+    \param i1 is the first row/column,
+    \param i2 is the second row/column,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of
+           objects between which the similarity will be computed,
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN
+    \returns the similarity; if gk_errexit() returns, 0.0 or -1 (bad simtype). */
+/**************************************************************************/
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype)
+{
+  int nind1, nind2;
+  int *ind1, *ind2;
+  float *val1, *val2, stat1, stat2, sim;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr) {
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+        return 0.0;  /* gk_errexit() returns when exit-on-error is disabled */
+      }
+      nind1 = mat->rowptr[i1+1]-mat->rowptr[i1];
+      nind2 = mat->rowptr[i2+1]-mat->rowptr[i2];
+      ind1  = mat->rowind + mat->rowptr[i1];
+      ind2  = mat->rowind + mat->rowptr[i2];
+      val1  = mat->rowval + mat->rowptr[i1];
+      val2  = mat->rowval + mat->rowptr[i2];
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr) {
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+        return 0.0;  /* gk_errexit() returns when exit-on-error is disabled */
+      }
+      nind1 = mat->colptr[i1+1]-mat->colptr[i1];
+      nind2 = mat->colptr[i2+1]-mat->colptr[i2];
+      ind1  = mat->colind + mat->colptr[i1];
+      ind2  = mat->colind + mat->colptr[i2];
+      val1  = mat->colval + mat->colptr[i1];
+      val2  = mat->colval + mat->colptr[i2];
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return 0.0;
+  }
+
+  switch (simtype) {
+    case GK_CSR_COS:
+    case GK_CSR_JAC:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;  /* from here on i1/i2 are cursors into ind1/ind2 */
+      while (i1<nind1 || i2<nind2) {  /* run until BOTH lists are exhausted */
+        if (i1 == nind1) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else {
+          sim   += val1[i1]*val2[i2];
+          stat1 += val1[i1]*val1[i1];
+          stat2 += val2[i2]*val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_COS)
+        sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
+      else 
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_MIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {  /* run until BOTH lists are exhausted */
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_AMIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {  /* run until BOTH lists are exhausted */
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1 > 0.0 ? sim/stat1 : 0.0);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+
+  return sim;
+}
+
+
+/*************************************************************************/
+/*! Finds the n most similar rows (neighbors) to the query using the
+    similarity measure selected by simtype.
+
+    \param mat the matrix itself. Its column-based view is traversed;
+           GK_CSR_JAC reads mat->rnorms and GK_CSR_MIN reads mat->rsums.
+    \param nqterms is the number of columns in the query
+    \param qind is the list of query columns
+    \param qval is the list of corresponding query weights
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN
+    \param nsim is the maximum number of requested most similar rows.
+           If -1 is provided, then everything is returned unsorted.
+    \param minsim is the minimum similarity of the requested most similar rows
+    \param hits is the result set. This array should be at least
+           of length nsim.
+    \param i_marker is an array of size equal to the number of rows
+           whose values are initialized to -1. If NULL is provided
+           then this array is allocated and freed internally.
+    \param i_cand is an array of size equal to the number of rows.
+           If NULL is provided then this array is allocated and freed 
+           internally.
+    \returns the number of identified most similar rows, which can be
+             smaller than nsim when there are not enough neighbors, or
+             -1 (if gk_errexit() returns) for an unknown simtype.
+*/
+/**************************************************************************/
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, 
+        float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, 
+        int *i_marker, gk_fkv_t *i_cand)
+{
+  ssize_t i, ii, j, k;
+  int nrows, ncols, ncand;
+  ssize_t *colptr;
+  int *colind, *marker;
+  float *colval, *rnorms, mynorm, *rsums, mysum;
+  gk_fkv_t *cand;
+
+  if (nqterms == 0)
+    return 0;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker"));
+  cand   = (i_cand   ? i_cand   : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand"));
+
+  switch (simtype) {
+    case GK_CSR_COS:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {  /* query terms outside the matrix are ignored */
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {  /* first hit on row k: open a candidate slot */
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_JAC:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+
+      rnorms = mat->rnorms;
+      mynorm = gk_fdot(nqterms, qval, 1, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/(rnorms[cand[i].val]+mynorm-cand[i].key);
+      break;
+
+    case GK_CSR_MIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      rsums = mat->rsums;
+      mysum = gk_fsum(nqterms, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/(rsums[cand[i].val]+mysum-cand[i].key);
+      break;
+
+    /* Asymmetric MIN similarity */
+    case GK_CSR_AMIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      mysum = gk_fsum(nqterms, qval, 1);
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/mysum;
+      break;
+
+    default:
+      if (i_marker == NULL) gk_free((void **)&marker, LTERM);  /* do not leak the */
+      if (i_cand == NULL) gk_free((void **)&cand, LTERM);      /* scratch arrays  */
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+
+  /* go and prune the hits that are below minsim; marker is reset on the way */
+  for (j=0, i=0; i<ncand; i++) {
+    marker[cand[i].val] = -1;
+    if (cand[i].key >= minsim) 
+      cand[j++] = cand[i];
+  }
+  ncand = j;
+
+  if (nsim == -1 || nsim >= ncand) {
+    nsim = ncand;
+  }
+  else {
+    gk_dfkvkselect(ncand, nsim, cand);
+    gk_fkvsortd(nsim, cand);
+  }
+
+  gk_fkvcopy(nsim, cand, hits);
+
+  if (i_marker == NULL)
+    gk_free((void **)&marker, LTERM);
+  if (i_cand == NULL)
+    gk_free((void **)&cand, LTERM);
+
+  return nsim;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/error.c b/3rdParty/metis/metis-5.1.0/GKlib/error.c
new file mode 100644
index 000000000..e2a18cf03
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/error.c
@@ -0,0 +1,214 @@
+/*!
+\file  error.c
+\brief Various error-handling functions
+
+This file contains functions dealing with error reporting and termination
+
+\author George
+\date 1/1/2007
+\version\verbatim $Id: error.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#define _GK_ERROR_C_  /* this is needed to properly declare the gk_jub* variables
+                         as an extern function in GKlib.h */
+
+#include <GKlib.h>
+
+
+/* These are the jmp_buf for the graceful exit in case of severe errors.
+   Multiple buffers are defined to allow for recursive invocation. */
+#define MAX_JBUFS 128
+__thread int gk_cur_jbufs=-1;
+__thread jmp_buf gk_jbufs[MAX_JBUFS];
+__thread jmp_buf gk_jbuf;
+
+typedef void (*gksighandler_t)(int);
+
+/* These are the holders of the old signal handlers for the trapped signals */
+static __thread gksighandler_t old_SIGMEM_handler;  /* Custom signal */
+static __thread gksighandler_t old_SIGERR_handler;  /* Custom signal */
+static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS];  /* Custom signal */
+static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS];  /* Custom signal */
+
+/* The following is used to control if the gk_errexit() will actually abort or not.
+   There is always a single copy of this variable */
+static int gk_exit_on_error = 1;
+
+
+/*************************************************************************/
+/*! This function sets the gk_exit_on_error variable: when it is zero,
+    errexit() does not call exit() and gk_errexit() does not raise(). */
+/*************************************************************************/
+void gk_set_exit_on_error(int value)
+{
+  gk_exit_on_error = value;
+}
+
+
+
+/*************************************************************************/
+/*! Prints a printf-style message to stderr, appends a newline if the format
+    does not end with one, and calls exit(-2) unless gk_exit_on_error is 0. */
+/*************************************************************************/
+void errexit(char *f_str,...)
+{
+  va_list args;
+  size_t len = strlen(f_str);
+
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+
+  /* terminate the line only when the caller's format did not */
+  if (len == 0 || f_str[len-1] != '\n')
+    fputs("\n", stderr);
+  fflush(stderr);
+
+  if (gk_exit_on_error)
+    exit(-2);
+}
+
+
+/*************************************************************************/
+/*! Prints a printf-style message plus a newline to stderr and then raises
+    signum; when gk_exit_on_error is 0 it returns without raising. */
+/*************************************************************************/
+void gk_errexit(int signum, char *f_str,...)
+{
+  va_list args;
+
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+
+  /* the newline is unconditional, even if f_str already ends with one */
+  fputc('\n', stderr);
+  fflush(stderr);
+  if (gk_exit_on_error != 0)
+    raise(signum);
+}
+
+
+/***************************************************************************/
+/*! Claims the next slot of the handler stack and installs gk_sigthrow for
+    SIGMEM and SIGERR, saving the previous handlers in that slot.
+    \returns 1 on success, 0 if all MAX_JBUFS slots are already in use. */
+/***************************************************************************/
+int gk_sigtrap()
+{
+  int slot = gk_cur_jbufs+1;
+
+  if (slot >= MAX_JBUFS)
+    return 0;
+  /* gk_sigthrow() jumps to gk_jbufs[gk_cur_jbufs], i.e. to this new slot */
+  gk_cur_jbufs = slot;
+  old_SIGMEM_handlers[slot] = signal(SIGMEM, gk_sigthrow);
+  old_SIGERR_handlers[slot] = signal(SIGERR, gk_sigthrow);
+  return 1;
+}
+  
+
+/***************************************************************************/
+/*! Restores the SIGMEM/SIGERR handlers saved by the most recent gk_sigtrap()
+    and releases its slot. \returns 1, or 0 if no trap is active. */
+/***************************************************************************/
+int gk_siguntrap()
+{
+  int slot = gk_cur_jbufs;
+
+  if (slot == -1)
+    return 0;
+
+  signal(SIGMEM, old_SIGMEM_handlers[slot]);
+  signal(SIGERR, old_SIGERR_handlers[slot]);
+  gk_cur_jbufs = slot-1;
+  return 1;
+}
+  
+
+/*************************************************************************/
+/*! This function is the custom signal handler installed by gk_sigtrap();
+    all it does is longjmp() to gk_jbufs[gk_cur_jbufs], the most recently
+    claimed environment, passing signum along as the jump value. */
+/*************************************************************************/
+void gk_sigthrow(int signum)
+{
+  longjmp(gk_jbufs[gk_cur_jbufs], signum);
+}
+  
+
+/***************************************************************************
+* This function installs gk_NonLocalExit_Handler for SIGMEM and SIGERR and
+* remembers the handlers it replaced (a single level; no nesting)
+****************************************************************************/
+void gk_SetSignalHandlers() 
+{
+  old_SIGMEM_handler = signal(SIGMEM,  gk_NonLocalExit_Handler);
+  old_SIGERR_handler = signal(SIGERR,  gk_NonLocalExit_Handler);
+}
+  
+
+/***************************************************************************
+* This function re-installs the handlers saved by gk_SetSignalHandlers()
+****************************************************************************/
+void gk_UnsetSignalHandlers() 
+{
+  signal(SIGMEM,  old_SIGMEM_handler);
+  signal(SIGERR,  old_SIGERR_handler);
+}
+  
+
+/*************************************************************************
+* This function is the handler installed by gk_SetSignalHandlers(); it
+* performs the non-local exit by longjmp()'ing to gk_jbuf with signum.
+**************************************************************************/
+void gk_NonLocalExit_Handler(int signum)
+{
+  longjmp(gk_jbuf, signum);
+}
+  
+
+/*************************************************************************/
+/*! \brief strerror() variant that uses a per-thread buffer where it can.
+    On WIN32, MinGW and SUNOS builds it simply forwards to strerror().
+    NOTE(review): assumes strerror_r() fills buf (the XSI behaviour); the
+    GNU variant may leave buf untouched -- confirm the feature-test macros. */
+/**************************************************************************/
+char *gk_strerror(int errnum)
+{
+#if defined(WIN32) || defined(__MINGW32__) || defined(SUNOS)
+  /* strerror_r() is not used on these platforms */
+  return strerror(errnum);
+#else
+  static __thread char buf[1024];
+
+  strerror_r(errnum, buf, sizeof(buf));
+  buf[sizeof(buf)-1] = '\0';  /* guarantee termination whatever strerror_r did */
+
+  return buf;
+#endif
+}
+
+
+
+/*************************************************************************
+* This function prints a backtrace (at most 10 frames) of calling functions
+* to stdout. It does nothing unless HAVE_EXECINFO_H is defined.
+**************************************************************************/
+void PrintBackTrace()
+{
+#ifdef HAVE_EXECINFO_H
+  void *array[10];
+  int i, size;
+  char **strings;
+
+  size = backtrace(array, 10);
+  strings = backtrace_symbols(array, size);
+  printf("Obtained %d stack frames.\n", size);
+  /* backtrace_symbols() returns NULL when it cannot allocate the names */
+  for (i=0; strings != NULL && i<size; i++)
+    printf("%s\n", strings[i]);
+  free(strings);
+#endif
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/evaluate.c b/3rdParty/metis/metis-5.1.0/GKlib/evaluate.c
new file mode 100644
index 000000000..ce805ced9
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/evaluate.c
@@ -0,0 +1,132 @@
+/*!
+  \file  evaluate.c
+  \brief Various routines to evaluate classification performance
+
+  \author George
+  \date 9/23/2008
+  \version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/**********************************************************************
+ * This function computes the max accuracy score (in percent) over all
+ * cut-offs of a ranked list, given +1/-1 class list
+ **********************************************************************/
+float ComputeAccuracy(int n, gk_fkv_t *list)
+{
+  int k, npos, nneg, nabove_pos = 0, nabove_neg = 0;
+  float best = 0.0, cur;
+
+  /* class sizes: val == 1 marks a positive, any other value a negative */
+  for (npos=0, k=0; k<n; k++)
+    npos += (list[k].val == 1);
+  nneg = n - npos;
+
+  /* try every cut-off: entries 0..k are predicted positive, the rest negative */
+  for (k=0; k<n; k++) {
+    if (list[k].val == 1)
+      nabove_pos++;
+    else
+      nabove_neg++;
+
+    /* correct = positives above the cut + negatives below it */
+    cur = (nabove_pos + nneg - nabove_neg) * 100.0/ (npos + nneg);
+    if (cur > best)
+      best = cur;
+  }
+
+  return best;
+}
+
+
+/*****************************************************************************
+ * This function computes the ROC-n score of a ranked list, given a +1/-1 class
+ * list; scanning stops once maxN negatives are seen. Returns 0.0 if n <= 0.
+ ******************************************************************************/
+float ComputeROCn(int n, int maxN, gk_fkv_t *list)
+{
+  int i, P, TP, FP, TPprev, FPprev;
+  double AUC = 0.0;  /* trapezoids of tied scores can have half-integer areas */
+  float prev;
+
+  if (n <= 0) return 0.0;  /* an empty list has no list[0] to read below */
+  FP = TP = FPprev = TPprev = 0;
+  prev = list[0].key -1;
+  for (P=0, i=0; i<n; i++)
+    P += (list[i].val == 1 ? 1 : 0);
+
+  for (i=0; i<n && FP < maxN; i++) {
+    if (list[i].key != prev) {  /* a new score level closes the open trapezoid */
+      AUC += (TP+TPprev)*(FP-FPprev)/2.0;
+      prev = list[i].key;
+      FPprev = FP;
+      TPprev = TP;
+    }
+    if (list[i].val == 1) 
+      TP++;
+    else
+      FP++;
+  }
+  AUC += (TP+TPprev)*(FP-FPprev)/2.0;
+
+  return (TP*FP > 0 ? (float)(AUC/((double)P*FP)) : 0.0);
+}
+
+
+/*****************************************************************************
+* This function computes the median rate of false positive for each positive
+* instance. It returns 0.0 when the list contains no negative instances.
+******************************************************************************/
+float ComputeMedianRFP(int n, gk_fkv_t *list)
+{
+  int i, P, N, TP, FP;
+
+  P = N = 0;
+  for (i=0; i<n; i++) {
+    if (list[i].val == 1)
+      P++;
+    else
+      N++;
+  }
+  if (N == 0) return 0.0;  /* the rate below would otherwise be 0/0 */
+
+  /* count the negatives ranked above the median positive instance */
+  FP = TP = 0;
+  for (i=0; i<n && TP < (P+1)/2; i++) {
+    if (list[i].val == 1) 
+      TP++;
+    else 
+      FP++;
+  }
+  return 1.0*FP/N;
+}
+
+/*********************************************************
+ * Compute the mean (no guard: n == 0 divides 0 by 0)
+ ********************************************************/
+float ComputeMean (int n, float *values)
+{
+  float total = 0.0;
+  int k = 0;
+
+  while (k < n)
+    total += values[k++];
+
+  return 1.0 * total/ n;
+}
+
+/********************************************************
+ * Compute the standard deviation (divides by n, not n-1)
+ ********************************************************/
+float ComputeStdDev(int  n, float *values)
+{
+  float mu = ComputeMean(n, values);
+  float sqdev = 0;
+  int k;
+
+  /* accumulate the squared deviations from the mean */
+  for (k=0; k<n; k++)
+    sqdev += (values[k] - mu)* (values[k] - mu);
+
+  return sqrt(1.0 * sqdev/n);
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/fkvkselect.c b/3rdParty/metis/metis-5.1.0/GKlib/fkvkselect.c
new file mode 100644
index 000000000..b1238ce65
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/fkvkselect.c
@@ -0,0 +1,142 @@
+/*!
+\file  fkvkselect.c
+\brief Moves the k largest (or smallest) keys to the front of an array
+ 
+\date   Started 7/14/00
+\author George
+\version\verbatim $Id: fkvkselect.c 10711 2011-08-31 22:23:04Z karypis $\endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* Swap two items by plain assignment, using stmp as the temporary. */
+#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0)
+
+
+/******************************************************************************/
+/*! This function puts the 'topk' largest values in the beginning of the array.
+    It only partitions cand (quickselect style); the leading topk entries are
+    not sorted among themselves.
+    \returns n if n <= topk (cand is left untouched), otherwise topk. */
+/*******************************************************************************/
+int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int i, j, lo, hi, mid;
+  gk_fkv_t stmp;
+  float pivot;
+
+  if (n <= topk)
+    return n; /* return if the array has fewer elements than we want */
+
+  for (lo=0, hi=n-1; lo < hi;) {
+    mid = lo + ((hi-lo) >> 1);
+
+    /* select the median */
+    if (cand[lo].key < cand[mid].key)
+      mid = lo;
+    if (cand[hi].key > cand[mid].key)
+      mid = hi;
+    else 
+      goto jump_over;
+    if (cand[lo].key < cand[mid].key)
+      mid = lo;
+
+jump_over:
+    QSSWAP(cand[mid], cand[hi], stmp);  /* park the chosen pivot at cand[hi] */
+    pivot = cand[hi].key;
+
+    /* the partitioning algorithm */
+    for (i=lo-1, j=lo; j<hi; j++) {
+      if (cand[j].key >= pivot) {
+        i++;
+        QSSWAP(cand[i], cand[j], stmp);
+      }
+    }
+    i++;
+    QSSWAP(cand[i], cand[hi], stmp);  /* pivot now sits at its final index i */
+
+    /* keep searching only in the side of the pivot that contains index topk */
+    if (i > topk) 
+      hi = i-1;
+    else if (i < topk)
+      lo = i+1;
+    else
+      break;
+  }
+
+/*
+  Post-condition, which the debugging code that used to live here checked
+  by printing "Hmm Error" lines: no key in cand[topk..n-1] is larger than
+  any key in cand[0..topk-1].
+
+  NOTE(review): lo, hi, i and j are ints while n is a size_t, so n is
+  presumably expected to fit in an int -- confirm with the callers.
+*/
+
+  return topk;
+}
+
+
+/******************************************************************************/
+/*! This function puts the 'topk' smallest values in the beginning of the array.
+    It only partitions cand (quickselect style); the leading topk entries are
+    not sorted among themselves.
+    \returns n if n <= topk (cand is left untouched), otherwise topk. */
+/*******************************************************************************/
+int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int i, j, lo, hi, mid;
+  gk_fkv_t stmp;
+  float pivot;
+
+  if (n <= topk)
+    return n; /* return if the array has fewer elements than we want */
+
+  for (lo=0, hi=n-1; lo < hi;) {
+    mid = lo + ((hi-lo) >> 1);
+
+    /* select the median */
+    if (cand[lo].key > cand[mid].key)
+      mid = lo;
+    if (cand[hi].key < cand[mid].key)
+      mid = hi;
+    else 
+      goto jump_over;
+    if (cand[lo].key > cand[mid].key)
+      mid = lo;
+
+jump_over:
+    QSSWAP(cand[mid], cand[hi], stmp);  /* park the chosen pivot at cand[hi] */
+    pivot = cand[hi].key;
+
+    /* the partitioning algorithm */
+    for (i=lo-1, j=lo; j<hi; j++) {
+      if (cand[j].key <= pivot) {
+        i++;
+        QSSWAP(cand[i], cand[j], stmp);
+      }
+    }
+    i++;
+    QSSWAP(cand[i], cand[hi], stmp);  /* pivot now sits at its final index i */
+
+    /* keep searching only in the side of the pivot that contains index topk */
+    if (i > topk) 
+      hi = i-1;
+    else if (i < topk)
+      lo = i+1;
+    else
+      break;
+  }
+
+/*
+  Post-condition, which the debugging code that used to live here checked
+  by printing "Hmm Error" lines: no key in cand[topk..n-1] is smaller than
+  any key in cand[0..topk-1].
+
+  NOTE(review): lo, hi, i and j are ints while n is a size_t, so n is
+  presumably expected to fit in an int -- confirm with the callers.
+*/
+
+  return topk;
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/fs.c b/3rdParty/metis/metis-5.1.0/GKlib/fs.c
new file mode 100644
index 000000000..35e4b97b2
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/fs.c
@@ -0,0 +1,225 @@
+/*!
+\file  fs.c
+\brief Various file-system functions.
+
+This file contains various functions that deal with interfacing with 
+the filesystem in a portable way.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: fs.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************
+* Returns non-zero if fname can be stat()'ed and is a regular file, else 0
+**************************************************************************/
+int gk_fexists(char *fname)
+{
+  struct stat sb;
+
+  /* a path that cannot be stat()'ed is reported as not existing */
+  if (stat(fname, &sb) != -1)
+    return S_ISREG(sb.st_mode);
+  return 0;
+}
+
+
+/*************************************************************************
+* Returns non-zero if dirname can be stat()'ed and is a directory, else 0
+**************************************************************************/
+int gk_dexists(char *dirname)
+{
+  struct stat sb;
+
+  /* a path that cannot be stat()'ed is reported as not existing */
+  if (stat(dirname, &sb) != -1)
+    return S_ISDIR(sb.st_mode);
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! \brief Returns the size of the file in bytes
+
+This function returns the size of a file as an intmax_t, taken from the
+st_size field filled in by stat(). If stat'ing the file fails, -1 is returned.
+\note Because -1 is the error code, one bit of the return type's range is
+      not available for sizes (which I guess is okay for now).
+*/
+/**************************************************************************/
+intmax_t gk_getfsize(char *filename)
+{
+  struct stat sb;
+
+  /* -1 doubles as the error code, see the note above */
+  if (stat(filename, &sb) != -1)
+    return (intmax_t)(sb.st_size);
+  return -1;
+}
+
+
+/*************************************************************************/
+/*! This function gets some basic statistics about the file. A token is a
+    maximal run of bytes other than ' ', '\t' and '\n'; every byte that is
+    read is scanned, including NUL bytes.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines ('\n' bytes) in the file.
+    \param r_ntokens is the number of tokens in the file.
+    \param r_max_nlntokens is the maximum number of tokens in any line
+           in the file.
+    \param r_nbytes is the number of bytes read from the file.
+    Any r_* pointer may be NULL, in which case that statistic is not returned.
+*/
+/*************************************************************************/
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+        size_t *r_max_nlntokens, size_t *r_nbytes)
+{
+  size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread, i;
+  int intoken=0;
+  char buffer[2048];
+  FILE *fpin;
+
+  fpin = gk_fopen(fname, "r", "gk_GetFileStats");
+
+  /* fread() returns 0 both at end-of-file and on a read error, so looping on
+     its result (instead of on feof()) cannot spin on a failing stream */
+  while ((nread = fread(buffer, sizeof(char), sizeof(buffer), fpin)) > 0) {
+    nbytes += nread;
+
+    for (i=0; i<nread; i++) {  /* bounded by nread, not by a NUL terminator */
+      if (buffer[i] == '\n') {
+        nlines++;
+        ntokens += intoken;
+        intoken = 0;
+        if (max_nlntokens < ntokens-oldntokens)
+          max_nlntokens = ntokens-oldntokens;
+        oldntokens = ntokens;
+      }
+      else if (buffer[i] == ' ' || buffer[i] == '\t') {
+        ntokens += intoken;
+        intoken = 0;
+      }
+      else {
+        intoken = 1;
+      }
+    }
+  }
+  ntokens += intoken;
+  if (max_nlntokens < ntokens-oldntokens)
+    max_nlntokens = ntokens-oldntokens;
+
+  gk_fclose(fpin);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+  if (r_ntokens != NULL)
+    *r_ntokens = ntokens;
+  if (r_max_nlntokens != NULL)
+    *r_max_nlntokens = max_nlntokens;
+  if (r_nbytes != NULL)
+    *r_nbytes  = nbytes;
+}
+
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and returns a newly allocated (gk_strdup) string holding its basename:
+* the text after the last '/', cut at the last '.' that remains in it.
+* (for a name such as ".profile" the result is the empty string)
+**************************************************************************/
+char *gk_getbasename(char *path)
+{
+  char *slash, *dot, *name;
+
+  /* the file name starts right after the last '/', if there is one */
+  slash = strrchr(path, '/');
+  if (slash != NULL)
+    path = slash+1;
+  name = gk_strdup(path);
+
+  /* drop the extension by terminating the copy at its last '.' */
+  dot = strrchr(name, '.');
+  if (dot != NULL)
+    *dot = '\0';
+
+  return name;
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and returns a newly allocated (gk_strdup) copy of its extension, i.e. the
+* text right after the last '.' character. When path contains no '.', a
+* copy of the whole path is returned.
+**************************************************************************/
+char *gk_getextname(char *path)
+{
+  char *dot = strrchr(path, '.');
+
+  /* the '.' is searched in the whole path, directory components included */
+  if (dot != NULL)
+    return gk_strdup(dot+1);
+  return gk_strdup(path);
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and returns a newly allocated (gk_strdup) copy of the part after the last '/'.
+**************************************************************************/
+char *gk_getfilename(char *path)
+{
+  char *slash = strrchr(path, '/');
+
+  /* without any '/' the whole path already is the file name */
+  if (slash == NULL)
+    return gk_strdup(path);
+  return gk_strdup(slash+1);
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and returns a copy of everything before its last '/'; when path has no
+* '/' it returns ".". The memory for it is dynamically allocated.
+**************************************************************************/
+char *getpathname(char *path)
+{
+  char *dir;
+
+  if (strrchr(path, '/') == NULL)
+    return gk_strdup(".");
+
+  /* truncate a private copy at its last '/' */
+  dir = gk_strdup(path);
+  *(strrchr(dir, '/')) = '\0';
+
+  return dir;
+}
+
+
+
+/*************************************************************************
+* Creates pathname by running "mkdir -p pathname" via system(); the name is not
+* shell-quoted. Returns -1, running nothing, if the command overflows the buffer.
+**************************************************************************/
+int gk_mkpath(char *pathname)
+{
+  char tmp[2048];
+  int len = snprintf(tmp, sizeof(tmp), "mkdir -p %s", pathname);
+  return (len < 0 || (size_t)len >= sizeof(tmp) ? -1 : system(tmp));
+}
+
+
+/*************************************************************************
+* Deletes pathname and its contents by running "rm -r pathname" via system(); the
+* name is not shell-quoted. Returns -1, running nothing, if the command is too long.
+**************************************************************************/
+int gk_rmpath(char *pathname)
+{
+  char tmp[2048];
+  int len = snprintf(tmp, sizeof(tmp), "rm -r %s", pathname);
+  return (len < 0 || (size_t)len >= sizeof(tmp) ? -1 : system(tmp));
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/getopt.c b/3rdParty/metis/metis-5.1.0/GKlib/getopt.c
new file mode 100644
index 000000000..437befc86
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/getopt.c
@@ -0,0 +1,854 @@
+/*************************************************************************/
+/*! \file getopt.c
+\brief Command line parsing 
+
+This file contains an implementation of GNU's Getopt facility. The purpose
+for including it here is to ensure portability across different unix- and
+windows-based systems.
+
+\warning 
+The implementation provided here uses the \c gk_ prefix for all variables
+used by the standard Getopt facility to communicate with the program.
+So, do read the documentation here.
+
+\verbatim
+   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
+   Free Software Foundation, Inc. This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  
+\endverbatim
+*/
+/*************************************************************************/
+
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/* Local (file-scope) helpers shared by the three public gk_getopt*() calls */
+/*************************************************************************/
+static void exchange (char **);  /* moves processed options in front of skipped non-options */
+static char *gk_getopt_initialize (int, char **, char *);  /* resets scan state, picks the ordering mode */
+static int gk_getopt_internal(int argc, char **argv, char *optstring, 
+        struct gk_option *longopts, int *longind, int long_only);  /* the actual scanner */
+
+
+
+/*************************************************************************/
+/*! \brief Argument of the option that was just returned to the caller.
+
+This variable is set by getopt to point at the value of the option argument, 
+for those options that accept arguments. Every call first resets it to NULL.
+*/
+/*************************************************************************/
+char *gk_optarg;
+
+
+/*************************************************************************/
+/*! \brief Index in ARGV of the next element to be scanned. 
+
+This variable is set by getopt to the index of the next element of the argv 
+array to be processed. Once getopt has found all of the option arguments, 
+it tells you where the remaining non-option arguments begin. Setting it to 0
+before a call makes getopt reinitialize itself and rescan from index 1.
+*/
+/*************************************************************************/
+int gk_optind = 1; 
+
+
+/*************************************************************************/
+/*! \brief Controls error reporting for unrecognized options.  
+
+If the value of this variable is nonzero (the default), getopt prints an 
+error message to the standard error stream if it encounters an unknown 
+option character or an option with a missing required argument. If you set 
+this variable to zero, or begin the option string with a colon, getopt does 
+not print any messages, but it still reports the error in its return value.
+*/
+/*************************************************************************/
+int gk_opterr = 1;
+
+
+/*************************************************************************/
+/*! \brief Stores unknown option characters
+
+When getopt encounters an unknown option character or an option with a 
+missing required argument, it stores that option character in this variable
+(0 for an unrecognized or ambiguous long option), for your own diagnostics.
+*/
+/*************************************************************************/
+int gk_optopt = '?';
+
+
+/*************************************************************************/
+/*
+Starts out as 0; set to 1 once gk_getopt_internal() has run gk_getopt_initialize().
+*/
+/*************************************************************************/
+int gk_getopt_initialized;
+
+
+/*************************************************************************/
+/*
+The next char to be scanned in the option-element in which the last option 
+character we returned was found.  This allows us to pick up the scan where 
+we left off.
+
+If this is zero, or a null string, it means resume the scan by advancing 
+to the next ARGV-element.  
+*/
+/*************************************************************************/
+static char *nextchar;  /* NULL after gk_getopt_initialize() and after a short option has taken its argument */
+
+
+/*************************************************************************/
+/*
+Value of POSIXLY_CORRECT as read with getenv() by gk_getopt_initialize().
+*/
+/*************************************************************************/
+static char *posixly_correct;
+
+
+/*************************************************************************/
+/*
+Describe how to deal with options that follow non-option ARGV-elements.
+
+If the caller did not specify anything, the default is REQUIRE_ORDER if 
+the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+REQUIRE_ORDER means don't recognize them as options; stop option processing 
+when the first non-option is seen.  This is what Unix does.  This mode of 
+operation is selected by either setting the environment variable 
+POSIXLY_CORRECT, or using `+' as the first character of the list of 
+option characters.
+
+PERMUTE is the default.  We permute the contents of ARGV as we scan, so 
+that eventually all the non-options are at the end.  This allows options
+to be given in any order, even with programs that were not written to
+expect this.
+
+RETURN_IN_ORDER is an option available to programs that were written
+to expect options and other ARGV-elements in any order and that care 
+about the ordering of the two.  We describe each non-option ARGV-element
+as if it were the argument of an option with character code 1.
+Using `-' as the first character of the list of option characters
+selects this mode of operation.
+
+The special argument `--' forces an end of option-scanning regardless
+of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+`--' can cause `getopt' to return -1 with `gk_optind' != ARGC.  
+*/
+/*************************************************************************/
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;  /* chosen by gk_getopt_initialize() from the option string and POSIXLY_CORRECT */
+
+
+
+/*************************************************************************/
+/* 
+Describe the part of ARGV that contains non-options that have
+been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+`last_nonopt' is the index after the last of them.  
+*/
+/*************************************************************************/
+static int first_nonopt;  /* index of the first skipped non-option; relocated by exchange() */
+static int last_nonopt;   /* index one past the last skipped non-option */
+
+
+
+
+
+/*************************************************************************/
+/*
+Handle permutation of arguments.  
+
+Exchange two adjacent subsequences of ARGV. 
+One subsequence is elements [first_nonopt,last_nonopt)
+which contains all the non-options that have been skipped so far.
+The other is elements [last_nonopt,gk_optind), which contains all
+the options processed since those non-options were skipped.
+
+`first_nonopt' and `last_nonopt' are relocated so that they describe
+the new indices of the non-options in ARGV after they are moved.  
+*/
+/*************************************************************************/
+static void exchange (char **argv)
+{
+  /* [lo,mid) holds the skipped non-options, [mid,hi) the options that
+     were processed after them; the two runs have to trade places.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = gk_optind;
+
+  /* Each pass swaps the shorter run with the far end of the longer one.
+     The shorter run is then final, while the longer run is left as two
+     pieces that still have to be exchanged by the following passes.  */
+  while (hi > mid && mid > lo) {
+    int nlower = mid - lo;
+    int nupper = hi - mid;
+    int lower_is_shorter = (nupper > nlower);
+    int count, partner;
+    int k;
+
+    if (lower_is_shorter) {
+      /* Pair the lower run with the topmost `nlower' slots.  */
+      count = nlower;
+      partner = hi - nlower;
+    }
+    else {
+      /* Pair the upper run with the first `nupper' slots.  */
+      count = nupper;
+      partner = mid;
+    }
+
+    for (k = 0; k < count; k++) {
+      char *held = argv[lo + k];
+
+      argv[lo + k] = argv[partner + k];
+      argv[partner + k] = held;
+    }
+
+    /* Drop the run that has just reached its final position.  */
+    if (lower_is_shorter)
+      hi -= count;
+    else
+      lo += count;
+  }
+
+  /* The non-options now sit directly below gk_optind: shift the start of
+     their range by the number of options that moved in front of them.  */
+  first_nonopt += (gk_optind - last_nonopt);
+  last_nonopt = gk_optind;
+}
+
+
+
+/*************************************************************************/
+/*
+Initialize the internal data when the first call is made.  
+*/
+/*************************************************************************/
+static char *gk_getopt_initialize (int argc, char **argv, char *optstring)
+{
+  /* Nothing has been skipped yet: the range of non-options is empty and
+     begins at gk_optind, the first element that will be scanned
+     (argc and argv are accepted but not used here).  */
+  first_nonopt = gk_optind;
+  last_nonopt = gk_optind;
+
+  /* No option element is partially scanned.  */
+  nextchar = NULL;
+  posixly_correct = getenv("POSIXLY_CORRECT");
+
+  /* A leading `-' or `+' selects the ordering explicitly and is dropped
+     from the returned string; otherwise the environment decides.  */
+  switch (optstring[0]) {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      return optstring + 1;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      return optstring + 1;
+
+    default:
+      ordering = (posixly_correct != NULL ? REQUIRE_ORDER : PERMUTE);
+      return optstring;
+  }
+}
+
+
+/*************************************************************************/
+/*
+   Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `gk_optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `gk_optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `gk_opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `gk_optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   LONGOPTS is a vector of `struct gk_option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  
+*/
+/*************************************************************************/
+static int gk_getopt_internal(int argc, char **argv, char *optstring, 
+        struct gk_option *longopts, int *longind, int long_only)
+{
+  int print_errors;
+
+  if (argc < 1)
+    return -1;
+
+  gk_optarg = NULL;
+  if (gk_optind == 0 || !gk_getopt_initialized) {
+    if (gk_optind == 0)
+      gk_optind = 1;  /* Don't scan ARGV[0], the program name.  */
+    optstring = gk_getopt_initialize (argc, argv, optstring);
+    gk_getopt_initialized = 1;
+  }
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    optstring++;  /* the ordering prefix is not an option character on later calls either */
+  print_errors = (optstring[0] == ':' ? 0 : gk_opterr);  /* a leading ':' silences messages */
+
+  /* Test whether ARGV[gk_optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used inside GNU libc; this copy checks the syntax only.  */
+# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0')
+
+  if (nextchar == NULL || *nextchar == '\0') {
+    /* Advance to the next ARGV-element.  */
+
+    /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+       moved back by the user (who may also have changed the arguments).  */
+    if (last_nonopt > gk_optind)
+      last_nonopt = gk_optind;
+    if (first_nonopt > gk_optind)
+      first_nonopt = gk_optind;
+
+    if (ordering == PERMUTE) {
+      /* If we have just processed some options following some non-options,
+         exchange them so that the options come first.  */
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+        exchange ((char **) argv);
+      else if (last_nonopt != gk_optind)
+        first_nonopt = gk_optind;
+
+      /* Skip any additional non-options
+         and extend the range of non-options previously skipped.  */
+
+      while (gk_optind < argc && NONOPTION_P)
+        gk_optind++;
+
+      last_nonopt = gk_optind;
+    }
+
+    /* The special ARGV-element `--' means premature end of options.
+       Skip it like a null option,
+       then exchange with previous non-options as if it were an option,
+       then skip everything else like a non-option.  */
+
+    if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) {
+      gk_optind++;
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+        exchange ((char **) argv);
+      else if (first_nonopt == last_nonopt)
+        first_nonopt = gk_optind;
+      last_nonopt = argc;
+
+      gk_optind = argc;
+    }
+
+    /* If we have done all the ARGV-elements, stop the scan
+       and back over any non-options that we skipped and permuted.  */
+
+    if (gk_optind == argc) {
+      /* Set the next-arg-index to point at the non-options
+         that we previously skipped, so the caller will digest them.  */
+      if (first_nonopt != last_nonopt)
+        gk_optind = first_nonopt;
+      return -1;
+    }
+
+    /* If we have come to a non-option and did not permute it,
+       either stop the scan or describe it to the caller and pass it by.  */
+
+    if (NONOPTION_P) {
+      if (ordering == REQUIRE_ORDER)
+        return -1;
+      gk_optarg = argv[gk_optind++];
+      return 1;
+    }
+
+    /* We have found another option-ARGV-element.
+       Skip the initial punctuation.  */
+
+    nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-'));
+  }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) {
+    char *nameend;
+    struct gk_option *p;
+    struct gk_option *pfound = NULL;
+    int exact = 0;
+    int ambig = 0;
+    int indfound = -1;
+    int option_index;
+
+    for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+      /* Do nothing.  */ ;
+
+    /* Test all long options for either exact match or abbreviated matches.  */
+    for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+      if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+        if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) {
+          /* Exact match found.  */
+          pfound = p;
+          indfound = option_index;
+          exact = 1;
+          break;
+        }
+        else if (pfound == NULL) {
+          /* First nonexact match found.  */
+          pfound = p;
+          indfound = option_index;
+        }
+        else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val)
+          /* Second or later nonexact match found.  */
+          ambig = 1;
+      }
+    }
+
+    if (ambig && !exact) {
+      if (print_errors)
+        fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]);
+
+      nextchar += strlen (nextchar);
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+
+    if (pfound != NULL) {
+      option_index = indfound;
+      gk_optind++;
+      if (*nameend) {
+        /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+        if (pfound->has_arg)
+          gk_optarg = nameend + 1;
+        else {
+          if (print_errors) {
+            if (argv[gk_optind - 1][1] == '-')
+              /* --option */
+              fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name);
+            else
+              /* +option or -option */
+              fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name);
+          }
+
+          nextchar += strlen (nextchar);
+
+          gk_optopt = pfound->val;
+          return '?';
+        }
+      }
+      else if (pfound->has_arg == 1) {
+        if (gk_optind < argc)
+          gk_optarg = argv[gk_optind++];
+        else {
+          if (print_errors)
+            fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+          nextchar += strlen (nextchar);
+          gk_optopt = pfound->val;
+          return optstring[0] == ':' ? ':' : '?';
+        }
+      }
+      nextchar += strlen (nextchar);
+      if (longind != NULL)
+        *longind = option_index;
+      if (pfound->flag) {
+        *(pfound->flag) = pfound->val;
+        return 0;
+      }
+      return pfound->val;
+    }
+
+    /* Can't find it as a long option.  If this is not getopt_long_only,
+       or the option starts with '--' or is not a valid short
+        option, then it's an error. Otherwise interpret it as a short option.  */
+    if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) {
+      if (print_errors) {
+        if (argv[gk_optind][1] == '-')
+          /* --option */
+          fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar);
+        else
+          /* +option or -option */
+          fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar);
+      }
+      nextchar = (char *) "";
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+  }
+
+  /* Look at and handle the next short option-character.  */
+  {
+    char c = *nextchar++;
+    char *temp = strchr(optstring, c);
+
+    /* Increment `gk_optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++gk_optind;
+
+    if (temp == NULL || c == ':') {
+      if (print_errors) {
+        if (posixly_correct)
+          /* 1003.2 specifies the format of this message.  */
+          fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c);
+        else
+          fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
+      }
+      gk_optopt = c;
+      return '?';
+    }
+
+    /* Convenience. Treat POSIX -W foo same as long option --foo (needs a longopts table to search). */
+    if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) {
+      char *nameend;
+      struct gk_option *p;
+      struct gk_option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = 0;
+      int option_index;
+
+      /* This is an option that requires an argument.  */
+      if (*nextchar != '\0') {
+        gk_optarg = nextchar;
+        /* If we end this ARGV-element by taking the rest as an arg,
+           we must advance to the next element now.  */
+        gk_optind++;
+      }
+      else if (gk_optind == argc) {
+        if (print_errors) {
+          /* 1003.2 specifies the format of this message.  */
+          fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+        }
+        gk_optopt = c;
+        if (optstring[0] == ':')
+          c = ':';
+        else
+          c = '?';
+        return c;
+      }
+      else
+        /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+        gk_optarg = argv[gk_optind++];
+
+      /* gk_optarg is now the argument, see if it's in the table of longopts.  */
+
+      for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++)
+        /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+        if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+          if ((unsigned int) (nameend - nextchar) == strlen (p->name)) {
+            /* Exact match found.  */
+            pfound = p;
+            indfound = option_index;
+            exact = 1;
+            break;
+          }
+          else if (pfound == NULL) {
+            /* First nonexact match found.  */
+            pfound = p;
+            indfound = option_index;
+          }
+          else
+            /* Second or later nonexact match found.  */
+            ambig = 1;
+        }
+      }
+      if (ambig && !exact) {
+        if (print_errors)
+          fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], nextchar);
+        nextchar += strlen (nextchar);
+        /* gk_optind is already past the -W argument; advancing again would skip an element. */
+        return '?';
+      }
+      if (pfound != NULL) {
+        option_index = indfound;
+        if (*nameend) {
+          /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+          if (pfound->has_arg)
+            gk_optarg = nameend + 1;
+          else {
+            if (print_errors)
+              fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name);
+
+            nextchar += strlen (nextchar);
+            return '?';
+          }
+        }
+        else if (pfound->has_arg == 1) {
+          if (gk_optind < argc)
+            gk_optarg = argv[gk_optind++];
+          else {
+            if (print_errors)
+              fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+            nextchar += strlen (nextchar);
+            return optstring[0] == ':' ? ':' : '?';
+          }
+        }
+        nextchar += strlen (nextchar);
+        if (longind != NULL)
+          *longind = option_index;
+        if (pfound->flag) {
+          *(pfound->flag) = pfound->val;
+          return 0;
+        }
+        return pfound->val;
+      }
+      nextchar = NULL;
+      return 'W';       /* Let the application handle it.   */
+    }
+
+    if (temp[1] == ':') {
+      if (temp[2] == ':') {
+        /* This is an option that accepts an argument optionally.  */
+        if (*nextchar != '\0') {
+          gk_optarg = nextchar;
+          gk_optind++;
+        }
+        else
+          gk_optarg = NULL;
+        nextchar = NULL;
+      }
+      else {
+        /* This is an option that requires an argument.  */
+        if (*nextchar != '\0') {
+          gk_optarg = nextchar;
+          /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now.  */
+          gk_optind++;
+        }
+        else if (gk_optind == argc) {
+          if (print_errors) {
+            /* 1003.2 specifies the format of this message.  */
+            fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+          }
+          gk_optopt = c;
+          if (optstring[0] == ':')
+            c = ':';
+          else
+            c = '?';
+        }
+        else
+          /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+          gk_optarg = argv[gk_optind++];
+        nextchar = NULL;  /* in every case the scan resumes with the next ARGV-element */
+      }
+    }
+    return c;
+  }
+}
+
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments
+
+The gk_getopt() function gets the next option argument from the argument 
+list specified by the \c argv and \c argc arguments. Normally these values 
+come directly from the arguments received by main().
+
+\param argc is the number of command line arguments passed to main().
+\param argv is an array of strings storing the above command line 
+       arguments.
+\param options is a string that specifies the option characters that 
+       are valid for this program. An option character in this string 
+       can be followed by a colon (`:') to indicate that it takes a 
+       required argument. If an option character is followed by two 
+       colons (`::'), its argument is optional; this is a GNU extension.
+
+\return  
+It returns the option character for the next command line option. When no 
+more option arguments are available, it returns -1. There may still be 
+more non-option arguments; you must compare the external variable 
+#gk_optind against the \c argc parameter to check this.
+
+\return  
+If the option has an argument, gk_getopt() returns the argument by storing 
+it in the variable #gk_optarg. You don't ordinarily need to copy the 
+#gk_optarg string, since it is a pointer into the original \c argv array, 
+not into a static area that might be overwritten.
+
+\return  
+If gk_getopt() finds an option character in \c argv that was not included 
+in options, or a missing option argument, it returns `?' and sets the 
+external variable #gk_optopt to the actual option character. 
+If the first character of options is a colon (`:'), then gk_getopt() 
+returns `:' instead of `?' to indicate a missing option argument. 
+In addition, if the external variable #gk_opterr is nonzero (which is 
+the default), gk_getopt() prints an error message.  The variable 
+#gk_optarg is set by gk_getopt() to point at the value of the option 
+argument, for those options that accept arguments.
+
+
+gk_getopt() has three ways to deal with options that follow non-options 
+\c argv elements. The special argument <tt>`--'</tt> forces in all cases 
+the end of option scanning.
+  - The default is to permute the contents of \c argv while scanning it 
+    so that eventually all the non-options are at the end. This allows 
+    options to be given in any order, even with programs that were not 
+    written to expect this.
+  - If the options argument string begins with a hyphen (`-'), this is 
+    treated specially. It permits arguments that are not options to be 
+    returned as if they were associated with option character `\\1'.
+  - POSIX demands the following behavior: The first non-option stops 
+    option processing. This mode is selected by either setting the 
+    environment variable POSIXLY_CORRECT or beginning the options
+    argument string with a plus sign (`+'). 
+
+*/
+/*************************************************************************/
+int gk_getopt(int argc, char **argv, char *options)
+{
+  struct gk_option *no_long_options = NULL;   /* short options only */
+  return gk_getopt_internal(argc, argv, options, no_long_options, NULL, 0);
+}
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments with long options
+
+This function accepts GNU-style long options as well as single-character 
+options. 
+
+\param argc is the number of command line arguments passed to main().
+\param argv is an array of strings storing the above command line 
+       arguments.
+\param options describes the short options to accept, just as it does 
+       in gk_getopt(). 
+\param long_options describes the long options to accept. See the 
+       definition of ::gk_option for more information.
+\param opt_index this is a returned variable.  For any long option, 
+       gk_getopt_long() tells you the index in the array \c long_options 
+       of the options definition, by storing it into <tt>*opt_index</tt>. 
+       You can get the name of the option with <tt>long_options[*opt_index].name</tt>. 
+       So you can distinguish among long options either by the values 
+       in their val fields or by their indices. You can also distinguish 
+       in this way among long options that set flags.
+
+
+\return
+When gk_getopt_long() encounters a short option, it does the same thing 
+that gk_getopt() would do: it returns the character code for the option, 
+and stores the options argument (if it has one) in #gk_optarg.
+
+\return
+When gk_getopt_long() encounters a long option, it takes actions based 
+on the flag and val fields of the definition of that option.
+
+\return
+If flag is a null pointer, then gk_getopt_long() returns the contents 
+of val to indicate which option it found. You should arrange distinct 
+values in the val field for options with different meanings, so you 
+can decode these values after gk_getopt_long() returns. If the long 
+option is equivalent to a short option, you can use the short option's 
+character code in val.
+
+\return
+If flag is not a null pointer, that means this option should just set 
+a flag in the program. The flag is a variable of type int that you 
+define. Put the address of the flag in the flag field. Put in the 
+val field the value you would like this option to store in the flag. 
+In this case, gk_getopt_long() returns 0.
+
+\return
+When a long option has an argument, gk_getopt_long() puts the argument 
+value in the variable #gk_optarg before returning. When the option has 
+no argument, the value in #gk_optarg is a null pointer. This is
+how you can tell whether an optional argument was supplied.
+
+\return
+When gk_getopt_long() has no more options to handle, it returns -1, 
+and leaves in the variable #gk_optind the index in argv of the next 
+remaining argument. 
+*/
+/*************************************************************************/
+int gk_getopt_long(int argc, char **argv, char *options, struct gk_option *long_options, int *opt_index)
+{
+  const int long_only = 0;   /* only `--' introduces a long option */
+  return gk_getopt_internal(argc, argv, options, long_options, opt_index, long_only);
+}
+
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments with only long options
+
+Like gk_getopt_long(), but '-' as well as '--' can indicate a long option.
+If an option that starts with '-' (not '--') doesn't match a long option,
+but does match a short option, it is parsed as a short option instead.  
+*/
+/*************************************************************************/
+int gk_getopt_long_only(int argc, char **argv, char *options, struct gk_option *long_options, int *opt_index)
+{
+  const int long_only = 1;   /* `-' as well as `--' may introduce a long option */
+  return gk_getopt_internal(argc, argv, options, long_options, opt_index, long_only);
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_arch.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_arch.h
new file mode 100644
index 000000000..2cb80ccf2
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_arch.h
@@ -0,0 +1,71 @@
+/*!
+\file gk_arch.h
+\brief This file contains various architecture-specific declarations
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_arch.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_ARCH_H_
+#define _GK_ARCH_H_
+
+/*************************************************************************
+* Architecture-specific differences in header files; the LINUX feature-test macros precede every #include in this file
+**************************************************************************/
+#ifdef LINUX
+#if !defined(__USE_XOPEN)
+#define __USE_XOPEN
+#endif
+#if !defined(_XOPEN_SOURCE)
+#define _XOPEN_SOURCE 600
+#endif
+#if !defined(__USE_XOPEN2K)
+#define __USE_XOPEN2K
+#endif
+#endif /* LINUX */
+
+
+#ifdef HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif /* HAVE_EXECINFO_H */
+
+
+#ifdef __MSC__ 
+  #include "ms_stdint.h"
+  #include "ms_inttypes.h"
+  #include "ms_stat.h"
+#else /* !__MSC__ */
+#ifndef SUNOS
+  #include <stdint.h>
+#endif /* !SUNOS */
+  #include <inttypes.h>
+  #include <sys/types.h>
+  #include <sys/resource.h>
+  #include <sys/time.h>
+#endif /* __MSC__ */
+
+
+/*************************************************************************
+* Architecture-specific modifications
+**************************************************************************/
+#ifdef WIN32
+typedef ptrdiff_t ssize_t; /* WIN32 builds get ssize_t as an alias of ptrdiff_t */
+#endif /* WIN32 */
+
+
+#ifdef SUNOS
+#define PTRDIFF_MAX  INT64_MAX /* <stdint.h> is skipped for SUNOS above, presumably why the limit is supplied here -- confirm */
+#endif /* SUNOS */
+
+#ifdef __MSC__
+/* MSC does not have rint() function. NOTE(review): this stand-in yields an int and truncates (x)+0.5 toward zero, so negative inputs can be rounded the wrong way */
+#define rint(x) ((int)((x)+0.5))  
+
+/* MSC does not have INFINITY defined; the stand-in is FLT_MAX, i.e. a finite value */
+#ifndef INFINITY
+#define INFINITY FLT_MAX
+#endif
+#endif /* __MSC__ */
+
+#endif /* _GK_ARCH_H_ */
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_defs.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_defs.h
new file mode 100644
index 000000000..d75e72d24
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_defs.h
@@ -0,0 +1,69 @@
+/*!
+\file gk_defs.h
+\brief This file contains the definitions of various constants
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_defs.h 12732 2012-09-24 20:54:50Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_DEFS_H_
+#define _GK_DEFS_H_
+
+
+#define LTERM                   (void **) 0     /* Terminator of the variadic pointer list passed to gk_free() */
+
+/* mopt_t types */
+#define GK_MOPT_MARK            1
+#define GK_MOPT_CORE            2
+#define GK_MOPT_HEAP            3
+
+#define HTABLE_EMPTY            -1
+#define HTABLE_DELETED          -2
+#define HTABLE_FIRST             1
+#define HTABLE_NEXT              2
+
+/* pdb corruption bit switches (each value is a distinct power of two, so they can be OR-ed together) */
+#define CRP_ALTLOCS    1
+#define CRP_MISSINGCA  2
+#define CRP_MISSINGBB  4
+#define CRP_MULTICHAIN 8
+#define CRP_MULTICA    16
+#define CRP_MULTIBB    32
+
+#define MAXLINELEN 300000 /* NOTE(review): presumably an upper bound on the length of an input line; not used in this header -- confirm */
+
+/* GKlib signals to standard signal mapping (SIGABRT and SIGTERM come from <signal.h>, which this header does not include) */
+#define SIGMEM  SIGABRT
+#define SIGERR  SIGTERM
+
+
+/* CSR-related defines; the values restart at 1 in every group below, so constants of different groups are not interchangeable */
+#define GK_CSR_ROW      1
+#define GK_CSR_COL      2
+
+#define GK_CSR_MAXTF    1
+#define GK_CSR_SQRT     2
+#define GK_CSR_POW25    3
+#define GK_CSR_POW65    4
+#define GK_CSR_POW75    5
+#define GK_CSR_POW85    6
+#define GK_CSR_LOG      7
+#define GK_CSR_IDF      8
+#define GK_CSR_IDF2     9
+#define GK_CSR_MAXTF2   10
+
+#define GK_CSR_COS      1
+#define GK_CSR_JAC      2
+#define GK_CSR_MIN      3
+#define GK_CSR_AMIN     4
+
+#define GK_CSR_FMT_CLUTO        1
+#define GK_CSR_FMT_CSR          2
+#define GK_CSR_FMT_METIS        3
+#define GK_CSR_FMT_BINROW       4
+#define GK_CSR_FMT_BINCOL       5
+
+#define GK_GRAPH_FMT_METIS      1
+
+#endif /* _GK_DEFS_H_ */
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_externs.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_externs.h
new file mode 100644
index 000000000..2c0fdd968
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_externs.h
@@ -0,0 +1,25 @@
+/*!
+\file gk_externs.h
+\brief This file contains the extern declarations of the variables created by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_EXTERNS_H_
+#define _GK_EXTERNS_H_
+
+
+/*************************************************************************
+* Extern variable declarations. Hopefully, the __thread makes them thread-safe. jmp_buf needs <setjmp.h>, which is not included here.
+**************************************************************************/
+#ifndef _GK_ERROR_C_
+/* the variables themselves live in error.c; this block is skipped when _GK_ERROR_C_ is defined (presumably by error.c -- confirm) */
+extern __thread int gk_cur_jbufs;
+extern __thread jmp_buf gk_jbufs[];
+extern __thread jmp_buf gk_jbuf;
+
+#endif /* !_GK_ERROR_C_ */
+
+#endif /* _GK_EXTERNS_H_ */
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_getopt.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_getopt.h
new file mode 100644
index 000000000..4bb86115f
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_getopt.h
@@ -0,0 +1,64 @@
+/*!
+\file gk_getopt.h
+\brief This file contains GNU's externs/structs/prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_GETOPT_H_
+#define _GK_GETOPT_H_
+
+
+/* Externals from getopt.c (state shared between the parser and its caller) */
+extern char *gk_optarg; /* argument of the option just returned; a null pointer when the option has none */
+extern int gk_optind;   /* once parsing returns -1: index in argv of the next remaining argument */
+extern int gk_opterr;   /* NOTE(review): presumably mirrors getopt's opterr -- confirm in getopt.c */
+extern int gk_optopt;   /* NOTE(review): presumably mirrors getopt's optopt -- confirm in getopt.c */
+
+
+/*! \brief Description of one long command-line option 
+
+This structure describes a single long option name for the sake of 
+gk_getopt_long(). The argument <tt>long_options</tt> must be an array 
+of these structures, one for each long option. Terminate the array with 
+an element containing all zeros.
+*/
+struct gk_option {
+  char *name;       /*!< This field is the name of the option. */
+  int has_arg;      /*!< This field says whether the option takes an argument. 
+                         It is an integer, and there are three legitimate values: 
+                         no_argument, required_argument and optional_argument. 
+                         */
+  int *flag;        /*!< Null pointer, or the address of the int that receives val; see ::gk_option#val */
+  int val;          /*!< flag and val together control how to report or act on the 
+                         option when it occurs. 
+                         
+                         If flag is a null pointer, then the val is a value which 
+                         identifies this option. Often these values are chosen 
+                         to uniquely identify particular long options.
+
+                         If flag is not a null pointer, it should be the address 
+                         of an int variable which is the flag for this option. 
+                         The value in val is the value to store in the flag to 
+                         indicate that the option was seen. */
+};
+
+/* Names for the values of the `has_arg' field of `struct gk_option'.  */
+#define no_argument		0 /* the option takes no argument */
+#define required_argument	1 /* the option must be given an argument */
+#define optional_argument	2 /* the argument may be omitted; gk_optarg is then a null pointer */
+
+
+/* Function prototypes; all three return -1 once there are no more options (parameter names avoid the reserved "__" prefix) */
+extern int gk_getopt(int argc, char **argv, char *shortopts);
+extern int gk_getopt_long(int argc, char **argv, char *shortopts,
+              struct gk_option *longopts, int *longind);
+extern int gk_getopt_long_only (int argc, char **argv,
+              char *shortopts, struct gk_option *longopts, int *longind);
+
+
+
+#endif
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_macros.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_macros.h
new file mode 100644
index 000000000..d1e288bc3
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_macros.h
@@ -0,0 +1,153 @@
+/*!
+\file gk_macros.h
+\brief This file contains various macros
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_macros.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MACROS_H_
+#define _GK_MACROS_H_
+
+/*-------------------------------------------------------------
+ * Useful commands (function-like macros: an argument may be evaluated more than once)
+ *-------------------------------------------------------------*/
+#define gk_max(a, b) ((a) >= (b) ? (a) : (b))
+#define gk_min(a, b) ((a) >= (b) ? (b) : (a))
+#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? (a) : ((b) >= (a) && (b) >= (c) ? (b) : (c)))
+#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) 
+#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0)
+#define sign(a, b) ((a) >= 0 ? (b) : -(b)) /* b when a is non-negative, otherwise -b; arguments parenthesized so expressions expand safely */
+
+#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0))
+#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand())) /* rand() and RAND_MAX come from <stdlib.h> */
+
+#define gk_abs(x) ((x) >= 0 ? (x) : -(x))
+
+
+/*-------------------------------------------------------------
+ * Timing macros: start subtracts the current reading and stop adds it back, so tmr accumulates over start/stop pairs
+ *-------------------------------------------------------------*/
+#define gk_clearcputimer(tmr) (tmr = 0.0)
+#define gk_startcputimer(tmr) (tmr -= gk_CPUSeconds())
+#define gk_stopcputimer(tmr)  (tmr += gk_CPUSeconds())
+#define gk_getcputimer(tmr)   (tmr)
+
+#define gk_clearwctimer(tmr) (tmr = 0.0) /* same scheme as above, but reading gk_WClockSeconds() */
+#define gk_startwctimer(tmr) (tmr -= gk_WClockSeconds())
+#define gk_stopwctimer(tmr)  (tmr += gk_WClockSeconds())
+#define gk_getwctimer(tmr)   (tmr)
+
+/*-------------------------------------------------------------
+ * dbglvl handling macros: IFSET runs cmd when (a) and (flag) share a set bit
+ *-------------------------------------------------------------*/
+#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd); /* expands to a complete unbraced if-statement including the ';' -- take care inside another if/else */
+
+
+/*-------------------------------------------------------------
+ * graceful library exit macros (both expand to a setjmp() call, so <setjmp.h> is required)
+ *-------------------------------------------------------------*/
+#define GKSETJMP() (setjmp(gk_return_to_entry)) /* gk_return_to_entry is not declared in this header */
+#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs])) /* saves the context in slot gk_cur_jbufs of gk_jbufs */
+ 
+
+/*-------------------------------------------------------------
+ * Debugging memory leaks: MALLOC_CHECK aborts when malloc_verify() reports an error; without DMALLOC it is an empty statement
+ *-------------------------------------------------------------*/
+#ifdef DMALLOC
+#   define MALLOC_CHECK(ptr)                                          \
+    if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) {  \
+        printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+#else /* !DMALLOC */
+#   define MALLOC_CHECK(ptr) ;
+#endif /* DMALLOC */
+
+
+/*-------------------------------------------------------------
+ * CSR conversion macros (i is a caller-supplied index variable; a[n] is written, so a needs n+1 entries)
+ *-------------------------------------------------------------*/
+#define MAKECSR(i, n, a) /* turns the n counts a[0..n-1] into offsets: prefix-sum, then shift right by one with a[0] = 0 */ \
+   do { \
+     for (i=1; i<n; i++) a[i] += a[i-1]; \
+     for (i=n; i>0; i--) a[i] = a[i-1]; \
+     a[0] = 0; \
+   } while(0) 
+
+#define SHIFTCSR(i, n, a) /* shift-only half of MAKECSR: a[i] = a[i-1] for i = n..1, then a[0] = 0 */ \
+   do { \
+     for (i=n; i>0; i--) a[i] = a[i-1]; \
+     a[0] = 0; \
+   } while(0) 
+
+
+/*-------------------------------------------------------------
+ * ASSERTS that cannot be turned off! GKASSERT/GKASSERTP print and abort(); the GKCU* variants only print. msg is a parenthesized printf argument list.
+ *-------------------------------------------------------------*/
+#define GKASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+
+#define GKASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        abort();                                                \
+    }
+
+#define GKCUASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+    }
+
+#define GKCUASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+    }
+
+/*-------------------------------------------------------------
+ * Program Assertions: active unless NDEBUG is defined; expr is evaluated again by assert() after the message has been printed
+ *-------------------------------------------------------------*/
+#ifndef NDEBUG
+#   define ASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        assert(expr);                                                \
+    }
+
+#   define ASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        assert(expr);                                                \
+    }
+#else /* NDEBUG: both macros expand to an empty statement and expr is not evaluated */
+#   define ASSERT(expr) ;
+#   define ASSERTP(expr,msg) ;
+#endif 
+
+#ifndef NDEBUG2 /* second assertion level: forwards to ASSERT/ASSERTP unless NDEBUG2 is defined */
+#   define ASSERT2 ASSERT
+#   define ASSERTP2 ASSERTP
+#else
+#   define ASSERT2(expr) ;
+#   define ASSERTP2(expr,msg) ;
+#endif
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkblas.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkblas.h
new file mode 100644
index 000000000..7dd96dff2
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkblas.h
@@ -0,0 +1,201 @@
+/*!
+\file  gk_mkblas.h
+\brief Templates for BLAS-like routines
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mkblas.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKBLAS_H_
+#define _GK_MKBLAS_H_
+
+
+#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \
+/*************************************************************************/\
+/*! The macro for gk_?incset()-class of routines: stores baseval+i in x[i] for i = 0..n-1 and returns x */\
+/*************************************************************************/\
+TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++)\
+    x[i] = baseval+i;\
+\
+  return x;\
+}\
+\
+/*************************************************************************/\
+/*! The macro for gk_?max()-class of routines: returns the largest of x[0..n-1], or (TYPE)0 when n is 0 */\
+/*************************************************************************/\
+TYPE PRFX ## max(size_t n, TYPE *x)\
+{\
+  size_t i, max=0; \
+\
+  if (n <= 0) return (TYPE) 0; /* n is unsigned, so this only catches n == 0 */\
+\
+  for (i=1; i<n; i++)\
+    max = (x[i] > x[max] ? i : max); /* max holds the index of the largest value seen so far */\
+\
+  return x[max];\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?min()-class of routines: returns the smallest of x[0..n-1], or (TYPE)0 when n is 0 */\
+/*************************************************************************/\
+TYPE PRFX ## min(size_t n, TYPE *x)\
+{\
+  size_t i, min=0;\
+\
+  if (n <= 0) return (TYPE) 0; /* n is unsigned, so this only catches n == 0 */\
+\
+  for (i=1; i<n; i++)\
+    min = (x[i] < x[min] ? i : min); /* min holds the index of the smallest value seen so far */\
+\
+  return x[min];\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmax()-class of routines: index of the largest entry of x[0..n-1]; ties keep the lowest index; returns 0 when n is 0 */\
+/*************************************************************************/\
+size_t PRFX ## argmax(size_t n, TYPE *x)\
+{\
+  size_t i, max=0;\
+\
+  for (i=1; i<n; i++)\
+    max = (x[i] > x[max] ? i : max); /* strict comparison: an equal later entry does not replace max */\
+\
+  return max;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmin()-class of routines: index of the smallest entry of x[0..n-1]; ties keep the lowest index; returns 0 when n is 0 */\
+/*************************************************************************/\
+size_t PRFX ## argmin(size_t n, TYPE *x)\
+{\
+  size_t i, min=0;\
+\
+  for (i=1; i<n; i++)\
+    min = (x[i] < x[min] ? i : min); /* strict comparison: an equal later entry does not replace min */\
+\
+  return min;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmax_n()-class of routines: returns the original index stored at position k-1 after sorting (key = x[i], val = i) pairs */\
+/*************************************************************************/\
+size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t k)\
+{\
+  size_t i, max_n;\
+  PRFX ## kv_t *cand;\
+\
+  cand = PRFX ## kvmalloc(n, "GK_ARGMAX_N: cand"); /* temporary array of n pairs, released below */\
+\
+  for (i=0; i<n; i++) {\
+    cand[i].val = i;\
+    cand[i].key = x[i];\
+  }\
+  PRFX ## kvsortd(n, cand); /* presumably sorts by decreasing key -- confirm against the kvsortd definition */\
+\
+  max_n = cand[k-1].val; /* NOTE(review): k is not range-checked; k == 0 or k > n reads outside cand */\
+\
+  gk_free((void *)&cand, LTERM);\
+\
+  return max_n;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?sum()-class of routines: adds up n elements read from x with a stride of incx; the total is accumulated in OUTTYPE */\
+/**************************************************************************/\
+OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+  OUTTYPE sum = 0;\
+\
+  for (i=0; i<n; i++, x+=incx)\
+    sum += (*x);\
+\
+  return sum;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?scale()-class of routines: multiplies n elements of x (stride incx) by alpha in place and returns x */\
+/**************************************************************************/\
+TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++) /* index rather than advance x, so the caller gets the start of the vector back (axpy does the same via y_in) */\
+    x[i*incx] *= alpha;\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?norm2()-class of routines: square root of the sum of squares of n elements of x (stride incx) */\
+/**************************************************************************/\
+OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+  OUTTYPE partial = 0;\
+\
+  for (i=0; i<n; i++, x+=incx)\
+    partial += (*x) * (*x);\
+\
+  return (partial > 0 ? (OUTTYPE)sqrt((double)partial) : (OUTTYPE)0); /* sqrt() is skipped unless the sum is positive; needs <math.h> */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?dot()-class of routines: sum of products of n element pairs taken from x (stride incx) and y (stride incy) */\
+/**************************************************************************/\
+OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t i;\
+  OUTTYPE partial = 0.0;\
+ \
+  for (i=0; i<n; i++, x+=incx, y+=incy)\
+    partial += (*x) * (*y);\
+\
+  return partial;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?axpy()-class of routines: adds alpha times each of n elements of x (stride incx) to y (stride incy); returns the original y */\
+/**************************************************************************/\
+TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t i;\
+  TYPE *y_in = y; /* remember the start of y, because the loop advances y itself */\
+\
+  for (i=0; i<n; i++, x+=incx, y+=incy)\
+    *y += alpha*(*x);\
+\
+  return y_in;\
+}\
+
+
+
+#define GK_MKBLAS_PROTO(PRFX, TYPE, OUTTYPE) /* one prototype per function generated by GK_MKBLAS(PRFX, TYPE, OUTTYPE) */ \
+  TYPE    *PRFX ## incset(size_t n, TYPE baseval, TYPE *x);\
+  TYPE     PRFX ## max(size_t n, TYPE *x);\
+  TYPE     PRFX ## min(size_t n, TYPE *x);\
+  size_t   PRFX ## argmax(size_t n, TYPE *x);\
+  size_t   PRFX ## argmin(size_t n, TYPE *x);\
+  size_t   PRFX ## argmax_n(size_t n, TYPE *x, size_t k);\
+  OUTTYPE  PRFX ## sum(size_t n, TYPE *x, size_t incx);\
+  TYPE    *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## norm2(size_t n, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+  TYPE    *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkmemory.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkmemory.h
new file mode 100644
index 000000000..78e216e0e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkmemory.h
@@ -0,0 +1,142 @@
+/*!
+\file  gk_mkmemory.h
+\brief Templates for memory allocation routines
+
+\date   Started 3/29/07
+\author George
+\version\verbatim $Id: gk_mkmemory.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKMEMORY_H_
+#define _GK_MKMEMORY_H_
+
+
+#define GK_MKALLOC(PRFX, TYPE)\
+/*************************************************************************/\
+/*! The macro for gk_?malloc()-class of routines: requests room for n TYPE elements from gk_malloc(); msg is passed through unchanged */\
+/**************************************************************************/\
+TYPE *PRFX ## malloc(size_t n, char *msg)\
+{\
+  return (TYPE *)gk_malloc(sizeof(TYPE)*n, msg); /* the product sizeof(TYPE)*n is not checked for overflow */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?realloc()-class of routines: asks gk_realloc() to resize ptr to n TYPE elements; msg is passed through unchanged */\
+/**************************************************************************/\
+TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg)\
+{\
+  return (TYPE *)gk_realloc((void *)ptr, sizeof(TYPE)*n, msg); /* the product sizeof(TYPE)*n is not checked for overflow */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?smalloc()-class of routines: allocates n TYPE elements and sets every one to ival; NULL when gk_malloc() returned NULL */\
+/**************************************************************************/\
+TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg)\
+{\
+  TYPE *ptr;\
+\
+  ptr = (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\
+  if (ptr == NULL) \
+    return NULL; \
+\
+  return PRFX ## set(n, ival, ptr); /* the set routine fills the array and hands back ptr */ \
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?set()-class of routines: stores val in x[0..n-1] and returns x */\
+/*************************************************************************/\
+TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++)\
+    x[i] = val;\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?copy()-class of routines: copies n TYPE elements from a to b and returns b */\
+/*************************************************************************/\
+TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b)\
+{\
+  return (TYPE *)memmove((void *)b, (void *)a, sizeof(TYPE)*n); /* memmove, so a and b may overlap; the destination is the second parameter */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?AllocMatrix()-class of routines: allocates ndim1 rows of ndim2 elements, each set to value; NULL if any allocation fails */\
+/**************************************************************************/\
+TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg)\
+{\
+  gk_idx_t i, j;\
+  TYPE **matrix;\
+\
+  matrix = (TYPE **)gk_malloc(ndim1*sizeof(TYPE *), errmsg); /* array of row pointers */\
+  if (matrix == NULL) \
+    return NULL;\
+\
+  for (i=0; i<ndim1; i++) { \
+    matrix[i] = PRFX ## smalloc(ndim2, value, errmsg);\
+    if (matrix[i] == NULL) { \
+      for (j=0; j<i; j++) \
+        gk_free((void **)&matrix[j], LTERM); \
+      gk_free((void **)&matrix, LTERM); /* also release the row-pointer array, which would otherwise leak */ \
+      return NULL; \
+    } \
+  }\
+  return matrix;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?FreeMatrix()-class of routines: frees the ndim1 rows of *r_matrix and then the row-pointer array; ndim2 is not used */\
+/**************************************************************************/\
+void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2)\
+{\
+  gk_idx_t i;\
+  TYPE **matrix;\
+\
+  if (*r_matrix == NULL) \
+    return; \
+\
+  matrix = *r_matrix;\
+\
+  for (i=0; i<ndim1; i++) \
+    gk_free((void **)&(matrix[i]), LTERM);\
+\
+  gk_free((void **)r_matrix, LTERM); /* passes the address of the caller's pointer itself to gk_free() */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?SetMatrix()-class of routines: stores value in matrix[i][j] for every i < ndim1 and j < ndim2 */\
+/**************************************************************************/\
+void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value)\
+{\
+  gk_idx_t i, j;\
+\
+  for (i=0; i<ndim1; i++) {\
+    for (j=0; j<ndim2; j++)\
+      matrix[i][j] = value;\
+  }\
+}\
+
+
+#define GK_MKALLOC_PROTO(PRFX, TYPE) /* one prototype per function generated by GK_MKALLOC(PRFX, TYPE) */\
+  TYPE  *PRFX ## malloc(size_t n, char *msg);\
+  TYPE  *PRFX ## realloc(TYPE *ptr, size_t n, char *msg);\
+  TYPE  *PRFX ## smalloc(size_t n, TYPE ival, char *msg);\
+  TYPE  *PRFX ## set(size_t n, TYPE val, TYPE *x);\
+  TYPE  *PRFX ## copy(size_t n, TYPE *a, TYPE *b);\
+  TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg);\
+  void   PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2);\
+  void   PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value);\
+
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue.h
new file mode 100644
index 000000000..3da7d26c0
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue.h
@@ -0,0 +1,437 @@
+/*!
+\file  gk_mkpqueue.h
+\brief Templates for priority queues
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE_H
+#define _GK_MKPQUEUE_H
+
+
+#define GK_MKPQUEUE(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! This function creates and initializes a priority queue for up to maxnodes items; returns NULL if the queue struct cannot be allocated */\
+/**************************************************************************/\
+PQT *FPRFX ## Create(size_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate: queue");\
+  if (queue != NULL) FPRFX ## Init(queue, maxnodes); /* Init dereferences queue, so skip it when the allocation failed */\
+\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function initializes the data structures of the priority queue: an empty heap of maxnodes slots and a locator array filled with -1 */\
+/**************************************************************************/\
+void FPRFX ## Init(PQT *queue, size_t maxnodes)\
+{\
+  queue->nnodes = 0;\
+  queue->maxnodes = maxnodes;\
+\
+  queue->heap    = KVMALLOC(maxnodes, "gk_PQInit: heap"); /* neither allocation result is checked here */\
+  queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator"); /* -1 marks an item that is not in the heap */\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function resets the priority queue: clears the locator entry of every queued item and sets the length to 0 */\
+/**************************************************************************/\
+void FPRFX ## Reset(PQT *queue)\
+{\
+  gk_idx_t i;\
+  gk_idx_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  for (i=queue->nnodes-1; i>=0; i--) /* only the nnodes queued items are touched, not all maxnodes locator slots */\
+    locator[heap[i].val] = -1;\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue (heap and locator); the queue struct itself is kept; NULL is accepted */\
+/**************************************************************************/\
+void FPRFX ## Free(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->heap, &queue->locator, LTERM);\
+  queue->maxnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue \
+    and the queue itself; NULL is accepted. The caller's own pointer is not modified. */\
+/**************************************************************************/\
+void FPRFX ## Destroy(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  FPRFX ## Free(queue);\
+  gk_free((void **)&queue, LTERM); /* &queue is the address of the local parameter copy */\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the length of the queue, i.e. the current value of queue->nnodes */\
+/**************************************************************************/\
+size_t FPRFX ## Length(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function adds an item in the priority queue; node must not be queued already (asserted); always returns 0 */\
+/**************************************************************************/\
+int FPRFX ## Insert(PQT *queue, VT node, KT key)\
+{\
+  gk_idx_t i, j;\
+  gk_idx_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  ASSERT(locator[node] == -1);\
+\
+  i = queue->nnodes++; /* start at the first free slot; nnodes is not compared against maxnodes */\
+  while (i > 0) {\
+    j = (i-1)>>1; /* parent slot of i */\
+    if (KEY_LT(key, heap[j].key)) { /* parent must move down: pull it into slot i and continue from its slot */\
+      heap[i] = heap[j];\
+      locator[heap[i].val] = i;\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  heap[i].key   = key;\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function deletes an item from the priority queue; node must currently be queued (asserted); always returns 0 */\
+/**************************************************************************/\
+int FPRFX ## Delete(PQT *queue, VT node)\
+{\
+  gk_idx_t i, j, nnodes;\
+  KT newkey, oldkey;\
+  gk_idx_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1);\
+  ASSERT(heap[locator[node]].val == node);\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  i = locator[node]; /* slot that becomes vacant */\
+  locator[node] = -1;\
+\
+  if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) { /* nothing to repair if the removed item was the last heap entry */\
+    node   = heap[queue->nnodes].val; /* from here on node/newkey describe the last entry, which is re-placed starting at slot i */\
+    newkey = heap[queue->nnodes].key;\
+    oldkey = heap[i].key;\
+\
+    if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+      while (i > 0) {\
+        j = (i-1)>>1;\
+        if (KEY_LT(newkey, heap[j].key)) {\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+    else { /* Filter down */\
+      nnodes = queue->nnodes;\
+      while ((j=(i<<1)+1) < nnodes) { /* j is the left child of i */\
+        if (KEY_LT(heap[j].key, newkey)) {\
+          if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+            j++;\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+          j++;\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+\
+    heap[i].key   = newkey;\
+    heap[i].val   = node;\
+    locator[node] = i;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function updates the key associated with a queued item and restores the heap order; node must currently be queued (asserted) */ \
+/**************************************************************************/\
+void FPRFX ## Update(PQT *queue, VT node, KT newkey)\
+{\
+  gk_idx_t i, j, nnodes;\
+  KT oldkey;\
+  gk_idx_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1);\
+  ASSERT(heap[locator[node]].val == node);\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  /* read the heap slot only after the asserts above have vetted locator[node] */\
+  i = locator[node];\
+  oldkey = heap[i].key;\
+\
+  if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+    while (i > 0) {\
+      j = (i-1)>>1;\
+      if (KEY_LT(newkey, heap[j].key)) {\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+  else { /* Filter down */\
+    nnodes = queue->nnodes;\
+    while ((j=(i<<1)+1) < nnodes) {\
+      if (KEY_LT(heap[j].key, newkey)) {\
+        if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+          j++;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+        j++;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+\
+  heap[i].key   = newkey;\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue and removes\
+    it from the priority queue; returns -1 when the queue is empty */\
+/**************************************************************************/\
+VT FPRFX ## GetTop(PQT *queue)\
+{\
+  gk_idx_t i, j;\
+  gk_idx_t *locator;\
+  KVT *heap;\
+  VT vtx, node;\
+  KT key;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  queue->nnodes--;\
+\
+  heap    = queue->heap;\
+  locator = queue->locator;\
+\
+  vtx = heap[0].val; /* the item to return */\
+  locator[vtx] = -1;\
+\
+  if ((i = queue->nnodes) > 0) { /* re-place the former last entry, sifting it down from the root */\
+    key  = heap[i].key;\
+    node = heap[i].val;\
+    i = 0;\
+    while ((j=2*i+1) < queue->nnodes) { /* j is the left child of i */\
+      if (KEY_LT(heap[j].key, key)) {\
+        if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+          j = j+1;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, key)) {\
+        j = j+1;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+\
+    heap[i].key   = key;\
+    heap[i].val   = node;\
+    locator[node] = i;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+  return vtx;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue. The item is not\
+    deleted from the queue. Returns -1 when the queue is empty. */\
+/**************************************************************************/\
+VT FPRFX ## SeeTopVal(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? -1 : queue->heap[0].val);\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of the top item. The item is not\
+    deleted from the queue. Returns KMAX when the queue is empty. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? KMAX : queue->heap[0].key);\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of a specific item; it does not check that node is in the queue */\
+/**************************************************************************/\
+KT FPRFX ## SeeKey(PQT *queue, VT node)\
+{\
+  gk_idx_t *locator;\
+  KVT *heap;\
+\
+  heap    = queue->heap;\
+  locator = queue->locator;\
+\
+  return heap[locator[node]].key; /* NOTE(review): locator[node] is -1 for an item that is not queued, which would index heap[-1] */\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the first item in a breadth-first traversal of\
+    the heap whose key is less than maxwgt. This function is here due to\
+    hMETIS and is not general!*/\
+/**************************************************************************/\
+/*\
+VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\
+{\
+  gk_idx_t i;\
+\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  if (maxwgt <= 1000)\
+    return FPRFX ## SeeTopVal(queue);\
+\
+  for (i=0; i<queue->nnodes; i++) {\
+    if (queue->heap[i].key > 0) {\
+      if (wgts[queue->heap[i].val] <= maxwgt)\
+        return queue->heap[i].val;\
+    }\
+    else {\
+      if (queue->heap[i/2].key <= 0)\
+        break;\
+    }\
+  }\
+\
+  return queue->heap[0].val;\
+\
+}\
+*/\
+\
+\
+/*************************************************************************/\
+/*! This function checks the consistency of the heap; it always returns 1 and reports violations only through ASSERT/ASSERTP */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap(PQT *queue)\
+{\
+  gk_idx_t i, j;\
+  size_t nnodes;\
+  gk_idx_t *locator;\
+  KVT *heap;\
+\
+  heap    = queue->heap;\
+  locator = queue->locator;\
+  nnodes  = queue->nnodes;\
+\
+  if (nnodes == 0)\
+    return 1;\
+\
+  ASSERT(locator[heap[0].val] == 0);\
+  for (i=1; i<nnodes; i++) { /* every entry must be located correctly and must not order before its parent */\
+    ASSERT(locator[heap[i].val] == i);\
+    ASSERT(!KEY_LT(heap[i].key, heap[(i-1)/2].key));\
+  }\
+  for (i=1; i<nnodes; i++) /* no entry may order before the root */\
+    ASSERT(!KEY_LT(heap[i].key, heap[0].key));\
+\
+  for (j=i=0; i<queue->maxnodes; i++) { /* count the locator slots in use; the count must equal nnodes */\
+    if (locator[i] != -1)\
+      j++;\
+  }\
+  ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\
+\
+  return 1;\
+}\
+
+
+#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT) /* prototypes for the functions generated by GK_MKPQUEUE */\
+  PQT *  FPRFX ## Create(size_t maxnodes);\
+  void   FPRFX ## Init(PQT *queue, size_t maxnodes);\
+  void   FPRFX ## Reset(PQT *queue);\
+  void   FPRFX ## Free(PQT *queue);\
+  void   FPRFX ## Destroy(PQT *queue);\
+  size_t FPRFX ## Length(PQT *queue);\
+  int    FPRFX ## Insert(PQT *queue, VT node, KT key);\
+  int    FPRFX ## Delete(PQT *queue, VT node);\
+  void   FPRFX ## Update(PQT *queue, VT node, KT newkey);\
+  VT     FPRFX ## GetTop(PQT *queue);\
+  VT     FPRFX ## SeeTopVal(PQT *queue);\
+  KT     FPRFX ## SeeTopKey(PQT *queue);\
+  KT     FPRFX ## SeeKey(PQT *queue, VT node);\
+  VT     FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts); /* declared only: the definition in GK_MKPQUEUE is commented out */\
+  int    FPRFX ## CheckHeap(PQT *queue);\
+
+
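+/* This is how these macros are used. GK_MKPQUEUE takes eight arguments (FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT); gk_dkv_t and key_lt below are placeholders for the key-value struct type and the key-comparison macro.
+GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_lt)
+GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t)
+*/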
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue2.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue2.h
new file mode 100644
index 000000000..10e8ee462
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkpqueue2.h
@@ -0,0 +1,215 @@
+/*!
+\file  gk_mkpqueue2.h
+\brief Templates for priority queues that do not utilize locators and as such
+       they can use different types of values.
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE2_H
+#define _GK_MKPQUEUE2_H
+
+
+#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT) /* KT/VT: key and value types; KMALLOC/VMALLOC: allocators for the keys/vals arrays; KMAX: key reported for an empty queue; KEY_LT(a,b): ordering test */\
+/*************************************************************************/\
+/*! This function allocates an empty priority queue with room for maxnodes (key, value) pairs and returns it */\
+/**************************************************************************/\
+PQT *FPRFX ## Create2(ssize_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  if ((queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue")) != NULL) {\
+    memset(queue, 0, sizeof(PQT));\
+    queue->nnodes   = 0;\
+    queue->maxnodes = maxnodes;\
+    queue->keys     = KMALLOC(maxnodes, "gk_pqCreate2: keys");\
+    queue->vals     = VMALLOC(maxnodes, "gk_pqCreate2: vals");\
+    /* if either array could not be obtained, hand both arrays and the queue itself to gk_free() */\
+    if (queue->keys == NULL || queue->vals == NULL)\
+      gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  }\
+  /* NOTE(review): returning queue after the failure path assumes gk_free() resets the pointers it is given to NULL -- confirm */\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function empties the priority queue by setting nnodes to 0; the keys/vals arrays are not touched */\
+/**************************************************************************/\
+void FPRFX ## Reset2(PQT *queue)\
+{\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function passes the keys/vals arrays and the queue itself to gk_free() and sets *r_queue to NULL; it does nothing if *r_queue is already NULL */\
+/**************************************************************************/\
+void FPRFX ## Destroy2(PQT **r_queue)\
+{\
+  PQT *queue = *r_queue; \
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  *r_queue = NULL;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the number of items currently stored in the queue (its nnodes field) */\
+/**************************************************************************/\
+size_t FPRFX ## Length2(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function adds an item to the priority queue. Returns 1 on success and 0 if the queue already holds maxnodes items. */\
+/**************************************************************************/\
+int FPRFX ## Insert2(PQT *queue, VT val, KT key)\
+{\
+  ssize_t i, j;\
+  KT *keys=queue->keys;\
+  VT *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+  /* a full queue rejects the insertion and is left unchanged */\
+  if (queue->nnodes == queue->maxnodes) \
+    return 0;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+  /* sift up: start at the new last slot and move parents down while key is KEY_LT them */\
+  i = queue->nnodes++;\
+  while (i > 0) {\
+    j = (i-1)>>1;\
+    if (KEY_LT(key, keys[j])) {\
+      keys[i] = keys[j];\
+      vals[i] = vals[j];\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  keys[i] = key;\
+  vals[i] = val;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function removes the item at the top of the queue and stores its\
+    value in *r_val. Returns 1 on success, or 0 (leaving *r_val untouched) if the queue is empty. */\
+/**************************************************************************/\
+int FPRFX ## GetTop2(PQT *queue, VT *r_val)\
+{\
+  ssize_t i, j;\
+  KT key, *keys=queue->keys;\
+  VT val, *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  if (queue->nnodes == 0)\
+    return 0;\
+\
+  queue->nnodes--;\
+\
+  *r_val = vals[0];\
+  /* if items remain, re-place the former last item starting from the root and sift it down */\
+  if ((i = queue->nnodes) > 0) {\
+    key = keys[i];\
+    val = vals[i];\
+    i = 0;\
+    while ((j=2*i+1) < queue->nnodes) {\
+      if (KEY_LT(keys[j], key)) { /* left child is KEY_LT key: promote whichever child is KEY_LT the other */\
+        if (j+1 < queue->nnodes && KEY_LT(keys[j+1], keys[j]))\
+          j = j+1;\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else if (j+1 < queue->nnodes && KEY_LT(keys[j+1], key)) { /* only the right child is KEY_LT key */\
+        j = j+1;\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+\
+    keys[i] = key;\
+    vals[i] = val;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function stores the value at the top of the queue in *r_val without\
+    removing it. Returns 1 on success, or 0 (leaving *r_val untouched) if the queue is empty. */\
+/**************************************************************************/\
+int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\
+{\
+  if (queue->nnodes == 0) \
+    return 0;\
+\
+  *r_val = queue->vals[0];\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of the top item, or KMAX if the queue is\
+    empty. The item is not deleted from the queue. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey2(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? KMAX : queue->keys[0]);\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function checks the ordering of the keys[] heap through ASSERT; the function itself always returns 1 */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap2(PQT *queue)\
+{\
+  ssize_t i;\
+  KT *keys=queue->keys;\
+\
+  if (queue->nnodes == 0)\
+    return 1;\
+  /* no key may be KEY_LT its parent at (i-1)/2 */\
+  for (i=1; i<queue->nnodes; i++) {\
+    ASSERT(!KEY_LT(keys[i], keys[(i-1)/2]));\
+  }\
+  for (i=1; i<queue->nnodes; i++) /* no key may be KEY_LT the root */\
+    ASSERT(!KEY_LT(keys[i], keys[0]));\
+\
+  return 1;\
+}\
+
+
+#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT) /* prototypes of the routines generated by GK_MKPQUEUE2; PQT: queue type, KT: key type, VT: value type */\
+  PQT *  FPRFX ## Create2(ssize_t maxnodes);\
+  void   FPRFX ## Reset2(PQT *queue);\
+  void   FPRFX ## Destroy2(PQT **r_queue);\
+  size_t FPRFX ## Length2(PQT *queue);\
+  int    FPRFX ## Insert2(PQT *queue, VT node, KT key);\
+  int    FPRFX ## GetTop2(PQT *queue, VT *r_val);\
+  int    FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\
+  KT     FPRFX ## SeeTopKey2(PQT *queue);\
+  int    FPRFX ## CheckHeap2(PQT *queue);\
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkrandom.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkrandom.h
new file mode 100644
index 000000000..68d54fa3f
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkrandom.h
@@ -0,0 +1,123 @@
+/*!
+\file  
+\brief Templates for portable random number generation
+
+\date   Started 5/17/07
+\author George
+\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKRANDOM_H
+#define _GK_MKRANDOM_H
+
+/*************************************************************************/\
+/*! The generator for the rand() related routines.  \
+   \param RNGT   the datatype that defines the range of values over which\
+                 random numbers will be generated\
+   \param VALT   the datatype that defines the contents of the array to \
+                 be permuted by randArrayPermute() \
+   \param FPRFX  the function prefix \
+*/\
+/**************************************************************************/\
+#define GK_MKRANDOM(FPRFX, RNGT, VALT)\
+/*************************************************************************/\
+/*! Seeds the generator by passing seed, cast to uint64_t, to gk_randinit() */ \
+/**************************************************************************/\
+void FPRFX ## srand(RNGT seed) \
+{\
+  gk_randinit((uint64_t) seed);\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns a random number: from gk_randint32() when RNGT is no wider than int32_t, otherwise from gk_randint64() */ \
+/**************************************************************************/\
+RNGT FPRFX ## rand() \
+{\
+  if (sizeof(RNGT) <= sizeof(int32_t)) \
+    return (RNGT)gk_randint32(); \
+  else \
+    return (RNGT)gk_randint64(); \
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns a random number in [0, max), computed as rand() % max; max must therefore be non-zero */ \
+/**************************************************************************/\
+RNGT FPRFX ## randInRange(RNGT max) \
+{\
+  return (RNGT)((FPRFX ## rand())%max); \
+}\
+\
+\
+/*************************************************************************/\
+/*! Randomly permutes the elements of an array p[] of length n. \
+    flag == 1, p[i] = i prior to permutation, \
+    otherwise p[] is permuted as supplied (it is not initialized here). */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\
+{\
+  RNGT i, u, v;\
+  VALT tmp;\
+  /* optionally start from the identity permutation */\
+  if (flag == 1) {\
+    for (i=0; i<n; i++)\
+      p[i] = (VALT)i;\
+  }\
+  /* n < 10: n swaps of random pairs (nshuffles is ignored); otherwise nshuffles rounds that exchange elements of two random 4-element windows */\
+  if (n < 10) {\
+    for (i=0; i<n; i++) {\
+      v = FPRFX ## randInRange(n);\
+      u = FPRFX ## randInRange(n);\
+      gk_SWAP(p[v], p[u], tmp);\
+    }\
+  }\
+  else {\
+    for (i=0; i<nshuffles; i++) {\
+      v = FPRFX ## randInRange(n-3);\
+      u = FPRFX ## randInRange(n-3);\
+      /*gk_SWAP(p[v+0], p[u+0], tmp);*/\
+      /*gk_SWAP(p[v+1], p[u+1], tmp);*/\
+      /*gk_SWAP(p[v+2], p[u+2], tmp);*/\
+      /*gk_SWAP(p[v+3], p[u+3], tmp);*/\
+      gk_SWAP(p[v+0], p[u+2], tmp);\
+      gk_SWAP(p[v+1], p[u+3], tmp);\
+      gk_SWAP(p[v+2], p[u+0], tmp);\
+      gk_SWAP(p[v+3], p[u+1], tmp);\
+    }\
+  }\
+}\
+\
+\
+/*************************************************************************/\
+/*! Randomly permutes the elements of an array p[] of length n. \
+    flag == 1, p[i] = i prior to permutation, \
+    otherwise p[] is permuted as supplied (it is not initialized here). */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag)\
+{\
+  RNGT i, v;\
+  VALT tmp;\
+\
+  if (flag == 1) {\
+    for (i=0; i<n; i++)\
+      p[i] = (VALT)i;\
+  }\
+  /* swap every position i with a position drawn from [0, n) */\
+  for (i=0; i<n; i++) {\
+    v = FPRFX ## randInRange(n);\
+    gk_SWAP(p[i], p[v], tmp);\
+  }\
+}\
+
+
+#define GK_MKRANDOM_PROTO(FPRFX, RNGT, VALT) /* prototypes of the routines generated by GK_MKRANDOM(FPRFX, RNGT, VALT) */\
+  void FPRFX ## srand(RNGT seed); \
+  RNGT FPRFX ## rand(); \
+  RNGT FPRFX ## randInRange(RNGT max); \
+  void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag);\
+  void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag);\
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mksort.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mksort.h
new file mode 100644
index 000000000..56ac0b11a
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mksort.h
@@ -0,0 +1,273 @@
+/*!
+\file  gk_mksort.h
+\brief Templates for the qsort routine
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mksort.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKSORT_H_
+#define _GK_MKSORT_H_
+
+/* $Id: gk_mksort.h 10711 2011-08-31 22:23:04Z karypis $
+ * Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* In-line qsort implementation.  Differs from the traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing the address
+ * of a comparison routine to the function, it is possible to inline the
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ *  #include "gk_mksort.h"
+ *  #define islt(a,b) (strcmp((*a),(*b))<0)
+ *  char *arr[];
+ *  int n;
+ *  GK_MKQSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ *  GK_MKQSORT(TYPE,BASE,NELT,ISLT)
+ *  1) type of each element, TYPE,
+ *  2) address of the beginning of the array, of type TYPE*,
+ *  3) number of elements in the array, and
+ *  4) comparison routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ *  qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in the last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whether one element is less than
+ * another; no other conditions (greater than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ *  #define islt(a,b) ((*a)<(*b))
+ *  GK_MKQSORT(int, arr, n, islt)
+ *
+ * The macro could be used to implement a sorting function (see examples
+ * below), or to implement the sorting algorithm inline.  That is, either
+ * create a sorting function and use it whenever you want to sort something,
+ * or use the GK_MKQSORT() macro directly instead of a call to such a routine.
+ * Note that the macro expands to quite some code (compiled size of int qsort
+ * on x86 is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting an array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ *   GK_MKQSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting an array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ *   GK_MKQSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting an array of structures:
+ *
+ * struct elt {
+ *   int key;
+ *   ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ *  GK_MKQSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap the two items pointed to by a and b, using t as scratch storage. The arguments are not parenthesized, so pass plain pointer names. */
+#define _GKQSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define _GKQSORT_MAX_THRESH 4
+
+/* The next 4 #defines implement a very fast in-line stack of pending [lo, hi] partition bounds; they rely on the names _stack and _top being in scope. */
+#define _GKQSORT_STACK_SIZE	    (8 * sizeof(size_t))
+#define _GKQSORT_PUSH(top, low, high) (((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define	_GKQSORT_POP(low, high, top)  ((--top, (low = top->_lo), (high = top->_hi)))
+#define	_GKQSORT_STACK_NOT_EMPTY	    (_stack < _top)
+
+
+/* The main code starts here... */
+#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT)   \
+{									\
+  GKQSORT_TYPE *const _base = (GKQSORT_BASE);				\
+  const size_t _elems = (GKQSORT_NELT);					\
+  GKQSORT_TYPE _hold;							\
+									\
+  if (_elems == 0)                                                      \
+    return;                                                             \
+  /* NOTE: this return exits the function in which the macro is expanded */ \
+  /* Don't declare two variables of type GKQSORT_TYPE in a single	\
+   * statement: e.g. `TYPE a, b;', in case TYPE is a pointer,		\
+   * expands to `type* a, b;' which isn't what we want.			\
+   */									\
+									\
+  if (_elems > _GKQSORT_MAX_THRESH) {					\
+    GKQSORT_TYPE *_lo = _base;						\
+    GKQSORT_TYPE *_hi = _lo + _elems - 1;				\
+    struct {								\
+      GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+    } _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1;			\
+									\
+    while (_GKQSORT_STACK_NOT_EMPTY) {					\
+      GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr;		\
+									\
+      /* Select median value from among LO, MID, and HI. Rearrange	\
+         LO and HI so the three values are sorted. This lowers the	\
+         probability of picking a pathological pivot value and		\
+         skips a comparison for both the LEFT_PTR and RIGHT_PTR in	\
+         the while loops. */						\
+									\
+      GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1);			\
+									\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+      if (GKQSORT_LT (_hi, _mid))					\
+        _GKQSORT_SWAP (_mid, _hi, _hold);				\
+      else								\
+        goto _jump_over;						\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+  _jump_over:;								\
+      /* Labels have function scope, so this macro can be expanded only once per function. */ \
+      _left_ptr  = _lo + 1;						\
+      _right_ptr = _hi - 1;						\
+									\
+      /* Here's the famous ``collapse the walls'' section of quicksort.	\
+         Gotta like those tight inner loops!  They are the main reason	\
+         that this algorithm runs much faster than others. */		\
+      do {								\
+        while (GKQSORT_LT (_left_ptr, _mid))				\
+         ++_left_ptr;							\
+									\
+        while (GKQSORT_LT (_mid, _right_ptr))				\
+          --_right_ptr;							\
+									\
+        if (_left_ptr < _right_ptr) {					\
+          _GKQSORT_SWAP (_left_ptr, _right_ptr, _hold);			\
+          if (_mid == _left_ptr)					\
+            _mid = _right_ptr;						\
+          else if (_mid == _right_ptr)					\
+            _mid = _left_ptr;						\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+        }								\
+        else if (_left_ptr == _right_ptr) {				\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+          break;							\
+        }								\
+      } while (_left_ptr <= _right_ptr);				\
+									\
+     /* Set up pointers for next iteration.  First determine whether	\
+        left and right partitions are below the threshold size.  If so,	\
+        ignore one or both.  Otherwise, push the larger partition's	\
+        bounds on the stack and continue sorting the smaller one. */	\
+									\
+      if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) {			\
+        if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+          /* Ignore both small partitions. */				\
+          _GKQSORT_POP (_lo, _hi, _top);				\
+        else								\
+          /* Ignore small left partition. */				\
+          _lo = _left_ptr;						\
+      }									\
+      else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+        /* Ignore small right partition. */				\
+        _hi = _right_ptr;						\
+      else if (_right_ptr - _lo > _hi - _left_ptr) {			\
+        /* Push larger left partition indices. */			\
+        _GKQSORT_PUSH (_top, _lo, _right_ptr);				\
+        _lo = _left_ptr;						\
+      }									\
+      else {								\
+        /* Push larger right partition indices. */			\
+        _GKQSORT_PUSH (_top, _left_ptr, _hi);				\
+        _hi = _right_ptr;						\
+      }									\
+    }									\
+  }									\
+									\
+  /* Once the BASE array is partially sorted by quicksort the rest	\
+     is completely sorted using insertion sort, since this is efficient	\
+     for partitions below MAX_THRESH size. BASE points to the		\
+     beginning of the array to sort, and END_PTR points at the very	\
+     last element in the array (*not* one beyond it!). */		\
+									\
+  {									\
+    GKQSORT_TYPE *const _end_ptr = _base + _elems - 1;			\
+    GKQSORT_TYPE *_tmp_ptr = _base;					\
+    register GKQSORT_TYPE *_run_ptr;					\
+    GKQSORT_TYPE *_thresh;						\
+									\
+    _thresh = _base + _GKQSORT_MAX_THRESH;				\
+    if (_thresh > _end_ptr)						\
+      _thresh = _end_ptr;						\
+									\
+    /* Find smallest element in first threshold and place it at the	\
+       array's beginning.  This is the smallest array element,		\
+       and the operation speeds up insertion sort's inner loop. */	\
+									\
+    for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr)	\
+      if (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        _tmp_ptr = _run_ptr;						\
+									\
+    if (_tmp_ptr != _base)						\
+      _GKQSORT_SWAP (_tmp_ptr, _base, _hold);				\
+									\
+    /* Insertion sort, running from left-hand-side			\
+     * up to right-hand-side.  */					\
+									\
+    _run_ptr = _base + 1;						\
+    while (++_run_ptr <= _end_ptr) {					\
+      _tmp_ptr = _run_ptr - 1;						\
+      while (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        --_tmp_ptr;							\
+									\
+      ++_tmp_ptr;							\
+      if (_tmp_ptr != _run_ptr) {					\
+        GKQSORT_TYPE *_trav = _run_ptr + 1;				\
+        while (--_trav >= _run_ptr) {					\
+          GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+          _hold = *_trav;						\
+									\
+          for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo)		\
+            *_hi = *_lo;						\
+          *_hi = _hold;							\
+        }								\
+      }									\
+    }									\
+  }									\
+									\
+}
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_mkutils.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkutils.h
new file mode 100644
index 000000000..a092f2227
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_mkutils.h
@@ -0,0 +1,40 @@
+/*!
+\file  
+\brief Templates for various utility routines
+
+\date   Started 5/28/07
+\author George
+\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKUTILS_H_
+#define _GK_MKUTILS_H_
+
+
+#define GK_MKARRAY2CSR(PRFX, TYPE)\
+/*************************************************************************/\
+/*! The macro for the gk_?array2csr() routine: it files each index i of array[0..n-1] under the value array[i], using ptr[] and ind[] */\
+/**************************************************************************/\
+void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\
+{\
+  TYPE i;\
+  /* ptr[] must provide range+1 entries; all of them are cleared */\
+  for (i=0; i<=range; i++)\
+    ptr[i] = 0;\
+  /* count how many times each value occurs in array[] */\
+  for (i=0; i<n; i++)\
+    ptr[array[i]]++;\
+  /* presumably MAKECSR turns the counts into start offsets and SHIFTCSR undoes the ++ drift of the scatter loop -- confirm against their definitions */\
+  /* Compute the ptr, ind structure */\
+  MAKECSR(i, range, ptr);\
+  for (i=0; i<n; i++)\
+    ind[ptr[array[i]]++] = i;\
+  SHIFTCSR(i, range, ptr);\
+}
+
+
+#define GK_MKARRAY2CSR_PROTO(PRFX, TYPE) /* prototype of the routine generated by GK_MKARRAY2CSR(PRFX, TYPE) */\
+  void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind);\
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_proto.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_proto.h
new file mode 100644
index 000000000..2cc299d4c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_proto.h
@@ -0,0 +1,381 @@
+/*!
+\file gk_proto.h
+\brief This file contains function prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_proto.h 12591 2012-09-01 19:03:15Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_PROTO_H_
+#define _GK_PROTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*-------------------------------------------------------------
+ * blas.c 
+ *-------------------------------------------------------------*/
+GK_MKBLAS_PROTO(gk_c,   char,     int)
+GK_MKBLAS_PROTO(gk_i,   int,      int)
+GK_MKBLAS_PROTO(gk_i32, int32_t,  int32_t)
+GK_MKBLAS_PROTO(gk_i64, int64_t,  int64_t)
+GK_MKBLAS_PROTO(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS_PROTO(gk_f,   float,    float)
+GK_MKBLAS_PROTO(gk_d,   double,   double)
+GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
+/*-------------------------------------------------------------
+ * io.c
+ *-------------------------------------------------------------*/
+FILE *gk_fopen(char *, char *, const char *);
+void gk_fclose(FILE *);
+gk_idx_t gk_getline(char **lineptr, size_t *n, FILE *stream);
+char **gk_readfile(char *fname, gk_idx_t *r_nlines);
+int32_t *gk_i32readfile(char *fname, gk_idx_t *r_nlines);
+int64_t *gk_i64readfile(char *fname, gk_idx_t *r_nlines);
+int32_t *gk_i32readfilebin(char *fname, ssize_t *r_nelmnts);
+int64_t *gk_i64readfilebin(char *fname, ssize_t *r_nelmnts);
+float *gk_freadfilebin(char *fname, ssize_t *r_nelmnts);
+size_t gk_fwritefilebin(char *fname, size_t n, float *a);
+double *gk_dreadfilebin(char *fname, ssize_t *r_nelmnts);
+
+
+
+
+/*-------------------------------------------------------------
+ * fs.c
+ *-------------------------------------------------------------*/
+int gk_fexists(char *);
+int gk_dexists(char *);
+intmax_t gk_getfsize(char *);
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+          size_t *r_max_nlntokens, size_t *r_nbytes);
+char *gk_getbasename(char *path);
+char *gk_getextname(char *path);
+char *gk_getfilename(char *path);
+char *gk_getpathname(char *path);
+int gk_mkpath(char *);
+int gk_rmpath(char *);
+
+
+
+/*-------------------------------------------------------------
+ * memory.c
+ *-------------------------------------------------------------*/
+GK_MKALLOC_PROTO(gk_c,   char)
+GK_MKALLOC_PROTO(gk_i,   int)
+GK_MKALLOC_PROTO(gk_i32, int32_t)
+GK_MKALLOC_PROTO(gk_i64, int64_t)
+GK_MKALLOC_PROTO(gk_z,   ssize_t)
+GK_MKALLOC_PROTO(gk_f,   float)
+GK_MKALLOC_PROTO(gk_d,   double)
+GK_MKALLOC_PROTO(gk_idx, gk_idx_t)
+
+GK_MKALLOC_PROTO(gk_ckv,   gk_ckv_t)
+GK_MKALLOC_PROTO(gk_ikv,   gk_ikv_t)
+GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC_PROTO(gk_zkv,   gk_zkv_t)
+GK_MKALLOC_PROTO(gk_fkv,   gk_fkv_t)
+GK_MKALLOC_PROTO(gk_dkv,   gk_dkv_t)
+GK_MKALLOC_PROTO(gk_skv,   gk_skv_t)
+GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t)
+
+void   gk_AllocMatrix(void ***, size_t, size_t , size_t);
+void   gk_FreeMatrix(void ***, size_t, size_t);
+int    gk_malloc_init();
+void   gk_malloc_cleanup(int showstats);
+void  *gk_malloc(size_t nbytes, char *msg);
+void  *gk_realloc(void *oldptr, size_t nbytes, char *msg);
+void   gk_free(void **ptr1,...);
+size_t gk_GetCurMemoryUsed();
+size_t gk_GetMaxMemoryUsed();
+
+
+
+/*-------------------------------------------------------------
+ * seq.c
+ *-------------------------------------------------------------*/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name);
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet);
+void gk_seq_init(gk_seq_t *seq);
+
+
+
+
+/*-------------------------------------------------------------
+ * pdb.c
+ *-------------------------------------------------------------*/
+char gk_threetoone(char *res);
+void gk_freepdbf(pdbf *p);
+pdbf *gk_readpdbfile(char *fname);
+void gk_writefullatom(pdbf *p, char *fname);
+void gk_writebackbone(pdbf *p, char *fname);
+void gk_writealphacarbons(pdbf *p, char *fname);
+void gk_showcorruption(pdbf *p);
+
+
+/*-------------------------------------------------------------
+ * error.c
+ *-------------------------------------------------------------*/
+void gk_set_exit_on_error(int value);
+void errexit(char *,...);
+void gk_errexit(int signum, char *,...);
+int gk_sigtrap();
+int gk_siguntrap();
+void gk_sigthrow(int signum);
+void gk_SetSignalHandlers();
+void gk_UnsetSignalHandlers();
+void gk_NonLocalExit_Handler(int signum);
+char *gk_strerror(int errnum);
+void PrintBackTrace();
+
+
+/*-------------------------------------------------------------
+ * util.c
+ *-------------------------------------------------------------*/
+void  gk_RandomPermute(size_t, int *, int);
+void  gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind);
+int   gk_log2(int);
+int   gk_ispow2(int);
+float gk_flog2(float);
+
+
+/*-------------------------------------------------------------
+ * time.c
+ *-------------------------------------------------------------*/
+gk_wclock_t gk_WClockSeconds(void);
+double gk_CPUSeconds(void);
+
+/*-------------------------------------------------------------
+ * string.c
+ *-------------------------------------------------------------*/
+char   *gk_strchr_replace(char *str, char *fromlist, char *tolist);
+int     gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str);
+char   *gk_strtprune(char *, char *);
+char   *gk_strhprune(char *, char *);
+char   *gk_strtoupper(char *); 
+char   *gk_strtolower(char *); 
+char   *gk_strdup(char *orgstr);
+int     gk_strcasecmp(char *s1, char *s2);
+int     gk_strrcmp(char *s1, char *s2);
+char   *gk_time2str(time_t time);
+time_t  gk_str2time(char *str);
+int     gk_GetStringID(gk_StringMap_t *strmap, char *key);
+
+
+
+/*-------------------------------------------------------------
+ * sort.c 
+ *-------------------------------------------------------------*/
+void gk_csorti(size_t, char *);
+void gk_csortd(size_t, char *);
+void gk_isorti(size_t, int *);
+void gk_isortd(size_t, int *);
+void gk_fsorti(size_t, float *);
+void gk_fsortd(size_t, float *);
+void gk_dsorti(size_t, double *);
+void gk_dsortd(size_t, double *);
+void gk_idxsorti(size_t, gk_idx_t *);
+void gk_idxsortd(size_t, gk_idx_t *);
+void gk_ckvsorti(size_t, gk_ckv_t *);
+void gk_ckvsortd(size_t, gk_ckv_t *);
+void gk_ikvsorti(size_t, gk_ikv_t *);
+void gk_ikvsortd(size_t, gk_ikv_t *);
+void gk_i32kvsorti(size_t, gk_i32kv_t *);
+void gk_i32kvsortd(size_t, gk_i32kv_t *);
+void gk_i64kvsorti(size_t, gk_i64kv_t *);
+void gk_i64kvsortd(size_t, gk_i64kv_t *);
+void gk_zkvsorti(size_t, gk_zkv_t *);
+void gk_zkvsortd(size_t, gk_zkv_t *);
+void gk_fkvsorti(size_t, gk_fkv_t *);
+void gk_fkvsortd(size_t, gk_fkv_t *);
+void gk_dkvsorti(size_t, gk_dkv_t *);
+void gk_dkvsortd(size_t, gk_dkv_t *);
+void gk_skvsorti(size_t, gk_skv_t *);
+void gk_skvsortd(size_t, gk_skv_t *);
+void gk_idxkvsorti(size_t, gk_idxkv_t *);
+void gk_idxkvsortd(size_t, gk_idxkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Selection routines
+ *-------------------------------------------------------------*/
+int  gk_dfkvkselect(size_t, int, gk_fkv_t *);
+int  gk_ifkvkselect(size_t, int, gk_fkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Priority queue 
+ *-------------------------------------------------------------*/
+GK_MKPQUEUE_PROTO(gk_ipq,   gk_ipq_t,   int,      gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_fpq,   gk_fpq_t,   float,    gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_dpq,   gk_dpq_t,   double,   gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t)
+
+
+/*-------------------------------------------------------------
+ * HTable routines
+ *-------------------------------------------------------------*/
+gk_HTable_t *HTable_Create(int nelements);
+void         HTable_Reset(gk_HTable_t *htable);
+void         HTable_Resize(gk_HTable_t *htable, int nelements);
+void         HTable_Insert(gk_HTable_t *htable, int key, int val);
+void         HTable_Delete(gk_HTable_t *htable, int key);
+int          HTable_Search(gk_HTable_t *htable, int key);
+int          HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type);
+int          HTable_SearchAndDelete(gk_HTable_t *htable, int key);
+void         HTable_Destroy(gk_HTable_t *htable);
+int          HTable_HFunction(int nelements, int key);
+ 
+
+/*-------------------------------------------------------------
+ * Tokenizer routines
+ *-------------------------------------------------------------*/
+void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens);
+void gk_freetokenslist(gk_Tokens_t *tokens);
+
+/*-------------------------------------------------------------
+ * Encoder/Decoder
+ *-------------------------------------------------------------*/
+void encodeblock(unsigned char *in, unsigned char *out);
+void decodeblock(unsigned char *in, unsigned char *out);
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+
+
+/*-------------------------------------------------------------
+ * random.c
+ *-------------------------------------------------------------*/
+GK_MKRANDOM_PROTO(gk_c,   size_t, char)      /* GK_MKRANDOM_PROTO is defined outside this view; each line emits the prototypes for one element type */
+GK_MKRANDOM_PROTO(gk_i,   size_t, int)
+GK_MKRANDOM_PROTO(gk_f,   size_t, float)
+GK_MKRANDOM_PROTO(gk_d,   size_t, double)
+GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM_PROTO(gk_z,   size_t, ssize_t)
+void gk_randinit(uint64_t);      /* parameter is unnamed; presumably the generator seed -- confirm in random.c */
+uint64_t gk_randint64(void);
+uint32_t gk_randint32(void);
+
+
+/*-------------------------------------------------------------
+ * OpenMP fake functions: declared only when __OPENMP__ is not defined
+ *-------------------------------------------------------------*/
+#if !defined(__OPENMP__)  /* NOTE(review): the macro an OpenMP compiler sets is _OPENMP; __OPENMP__ presumably comes from the build system -- confirm */
+void omp_set_num_threads(int num_threads);
+int omp_get_num_threads(void);
+int omp_get_max_threads(void);
+int omp_get_thread_num(void);
+int omp_get_num_procs(void);
+int omp_in_parallel(void);
+void omp_set_dynamic(int num_threads);  /* NOTE(review): parameter name looks copied from omp_set_num_threads */
+int omp_get_dynamic(void);
+void omp_set_nested(int nested);
+int omp_get_nested(void);
+#endif /* __OPENMP__ */
+
+
+/*-------------------------------------------------------------
+ * CSR-related functions (all operate on gk_csr_t sparse matrices)
+ *-------------------------------------------------------------*/
+gk_csr_t *gk_csr_Create(void);  /* (void) makes this a real prototype, so stray arguments are diagnosed */
+void gk_csr_Init(gk_csr_t *mat);
+void gk_csr_Free(gk_csr_t **mat);  /* takes the address of the caller's pointer */
+void gk_csr_FreeContents(gk_csr_t *mat);
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat);
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows);
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind);
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid);
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color);
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering);
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering);
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf);
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction);
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval);
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore);
+void gk_csr_CompactColumns(gk_csr_t *mat);
+void gk_csr_SortIndices(gk_csr_t *mat, int what);  /* `what` presumably selects rows vs. columns; its values are defined outside this view */
+void gk_csr_CreateIndex(gk_csr_t *mat, int what);
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm);
+void gk_csr_Scale(gk_csr_t *mat, int type);
+void gk_csr_ComputeSums(gk_csr_t *mat, int what);
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what);
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype);
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval,
+        int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker,
+        gk_fkv_t *i_cand);
+
+
+
+/* itemsets.c: process_itemset is a caller-supplied callback; stateptr is presumably handed back as its first argument -- confirm */
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
+        int minfreq, int maxfreq, int minlen, int maxlen,
+        void (*process_itemset)(void *stateptr, int nitems, int *itemind,
+                                int ntrans, int *tranind),
+        void *stateptr);
+
+
+/* evaluate.c: float-valued metrics. NOTE(review): these global names carry no gk_ prefix and may clash with other libraries */
+float ComputeAccuracy(int n, gk_fkv_t *list);
+float ComputeROCn(int n, int maxN, gk_fkv_t *list);
+float ComputeMedianRFP(int n, gk_fkv_t *list);
+float ComputeMean (int n, float *values);
+float ComputeStdDev(int  n, float *values);
+
+
+/* mcore.c: gk_mcore_t workspace API; each operation exists in a gk_mcore* and a gk_gkmcore* flavour */
+gk_mcore_t *gk_mcoreCreate(size_t coresize);
+gk_mcore_t *gk_gkmcoreCreate(void);  /* (void) makes this a real prototype, so stray arguments are diagnosed */
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats);  /* takes the address of the caller's pointer */
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats);
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes);
+void gk_mcorePush(gk_mcore_t *mcore);
+void gk_gkmcorePush(gk_mcore_t *mcore);
+void gk_mcorePop(gk_mcore_t *mcore);
+void gk_gkmcorePop(gk_mcore_t *mcore);
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);  /* arguments mirror the fields of gk_mop_t */
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr);
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr);
+
+/* rw.c: `pr` is presumably the PageRank vector (in/out) and the return value an iteration count -- confirm in rw.c */
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr);
+
+
+/* graph.c: routines operating on gk_graph_t */
+gk_graph_t *gk_graph_Create(void);  /* (void) makes this a real prototype, so stray arguments are diagnosed */
+void gk_graph_Init(gk_graph_t *graph);
+void gk_graph_Free(gk_graph_t **graph);  /* takes the address of the caller's pointer */
+void gk_graph_FreeContents(gk_graph_t *graph);
+gk_graph_t *gk_graph_Read(char *filename, int format, int isfewgts,
+                    int isfvwgts, int isfvsizes);
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format);
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph);
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs);
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm);
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind);
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm, 
+         int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps);
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_struct.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_struct.h
new file mode 100644
index 000000000..3ef7bbd7b
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_struct.h
@@ -0,0 +1,268 @@
+/*!
+\file gk_struct.h
+\brief This file contains various datastructures used/provided by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_struct.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_STRUCT_H_
+#define _GK_STRUCT_H_
+
+
+/********************************************************************/
+/*! Generator for gk_??KeyVal_t data structure: emits a typedef'd struct NAME with members `key` (KEYTYPE) and `val` (VALTYPE) */
+/********************************************************************/
+#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \
+typedef struct {\
+  KEYTYPE key;\
+  VALTYPE val;\
+} NAME;
+
+/* The actual KeyVal data structures (no trailing ';' -- the macro already supplies it) */
+GK_MKKEYVALUE_T(gk_ckv_t,   char,     ssize_t)
+GK_MKKEYVALUE_T(gk_ikv_t,   int,      ssize_t)
+GK_MKKEYVALUE_T(gk_i32kv_t, int32_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_i64kv_t, int64_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_zkv_t,   ssize_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_fkv_t,   float,    ssize_t)
+GK_MKKEYVALUE_T(gk_dkv_t,   double,   ssize_t)
+GK_MKKEYVALUE_T(gk_skv_t,   char *,   ssize_t)
+GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t)
+
+
+
+/********************************************************************/
+/*! Generator for gk_?pq_t data structure: node counts, a heap array of KVTYPE and a locator array */
+/********************************************************************/
+#define GK_MKPQUEUE_T(NAME, KVTYPE)\
+typedef struct {\
+  gk_idx_t nnodes;\
+  gk_idx_t maxnodes;\
+\
+  /* Heap version of the data structure */ \
+  KVTYPE   *heap;\
+  gk_idx_t *locator;\
+} NAME;
+
+GK_MKPQUEUE_T(gk_ipq_t,    gk_ikv_t)
+GK_MKPQUEUE_T(gk_i32pq_t,  gk_i32kv_t)
+GK_MKPQUEUE_T(gk_i64pq_t,  gk_i64kv_t)
+GK_MKPQUEUE_T(gk_fpq_t,    gk_fkv_t)
+GK_MKPQUEUE_T(gk_dpq_t,    gk_dkv_t)
+GK_MKPQUEUE_T(gk_idxpq_t,  gk_idxkv_t)
+
+/*! Generator for a queue type holding keys and values in two separate arrays; not instantiated in this header */
+#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\
+typedef struct {\
+  ssize_t nnodes;\
+  ssize_t maxnodes;\
+\
+  /* Heap version of the data structure */ \
+  KTYPE *keys;\
+  VTYPE *vals;\
+} NAME;
+
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a sparse matrix in CSR format; every array member comes as a row (r*) / column (c*) pair
+ *-------------------------------------------------------------*/
+typedef struct gk_csr_t {
+  int32_t nrows, ncols;        /* presumably the matrix dimensions -- confirm */
+  ssize_t *rowptr, *colptr;    /* presumably per-row / per-column offsets into the ind/val arrays -- confirm in csr.c */
+  int32_t *rowind, *colind;    /* presumably the column indices of each row / row indices of each column -- confirm */
+  int32_t *rowids, *colids;
+  float *rowval, *colval;      /* presumably the non-zero values, parallel to rowind / colind -- confirm */
+  float *rnorms, *cnorms;
+  float *rsums, *csums;
+  float *rsizes, *csizes;
+  float *rvols, *cvols;
+  float *rwgts, *cwgts;
+} gk_csr_t;
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a sparse graph (xadj/adjncy adjacency structure plus integer and floating-point weights, sizes and labels)
+ *-------------------------------------------------------------*/
+typedef struct gk_graph_t {
+  int32_t nvtxs;                /*!< The number of vertices in the graph */
+  ssize_t *xadj;                /*!< The ptr-structure of the adjncy list */
+  int32_t *adjncy;              /*!< The adjacency list of the graph */
+  int32_t *iadjwgt;             /*!< The integer edge weights */
+  float *fadjwgt;               /*!< The floating point edge weights */
+  int32_t *ivwgts;              /*!< The integer vertex weights */
+  float *fvwgts;                /*!< The floating point vertex weights */
+  int32_t *ivsizes;             /*!< The integer vertex sizes */
+  float *fvsizes;               /*!< The floating point vertex sizes */
+  int32_t *vlabels;             /*!< The labels of the vertices */
+} gk_graph_t;
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a string as a
+ * pair of a length and the character buffer itself.
+ *-------------------------------------------------------------*/
+typedef struct gk_str_t {
+  size_t len;   /* NOTE(review): whether this is the allocated size or the string length is not visible here -- confirm */
+  char *buf;    /* the character buffer */
+} gk_str_t;
+
+
+
+
+/*-------------------------------------------------------------
+* The following data structure implements one entry of a string-2-int
+* mapping table used for parsing command-line options
+*-------------------------------------------------------------*/
+typedef struct gk_StringMap_t {
+  char *name;   /* the string side of the mapping */
+  int id;       /* the integer that `name` maps to */
+} gk_StringMap_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements a simple hash table of gk_ikv_t entries
+ *------------------------------------------------------------*/
+typedef struct gk_HTable_t {
+  int nelements;          /* The overall size of the hash-table */
+  int htsize;             /* The current size of the hash-table */
+  gk_ikv_t *harray;       /* The actual hash-table; gk_ikv_t pairs an int key with an ssize_t val */
+} gk_HTable_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements the gk_Tokens_t list returned by the
+ * string tokenizer (see gk_strtokenize / gk_freetokenslist)
+ *------------------------------------------------------------*/
+typedef struct gk_Tokens_t {
+  int ntoks;        /* The number of tokens in the input string */
+  char *strbuf;     /* The memory that stores all the entries */
+  char **list;      /* Pointers into strbuf, one per token */
+} gk_Tokens_t;
+
+/*------------------------------------------------------------
+ * This structure implements storage for an atom in a pdb file (fields presumably mirror the PDB ATOM record -- confirm)
+ *------------------------------------------------------------*/
+typedef struct atom {
+  int       serial;
+  char      *name;
+  char	    altLoc;
+  char      *resname;
+  char      chainid;	
+  int       rserial;
+  char	    icode;
+  char      element;   /* a single char, so only a one-character symbol can be stored */
+  double    x;
+  double    y;
+  double    z;
+  double    opcy;      /* presumably the occupancy -- confirm */
+  double    tmpt;      /* presumably the temperature factor -- confirm */
+} atom;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a center of mass for
+ * a single residue: a one-character name plus three coordinates.
+ *------------------------------------------------------------*/
+typedef struct center_of_mass {
+  char name;   /* presumably the one-letter residue code -- confirm */
+  double x;
+  double y;
+  double z;
+} center_of_mass;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a pdb protein
+ *------------------------------------------------------------*/
+typedef struct pdbf {
+	int natoms;			/* Number of atoms */
+	int nresidues;  /* Number of residues based on coordinates */
+	int ncas;       /* presumably the number of entries in cas -- confirm */
+	int nbbs;       /* presumably the number of entries in bbs -- confirm */
+	int corruption;
+	char *resSeq;	      /* Residue sequence based on coordinates    */
+  char **threeresSeq; /* three-letter residue sequence */
+	atom *atoms;
+	atom **bbs;
+	atom **cas;
+  center_of_mass *cm;
+} pdbf;
+
+
+
+/*************************************************************
+* Localization structure for converting between characters and integers
+**************************************************************/
+typedef struct gk_i2cc2i_t {
+    int n;       /* presumably the number of symbols in the mapping -- confirm */
+    char *i2c;   /* presumably the integer-to-character table -- confirm */
+    int *c2i;    /* presumably the character-to-integer table -- confirm */
+} gk_i2cc2i_t;
+ 
+
+/*******************************************************************
+ * This structure implements storage of a protein sequence
+ *******************************************************************/
+typedef struct gk_seq_t {
+    
+    int len; /*Number of Residues */
+    int *sequence; /* Stores the sequence*/
+    
+    
+    int **pssm; /* Stores the pssm matrix */
+    int **psfm; /* Stores the psfm matrix */
+    char *name; /* Stores the name of the sequence */
+
+    int nsymbols; /* presumably the alphabet size (width of pssm/psfm rows) -- confirm */
+
+    
+} gk_seq_t;
+
+
+
+
+/*************************************************************************/
+/*! The following data structure stores information about a memory 
+    allocation operation that can either be served from gk_mcore_t or by
+    a gk_malloc if not sufficient workspace memory is available. */
+/*************************************************************************/
+typedef struct gk_mop_t {
+  int type;        /* kind of operation; the possible values are defined outside this view */
+  ssize_t nbytes;  /* presumably the size of the allocation in bytes -- confirm */
+  void *ptr;       /* presumably the address handed out for the allocation -- confirm */
+} gk_mop_t;
+
+
+/*************************************************************************/
+/*! The following structure stores the workspace (core) state: the core buffer, the gk_mop_t stack and allocation statistics */
+/*************************************************************************/
+typedef struct gk_mcore_t {
+  /* Workspace information */
+  size_t coresize;     /*!< The amount of core memory that has been allocated */
+  size_t corecpos;     /*!< Index of the first free location in core */
+  void *core;	       /*!< Pointer to the core itself */
+
+  /* These are for implementing a stack-based allocation scheme using both
+     core and also dynamically allocated memory */
+  size_t nmops;         /*!< The number of gk_mop_t entries that have been allocated */
+  size_t cmop;          /*!< Index of the first free location in mops */
+  gk_mop_t *mops;       /*!< The array recording the gk_mop_t operations */
+
+  /* These are for keeping various statistics for wspacemalloc */
+  size_t num_callocs;   /*!< The number of core mallocs */
+  size_t num_hallocs;   /*!< The number of heap mallocs */
+  size_t size_callocs;  /*!< The total # of bytes in core mallocs */
+  size_t size_hallocs;  /*!< The total # of bytes in heap mallocs */
+  size_t cur_callocs;   /*!< The current # of bytes in core mallocs */
+  size_t cur_hallocs;   /*!< The current # of bytes in heap mallocs */
+  size_t max_callocs;   /*!< The maximum # of bytes in core mallocs at any given time */
+  size_t max_hallocs;   /*!< The maximum # of bytes in heap mallocs at any given time */
+
+} gk_mcore_t;
+
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gk_types.h b/3rdParty/metis/metis-5.1.0/GKlib/gk_types.h
new file mode 100644
index 000000000..57c119101
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gk_types.h
@@ -0,0 +1,38 @@
+/*!
+\file  gk_types.h
+\brief This file contains basic scalar datatype used in GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_TYPES_H_
+#define _GK_TYPES_H_
+
+/*************************************************************************
+* Basic data type definitions. These definitions allow GKlib to separate
+* the following elemental types:
+* - index / loop iterator variables (gk_idx_t), which are set to ssize_t
+* - signed and unsigned int variables that can be set to any # of bits
+* - signed and unsigned long variables that can be set to any # of bits
+* - real variables, which can be set to single or double precision.
+**************************************************************************/
+/*typedef ptrdiff_t       gk_idx_t;       */  /* index variable */
+typedef ssize_t         gk_idx_t;         /* index variable */
+typedef int32_t         gk_int_t;         /* integer values */
+typedef uint32_t        gk_uint_t;        /* unsigned integer values */
+typedef int64_t         gk_long_t;        /* long integer values */
+typedef uint64_t        gk_ulong_t;       /* unsigned long integer values */
+typedef float           gk_real_t;        /* real type */
+typedef double          gk_dreal_t;       /* double precision real type */
+typedef double          gk_wclock_t;	  /* wall-clock time */
+
+/*#define GK_IDX_MAX PTRDIFF_MAX*/
+#define GK_IDX_MAX ((SIZE_MAX>>1)-2)  /* two below SIZE_MAX/2; presumably a safe upper bound for gk_idx_t, assuming ssize_t is as wide as size_t -- confirm */
+
+#define PRIGKIDX "zd"  /* printf conversion for gk_idx_t, to be used as "%" PRIGKIDX */
+#define SCNGKIDX "zd"  /* scanf conversion for gk_idx_t, to be used as "%" SCNGKIDX */
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gkregex.c b/3rdParty/metis/metis-5.1.0/GKlib/gkregex.c
new file mode 100644
index 000000000..8a09caab7
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gkregex.c
@@ -0,0 +1,10704 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Dummy definition: the code below is guarded by #ifdef USE_GKREGEX, so without it this file could be an empty translation unit and draw a compiler warning */
+void gkfooo() { return; }
+
+#ifdef USE_GKREGEX
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+#include "GKlib.h"
+
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.h" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__MINGW32_VERSION) || defined(_MSC_VER)
+#define strcasecmp stricmp
+#endif
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#else
+typedef enum { false, true } bool;
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank().  */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#   include <locale/localeinfo.h>
+#   include <locale/elem-hash.h>
+#   include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages.  */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+#  undef gettext
+#  define gettext(msgid) \
+  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+# define gettext_noop(String) String
+#endif
+
+/* For systems whose headers do not provide the definition.  */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)  /* branch hint: expr is expected to equal val */
+#else
+# define BE(expr, val) (expr)  /* no hint available: just evaluate expr */
+# define inline  /* NOTE(review): erases the `inline` keyword on every compiler that is not GCC >= 3 -- confirm this is still wanted */
+#endif
+
+/* Number of single byte character.  */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)  /* turn bit i on; arguments are parenthesized, but i is still evaluated twice */
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))  /* turn bit i off; i is evaluated twice */
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))  /* non-zero iff bit i is on; the result is the masked word, not 0/1 */
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))  /* clear every bit of a whole bitset_t */
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))  /* set every bit of a whole bitset_t */
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))  /* copy one whole bitset_t; dest and src must not overlap (memcpy) */
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+typedef struct
+{
+  int alloc;   /* presumably the number of slots allocated in elems -- confirm */
+  int nelem;   /* presumably the number of slots currently in use -- confirm */
+  int *elems;  /* the int elements of the set */
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  int *offsets;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The corresponding number of bytes in raw_mbs array.  */
+  int valid_raw_len;
+  /* The length of the buffers MBS and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* length of RAW_MBS array.  */
+  int raw_len;
+  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int raw_stop;
+  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* Copy of re_dfa_t's word_char.  */
+  re_const_bitset_ptr_t word_char;
+  /* 1 if REG_ICASE.  */
+  unsigned char icase;
+  unsigned char is_utf8;
+  unsigned char map_notascii;
+  unsigned char mbs_allocated;
+  unsigned char offsets_needed;
+  unsigned char newline_anchor;
+  unsigned char word_ops_used;
+  int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+#  define internal_function   __attribute ((regparm (3), stdcall))
+# else
+#  define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+						int new_buf_len)
+     internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+					  int eflags)
+     internal_function __attribute ((pure));
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])  /* byte at cur_idx + offset; cur_idx is left unchanged */
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])  /* byte at cur_idx; side effect: advances cur_idx by one */
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)  /* reads wcs, a member that exists only under RE_ENABLE_I18N */
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+				|| (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)  /* true once cur_idx has reached stop */
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))  /* moves cur_idx forward by idx */
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))  /* sets cur_idx to idx */
+
+#ifdef __GNUC__  /* __GNUC__ compilers: map alloca onto the builtin.  */
+# define alloca(size)   __builtin_alloca (size)
+# define HAVE_ALLOCA 1
+#elif defined(_MSC_VER)  /* MSVC: alloca is spelled _alloca; <malloc.h> is included for it.  */
+# include <malloc.h>
+# define alloca _alloca
+# define HAVE_ALLOCA 1
+#else  /* No known alloca implementation: stop the build.  */
+# error No alloca()
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  The limit used below is 4032,
+   leaving 64 bytes for a few compiler-allocated temporary stack slots.  */
+#  define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* Without a real alloca, always answer 0 so that callers use malloc.  */
+#  define __libc_use_alloca(n) 0
+# endif
+#endif
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))  /* Array of N objects of type T; the product is not overflow-checked.  */
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))  /* Resize P to N objects of type T; same caveat.  */
+#define re_free(p) free (p)  /* Plain free.  */
+
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;  /* Tree links: parent and the two children.  */
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+  struct bin_tree_t *first;  /* NOTE(review): FIRST/NEXT look like traversal links -- confirm where they are set.  */
+  struct bin_tree_t *next;
+
+  re_token_t token;  /* The token stored in this tree node.  */
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicates the type of this node.  */
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))  /* Nodes that fit in 1024 bytes after one pointer.  */
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;  /* Link to another storage block.  */
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];  /* The nodes held by this block.  */
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1  /* The four context flags are distinct bits: 1, 2, 4 and 8.  */
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)  /* Nonzero (not necessarily 1) when the bit is set.  */
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)  /* No context bit set at all.  */
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')  /* CH may be evaluated twice.  */
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')  /* Wide-character twin; CH may be evaluated twice.  */
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))  /* Nonzero if CONTEXT violates any PREV_* bit set in CONSTRAINT.  */
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))  /* Nonzero if CONTEXT violates any NEXT_* bit set in CONSTRAINT.  */
+
+struct re_dfastate_t
+{
+  unsigned int hash;  /* Hash value the state was created with (see create_ci_newstate / create_cd_newstate).  */
+  re_node_set nodes;  /* The node set this state was created from.  */
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;  /* NOTE(review): presumably transition tables -- confirm.  */
+  unsigned int context : 4;  /* Four bits: room for the four CONTEXT_* flags.  */
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+struct re_state_table_entry
+{
+  int num;  /* NOTE(review): presumably the number of states stored in ARRAY -- confirm.  */
+  int alloc;  /* NOTE(review): presumably the allocated capacity of ARRAY -- confirm.  */
+  re_dfastate_t **array;  /* Array of pointers to states.  */
+};
+
+/* Array of state pointers, used in re_sub_match_last_t and re_sub_match_top_t.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;  /* NOTE(review): presumably the allocated capacity of ARRAY -- confirm.  */
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Stores information about the node NODE, whose type is OP_CLOSE_SUBEXP.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position at which NODE matches.  */
+  state_array_t path;  /* Held by value here; re_sub_match_top_t holds a pointer instead.  */
+} re_sub_match_last_t;
+
+/* Stores information about the node NODE, whose type is OP_OPEN_SUBEXP.
+   Information about the corresponding OP_CLOSE_SUBEXP node(s) is kept
+   in LASTS.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;  /* Array of pointers, sized by ALASTS / NLASTS above.  */
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;  /* NOTE(review): FROM/TO presumably delimit the matched subexpression -- confirm.  */
+  int subexp_to;
+  char more;
+  char unused;  /* Named `unused'; two chars plus the short below total four bytes where short is 16 bits.  */
+  unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+  /* The string object corresponding to the input string.  */
+  re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  const re_dfa_t *const dfa;  /* The pointer itself is const only under _LIBC or C99 and later.  */
+#else
+  const re_dfa_t *dfa;
+#endif
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;  /* NOTE(review): N-/A- prefixes presumably mean used / allocated, as in NLASTS / ALASTS -- confirm.  */
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+  re_dfastate_t **sifted_states;  /* Both members are arrays of state pointers.  */
+  re_dfastate_t **limited_states;
+  int last_node;
+  int last_str_idx;
+  re_node_set limits;  /* Held by value.  */
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;  /* Held by value.  */
+};
+
+struct re_fail_stack_t
+{
+  int num;  /* NOTE(review): presumably the number of entries in use -- confirm.  */
+  int alloc;  /* NOTE(review): presumably the allocated capacity of STACK -- confirm.  */
+  struct re_fail_stack_ent_t *stack;  /* Array of entries, stored by value.  */
+};
+
+struct re_dfa_t
+{
+  re_token_t *nodes;  /* Node array; bin_tree_t.node_idx indexes into it.  */
+  size_t nodes_alloc;
+  size_t nodes_len;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  re_dfastate_t *init_state;  /* Four initial states; the suffixes mirror the CONTEXT_* names.  */
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
+  int str_tree_storage_idx;
+
+  /* The number of subexpressions, `re_nsub', is kept in regex_t instead.  */
+  unsigned int state_hash_mask;
+  int init_node;
+  int nbackref; /* The number of backreferences in this dfa.  */
+
+  /* Bitmap expressing which backreference is used.  */
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
+
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;  /* Copied into re_string_t by re_string_construct_common.  */
+  unsigned int map_notascii : 1;  /* Likewise copied into re_string_t.  */
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;  /* Likewise copied; a value above 1 enables the wide-character buffers.  */
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  __libc_lock_define (, lock)  /* No trailing semicolon here: the macro is expected to supply its own.  */
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))  /* Zero the whole object.  */
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))  /* The index passed on is contains () - 1.  */
+#define re_node_set_empty(p) ((p)->nelem = 0)  /* Resets the count only; nothing is freed.  */
+#define re_node_set_free(set) re_free ((set)->elems)  /* Frees ELEMS only, not SET itself.  */
+
+
+typedef enum  /* Tag stored in bracket_elem_t.type.  */
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;  /* NOTE(review): presumably selects which member of OPR is meaningful -- confirm.  */
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+static inline void
+bitset_not (bitset_t set)  /* Complement every word of SET in place.  */
+{
+  int word;
+  for (word = BITSET_WORDS - 1; word >= 0; --word)
+    set[word] = ~set[word];
+}
+
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)  /* DEST becomes DEST | SRC, word by word.  */
+{
+  int word;
+  for (word = BITSET_WORDS - 1; word >= 0; --word)
+    dest[word] = dest[word] | src[word];
+}
+
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)  /* DEST becomes DEST & SRC, word by word.  */
+{
+  int word;
+  for (word = BITSET_WORDS - 1; word >= 0; --word)
+    dest[word] = dest[word] & src[word];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int size = 1;
+  if (pstr->mb_cur_max == 1)
+    return size;
+  /* Count the WEOF padding slots that follow IDX, within VALID_LEN.  */
+  while (idx + size < pstr->valid_len && pstr->wcs[idx + size] == WEOF)
+    ++size;
+  return size;
+}
+
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  /* With MB_CUR_MAX == 1 the byte itself is used; otherwise the WCS entry.  */
+  const int single_byte = (pstr->mb_cur_max == 1);
+  return single_byte ? (wint_t) pstr->mbs[idx] : (wint_t) pstr->wcs[idx];
+}
+
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+#  include <locale/weight.h>  /* NOTE(review): presumably supplies findidx, which uses the locals above -- confirm.  */
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      tmp = findidx (&p);  /* TMP is not read afterwards; only the change in P is used.  */
+      return p - pstr->mbs - idx;  /* Distance P moved from MBS + IDX.  */
+    }
+  else
+# endif /* _LIBC */
+    return 1;  /* Reached when NRULES is 0, and always when _LIBC is not defined.  */
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+					re_string_t *pstr,
+					RE_TRANSLATE_TYPE trans, int icase,
+					const re_dfa_t *dfa) internal_function;  /* Shared field setup for re_string_allocate / re_string_construct.  */
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int hash) internal_function;  /* Takes no CONTEXT argument, unlike the next one.  */
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int context,
+					  unsigned int hash) internal_function;
+
+/* Functions for string operation.  */
+
+/* Set up PSTR for STR (LEN bytes) and allocate its working buffers.
+   It is necessary to call re_string_reconstruct before using the object.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  /* Room for at least one character, but never more than LEN + 1.  */
+  int buf_len = init_len < dfa->mb_cur_max ? dfa->mb_cur_max : init_len;
+  reg_errcode_t err;
+
+  if (len + 1 < buf_len)
+    buf_len = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+  err = re_string_realloc_buffers (pstr, buf_len);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  pstr->word_ops_used = dfa->word_ops_used;
+  pstr->word_char = dfa->word_char;
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* This function allocates the buffers and fills them from STR right away.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)  /* For LEN <= 0 nothing is allocated and BUFS_LEN stays 0.  */
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
+    }
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;  /* Without a private copy, read STR directly.  */
+
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (1)  /* Build, and keep doubling the buffers until the whole input is converted.  */
+	    {
+	      ret = build_wcs_upper_buffer (pstr);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	      if (pstr->valid_raw_len >= len)
+		break;
+	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+		break;
+	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	    }
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);  /* Body of the `else' above when RE_ENABLE_I18N is defined.  */
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (trans != NULL)
+	    re_string_translate_buffer (pstr);
+	  else
+	    {
+	      pstr->valid_len = pstr->bufs_len;  /* Nothing to convert: mark the whole buffer length as valid.  */
+	      pstr->valid_raw_len = pstr->bufs_len;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)  /* WCS (and OFFSETS, once it exists) are resized only when MB_CUR_MAX > 1.  */
+    {
+      wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);  /* NEW_BUF_LEN * sizeof is not overflow-checked.  */
+      if (BE (new_wcs == NULL, 0))
+	return REG_ESPACE;
+      pstr->wcs = new_wcs;  /* Stored at once, so a later failure return leaves PSTR->WCS valid.  */
+      if (pstr->offsets != NULL)
+	{
+	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+	  if (BE (new_offsets == NULL, 0))
+	    return REG_ESPACE;
+	  pstr->offsets = new_offsets;
+	}
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)  /* MBS is resized only when PSTR owns a private copy.  */
+    {
+      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+					   new_buf_len);
+      if (BE (new_mbs == NULL, 0))
+	return REG_ESPACE;
+      pstr->mbs = new_mbs;
+    }
+  pstr->bufs_len = new_buf_len;  /* Updated only after every reallocation succeeded.  */
+  return REG_NOERROR;
+}
+
+
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+			    RE_TRANSLATE_TYPE trans, int icase,
+			    const re_dfa_t *dfa)
+{
+  /* A private MBS copy is flagged whenever bytes get translated or case-folded.  */
+  pstr->trans = trans;
+  pstr->icase = (icase != 0);
+  pstr->mbs_allocated = (icase || trans != NULL);
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = len;
+  pstr->len = len;
+  pstr->stop = pstr->len; pstr->raw_stop = pstr->stop;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string is:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then the wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   WEOF is used for padding; it indicates that the position isn't
+   the first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.  */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64];  /* Holds up to MB_CUR_MAX translated bytes; not checked against 64 here.  */
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;  /* Saved so a failed conversion can be undone.  */
+      /* Apply the translation if we need.  */
+      if (BE (pstr->trans != NULL, 0))
+	{
+	  int i, ch;
+
+	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	    {
+	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];  /* Translated byte goes to MBS and to the scratch BUF.  */
+	    }
+	  p = (const char *) buf;
+	}
+      else
+	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+	{
+	  /* Incomplete character in the REMAIN_LEN bytes: undo and stop building.  */
+	  pstr->cur_state = prev_st;
+	  break;
+	}
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a singlebyte character.  */
+	  mbclen = 1;
+	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	  if (BE (pstr->trans != NULL, 0))
+	    wc = pstr->trans[wc];
+	  pstr->cur_state = prev_st;
+	}
+
+      /* Write the wide character at the first byte's position.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings for the remaining MBCLEN - 1 bytes.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;  /* Raw and built indices coincide in this function.  */
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE.  Returns REG_NOERROR, or REG_ESPACE if OFFSETS cannot be allocated.  */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+	{
+	  wchar_t wc;
+
+	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+	      && mbsinit (&pstr->cur_state))
+	    {
+	      /* In case of a singlebyte character.  */
+	      pstr->mbs[byte_idx]
+		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+	      /* The next step uses the assumption that wchar_t is encoded
+		 ASCII-safe: all ASCII values can be converted like this.  */
+	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+	      ++byte_idx;
+	      continue;
+	    }
+
+	  remain_len = end_idx - byte_idx;
+	  prev_st = pstr->cur_state;
+	  mbclen = mbrtowc (&wc,
+			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+			     + byte_idx), remain_len, &pstr->cur_state);
+	  if (BE (mbclen + 2 > 2, 1))  /* True unless MBCLEN is 0, (size_t) -1 or (size_t) -2.  */
+	    {
+	      wchar_t wcu = wc;
+	      if (iswlower (wc))
+		{
+		  size_t mbcdlen;
+
+		  wcu = towupper (wc);
+		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+		  if (BE (mbclen == mbcdlen, 1))
+		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		  else
+		    {
+		      src_idx = byte_idx;  /* Raw and built indices coincide on this path.  */
+		      goto offsets_needed;  /* Length changed (or wcrtomb failed): continue in the general loop.  */
+		    }
+		}
+	      else
+		memcpy (pstr->mbs + byte_idx,
+			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+	      pstr->wcs[byte_idx++] = wcu;
+	      /* Write paddings.  */
+	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+		pstr->wcs[byte_idx++] = WEOF;
+	    }
+	  else if (mbclen == (size_t) -1 || mbclen == 0)
+	    {
+	      /* It is an invalid character or '\0'.  Just use the byte.  */
+	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	      pstr->mbs[byte_idx] = ch;
+	      /* And also cast it to wide char.  */
+	      pstr->wcs[byte_idx++] = (wchar_t) ch;
+	      if (BE (mbclen == (size_t) -1, 0))
+		pstr->cur_state = prev_st;
+	    }
+	  else
+	    {
+	      /* (size_t) -2: incomplete character in the bytes available, finish to build.  */
+	      pstr->cur_state = prev_st;
+	      break;
+	    }
+	}
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)  /* SRC_IDX tracks the raw input, BYTE_IDX the built buffer.  */
+      {
+	wchar_t wc;
+	const char *p;
+      offsets_needed:  /* Also entered by the goto in the branch above.  */
+	remain_len = end_idx - byte_idx;
+	prev_st = pstr->cur_state;
+	if (BE (pstr->trans != NULL, 0))
+	  {
+	    int i, ch;
+
+	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	      {
+		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+		buf[i] = pstr->trans[ch];
+	      }
+	    p = (const char *) buf;
+	  }
+	else
+	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+	if (BE (mbclen + 2 > 2, 1))  /* True unless MBCLEN is 0, (size_t) -1 or (size_t) -2.  */
+	  {
+	    wchar_t wcu = wc;
+	    if (iswlower (wc))
+	      {
+		size_t mbcdlen;
+
+		wcu = towupper (wc);
+		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+		if (BE (mbclen == mbcdlen, 1))
+		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		else if (mbcdlen != (size_t) -1)
+		  {
+		    size_t i;
+
+		    if (byte_idx + mbcdlen > pstr->bufs_len)
+		      {
+			pstr->cur_state = prev_st;  /* No room for the upper-case form: undo and stop.  */
+			break;
+		      }
+
+		    if (pstr->offsets == NULL)
+		      {
+			pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+			if (pstr->offsets == NULL)
+			  return REG_ESPACE;
+		      }
+		    if (!pstr->offsets_needed)
+		      {
+			for (i = 0; i < (size_t) byte_idx; ++i)
+			  pstr->offsets[i] = i;  /* Identity map for the part built so far.  */
+			pstr->offsets_needed = 1;
+		      }
+
+		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+		    pstr->wcs[byte_idx] = wcu;
+		    pstr->offsets[byte_idx] = src_idx;
+		    for (i = 1; i < mbcdlen; ++i)
+		      {
+			pstr->offsets[byte_idx + i]
+			  = src_idx + (i < mbclen ? i : mbclen - 1);
+			pstr->wcs[byte_idx + i] = WEOF;
+		      }
+		    pstr->len += mbcdlen - mbclen;  /* The built string changes length by the difference.  */
+		    if (pstr->raw_stop > src_idx)
+		      pstr->stop += mbcdlen - mbclen;
+		    end_idx = (pstr->bufs_len > pstr->len)
+			      ? pstr->len : pstr->bufs_len;
+		    byte_idx += mbcdlen;
+		    src_idx += mbclen;
+		    continue;
+		  }
+                else
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);  /* wcrtomb failed: keep the bytes at P.  */
+	      }
+	    else
+	      memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      {
+		size_t i;
+		for (i = 0; i < mbclen; ++i)
+		  pstr->offsets[byte_idx + i] = src_idx + i;
+	      }
+	    src_idx += mbclen;
+
+	    pstr->wcs[byte_idx++] = wcu;
+	    /* Write paddings.  */
+	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	      pstr->wcs[byte_idx++] = WEOF;
+	  }
+	else if (mbclen == (size_t) -1 || mbclen == 0)
+	  {
+	    /* It is an invalid character or '\0'.  Just use the byte.  */
+	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+	    if (BE (pstr->trans != NULL, 0))
+	      ch = pstr->trans [ch];
+	    pstr->mbs[byte_idx] = ch;
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      pstr->offsets[byte_idx] = src_idx;
+	    ++src_idx;
+
+	    /* And also cast it to wide char.  */
+	    pstr->wcs[byte_idx++] = (wchar_t) ch;
+	    if (BE (mbclen == (size_t) -1, 0))
+	      pstr->cur_state = prev_st;
+	  }
+	else
+	  {
+	    /* (size_t) -2: incomplete character in the bytes available, finish to build.  */
+	    pstr->cur_state = prev_st;
+	    break;
+	  }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip characters until the raw index is no longer less than NEW_RAW_IDX.
+   Return that index; the last character skipped is stored in *LAST_WC.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  wchar_t wc = WEOF;  /* Stays WEOF when the loop body never runs.  */
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      /* RAWBUF_IDX indexes RAW_MBS absolutely, so measure against RAW_LEN.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
+			remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a single byte character.  */
+	  if (mbclen == 0 || remain_len == 0)
+	    wc = L'\0';
+	  else
+	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+	  mbclen = 1;
+	  pstr->cur_state = prev_st;
+	}
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Fill PSTR->MBS, from PSTR->VALID_LEN on, with the upper-cased input,
+   mapping each byte through PSTR->TRANS first when set.  Used for REG_ICASE.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  /* Stop at the end of the string or of the buffer, whichever is first.  */
+  int limit = pstr->len < pstr->bufs_len ? pstr->len : pstr->bufs_len;
+  int pos;
+
+  for (pos = pstr->valid_len; pos < limit; ++pos)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+      if (BE (pstr->trans != NULL, 0))
+	ch = pstr->trans[ch];
+      /* Only lower-case characters are converted; all others pass through.  */
+      pstr->mbs[pos] = islower (ch) ? toupper (ch) : ch;
+    }
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* Extend PSTR->MBS with the input bytes mapped through PSTR->TRANS.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  /* Stop at the end of the string or of the buffer, whichever is first.  */
+  int limit = pstr->len < pstr->bufs_len ? pstr->len : pstr->bufs_len;
+  int pos = pstr->valid_len;
+
+  while (pos < limit)
+    {
+      pstr->mbs[pos] = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + pos]];
+      ++pos;
+    }
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* This function re-construct the buffers.
+   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+   convert to upper case in case of REG_ICASE, apply translation.  */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+  int offset = idx - pstr->raw_mbs_idx;
+  if (BE (offset < 0, 0))
+    {
+      /* Reset buffer.  */
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      pstr->len = pstr->raw_len;
+      pstr->stop = pstr->raw_stop;
+      pstr->valid_len = 0;
+      pstr->raw_mbs_idx = 0;
+      pstr->valid_raw_len = 0;
+      pstr->offsets_needed = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!pstr->mbs_allocated)
+	pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (BE (offset != 0, 1))
+    {
+      /* Should the already checked characters be kept?  */
+      if (BE (offset < pstr->valid_raw_len, 1))
+	{
+	  /* Yes, move them to the front of the buffer.  */
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      int low = 0, high = pstr->valid_len, mid;
+	      do
+		{
+		  mid = (high + low) / 2;
+		  if (pstr->offsets[mid] > offset)
+		    high = mid;
+		  else if (pstr->offsets[mid] < offset)
+		    low = mid + 1;
+		  else
+		    break;
+		}
+	      while (low < high);
+	      if (pstr->offsets[mid] < offset)
+		++mid;
+	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
+							eflags);
+	      /* This can be quite complicated, so handle specially
+		 only the common and easy case where the character with
+		 different length representation of lower and upper
+		 case is present at or after offset.  */
+	      if (pstr->valid_len > offset
+		  && mid == offset && pstr->offsets[mid] == offset)
+		{
+		  memmove (pstr->wcs, pstr->wcs + offset,
+			   (pstr->valid_len - offset) * sizeof (wint_t));
+		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+		  pstr->valid_len -= offset;
+		  pstr->valid_raw_len -= offset;
+		  for (low = 0; low < pstr->valid_len; low++)
+		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+		}
+	      else
+		{
+		  /* Otherwise, just find out how long the partial multibyte
+		     character at offset is and fill it with WEOF/255.  */
+		  pstr->len = pstr->raw_len - idx + offset;
+		  pstr->stop = pstr->raw_stop - idx + offset;
+		  pstr->offsets_needed = 0;
+		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
+		    --mid;
+		  while (mid < pstr->valid_len)
+		    if (pstr->wcs[mid] != WEOF)
+		      break;
+		    else
+		      ++mid;
+		  if (mid == pstr->valid_len)
+		    pstr->valid_len = 0;
+		  else
+		    {
+		      pstr->valid_len = pstr->offsets[mid] - offset;
+		      if (pstr->valid_len)
+			{
+			  for (low = 0; low < pstr->valid_len; ++low)
+			    pstr->wcs[low] = WEOF;
+			  memset (pstr->mbs, 255, pstr->valid_len);
+			}
+		    }
+		  pstr->valid_raw_len = pstr->valid_len;
+		}
+	    }
+	  else
+#endif
+	    {
+	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
+							eflags);
+#ifdef RE_ENABLE_I18N
+	      if (pstr->mb_cur_max > 1)
+		memmove (pstr->wcs, pstr->wcs + offset,
+			 (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+	      if (BE (pstr->mbs_allocated, 0))
+		memmove (pstr->mbs, pstr->mbs + offset,
+			 pstr->valid_len - offset);
+	      pstr->valid_len -= offset;
+	      pstr->valid_raw_len -= offset;
+#if DEBUG
+	      assert (pstr->valid_len > 0);
+#endif
+	    }
+	}
+      else
+	{
+	  /* No, skip all characters until IDX.  */
+	  int prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      pstr->len = pstr->raw_len - idx + offset;
+	      pstr->stop = pstr->raw_stop - idx + offset;
+	      pstr->offsets_needed = 0;
+	    }
+#endif
+	  pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+	  if (pstr->mb_cur_max > 1)
+	    {
+	      int wcs_idx;
+	      wint_t wc = WEOF;
+
+	      if (pstr->is_utf8)
+		{
+		  const unsigned char *raw, *p, *q, *end;
+
+		  /* Special case UTF-8.  Multi-byte chars start with any
+		     byte other than 0x80 - 0xbf.  */
+		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+		  end = raw + (offset - pstr->mb_cur_max);
+		  if (end < pstr->raw_mbs)
+		    end = pstr->raw_mbs;
+		  p = raw + offset - 1;
+#ifdef _LIBC
+		  /* We know the wchar_t encoding is UCS4, so for the simple
+		     case, ASCII characters, skip the conversion step.  */
+		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
+		    {
+		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+		      /* pstr->valid_len = 0; */
+		      wc = (wchar_t) *p;
+		    }
+		  else
+#endif
+		    for (; p >= end; --p)
+		      if ((*p & 0xc0) != 0x80)
+			{
+			  mbstate_t cur_state;
+			  wchar_t wc2;
+			  int mlen = raw + pstr->len - p;
+			  unsigned char buf[6];
+			  size_t mbclen;
+
+			  q = p;
+			  if (BE (pstr->trans != NULL, 0))
+			    {
+			      int i = mlen < 6 ? mlen : 6;
+			      while (--i >= 0)
+				buf[i] = pstr->trans[p[i]];
+			      q = buf;
+			    }
+			  /* XXX Don't use mbrtowc, we know which conversion
+			     to use (UTF-8 -> UCS4).  */
+			  memset (&cur_state, 0, sizeof (cur_state));
+			  mbclen = mbrtowc (&wc2, (const char *) p, mlen,
+					    &cur_state);
+			  if (raw + offset - p <= mbclen
+			      && mbclen < (size_t) -2)
+			    {
+			      memset (&pstr->cur_state, '\0',
+				      sizeof (mbstate_t));
+			      pstr->valid_len = mbclen - (raw + offset - p);
+			      wc = wc2;
+			    }
+			  break;
+			}
+		}
+
+	      if (wc == WEOF)
+		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+	      if (wc == WEOF)
+		pstr->tip_context
+		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+	      else
+		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+				      && IS_WIDE_WORD_CHAR (wc))
+				     ? CONTEXT_WORD
+				     : ((IS_WIDE_NEWLINE (wc)
+					 && pstr->newline_anchor)
+					? CONTEXT_NEWLINE : 0));
+	      if (BE (pstr->valid_len, 0))
+		{
+		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+		    pstr->wcs[wcs_idx] = WEOF;
+		  if (pstr->mbs_allocated)
+		    memset (pstr->mbs, 255, pstr->valid_len);
+		}
+	      pstr->valid_raw_len = pstr->valid_len;
+	    }
+	  else
+#endif /* RE_ENABLE_I18N */
+	    {
+	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+	      pstr->valid_raw_len = 0;
+	      if (pstr->trans)
+		c = pstr->trans[c];
+	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
+				   ? CONTEXT_WORD
+				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
+				      ? CONTEXT_NEWLINE : 0));
+	    }
+	}
+      if (!BE (pstr->mbs_allocated, 0))
+	pstr->mbs += offset;
+    }
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      if (pstr->icase)
+	{
+	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+	build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    if (BE (pstr->mbs_allocated, 0))
+      {
+	if (pstr->icase)
+	  build_upper_buffer (pstr);
+	else if (pstr->trans != NULL)
+	  re_string_translate_buffer (pstr);
+      }
+    else
+      pstr->valid_len = pstr->len;
+
+  pstr->cur_idx = 0;
+  return REG_NOERROR;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  /* Peek, without advancing, at the byte IDX positions past the cursor.  */
+  int raw_off = pstr->cur_idx + idx;
+  int byte;
+
+  /* No separately allocated mbs: the ordinary peek is the answer.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, raw_off))
+    return re_string_peek_byte (pstr, idx);
+
+  /* Map the buffer position to a raw-input position when they differ.  */
+  if (pstr->offsets_needed)
+    raw_off = pstr->offsets[raw_off];
+#endif
+
+  byte = pstr->raw_mbs[pstr->raw_mbs_idx + raw_off];
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+     the result is CAPITAL LETTER I rather than the first byte of
+     DOTLESS SMALL LETTER I, which would confuse the parser because
+     peeking never advances cur_idx.  */
+  if (pstr->offsets_needed && !isascii (byte))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  return byte;
+}
+
+/* Fetch the next byte, advancing pstr->cur_idx (see the final return).
+   Must not be declared pure: the function modifies *PSTR.  */
+static unsigned char
+internal_function
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+	 in that case the whole multi-byte character and return
+	 the original letter.  On the other side, with
+	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
+	 anything else would complicate things too much.  */
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+	return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+      if (! isascii (ch))
+	return re_string_fetch_byte (pstr);
+
+      re_string_skip_bytes (pstr,
+			    re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+/* Release the buffers PSTR owns; the structure itself is left alone.  */
+static void internal_function
+re_string_destruct (re_string_t *pstr)
+{
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif
+}
+
+/* Classify the character at IDX in INPUT as a CONTEXT_* bit mask.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  /* The character before the buffer start cannot be inspected here, so
+     the context saved in input->tip_context stands in for it.  */
+  if (BE (idx < 0, 0))
+    return input->tip_context;
+  /* The end of input also counts as a newline unless REG_NOTEOL is set.  */
+  if (BE (idx == input->len, 0))
+    return CONTEXT_ENDBUF | ((eflags & REG_NOTEOL) ? 0 : CONTEXT_NEWLINE);
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int pos;
+      /* Step back over WEOF slots to the nearest stored wide character.  */
+      for (pos = idx; input->wcs[pos] == WEOF; )
+	{
+#ifdef DEBUG
+	  /* It must not happen.  */
+	  assert (pos >= 0);
+#endif
+	  if (--pos < 0)
+	    return input->tip_context;
+	}
+      wc = input->wcs[pos];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+	return CONTEXT_WORD;
+      if (IS_WIDE_NEWLINE (wc) && input->newline_anchor)
+	return CONTEXT_NEWLINE;
+      return 0;
+    }
+  else
+#endif
+    {
+      int c = re_string_byte_at (input, idx);
+      if (bitset_contain (input->word_char, c))
+	return CONTEXT_WORD;
+      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+    }
+}
+
+/* Functions for set operation.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  /* Give SET no elements and storage for SIZE of them.  */
+  int *storage = re_malloc (int, size);
+  set->elems = storage;
+  set->nelem = 0;
+  set->alloc = size;
+  return BE (storage == NULL, 0) ? REG_ESPACE : REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  /* Make SET the singleton { ELEM }; a failed malloc leaves it empty.  */
+  set->elems = re_malloc (int, 1);
+  if (BE (set->elems != NULL, 1))
+    {
+      set->elems[0] = elem;
+      set->alloc = set->nelem = 1;
+      return REG_NOERROR;
+    }
+  set->alloc = set->nelem = 0;
+  return REG_ESPACE;
+}
+
+/* Initialize SET with ELEM1 and ELEM2, stored once if they are equal.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  int lo, hi;
+
+  /* Room for two is requested even when only one slot ends up used.  */
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Equal arguments collapse into a single element.  */
+  if (elem1 == elem2)
+    {
+      set->elems[0] = elem1;
+      set->nelem = 1;
+      return REG_NOERROR;
+    }
+
+  /* Two distinct values go in ascending order.  */
+  lo = elem1 < elem2 ? elem1 : elem2;
+  hi = elem1 < elem2 ? elem2 : elem1;
+  set->elems[0] = lo;
+  set->elems[1] = hi;
+  set->nelem = 2;
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  /* Make DEST a copy of SRC with storage for exactly SRC's elements;
+     an empty SRC just resets DEST via re_node_set_init_empty.  */
+  dest->nelem = src->nelem;
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+  dest->alloc = src->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems != NULL, 1))
+    memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  else
+    dest->alloc = dest->nelem = 0;
+  return dest->elems != NULL ? REG_NOERROR : REG_ESPACE;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2 and merge it into
+   DEST.  Return REG_ESPACE if DEST cannot be grown, else REG_NOERROR.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+			   const re_node_set *src2)
+{
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection slots; the sum of
+     all three element counts is a conservative estimate of that.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+    {
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
+    }
+
+  /* Scan SRC1 and SRC2 from their last elements down, and stage just
+     below SBASE each common item that is not already in DEST itself.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
+    {
+      if (src1->elems[i1] == src2->elems[i2])
+	{
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+
+          if (id < 0 || dest->elems[id] != src1->elems[i1])
+            dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
+	}
+
+      /* Step down in whichever set currently holds the larger item.  */
+      else if (src1->elems[i1] < src2->elems[i2])
+	{
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
+	}
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + src1->nelem + src2->nelem - 1;
+  delta = is - sbase + 1;
+
+  /* Now copy.  DELTA counts the staged items not yet placed.  When it
+     becomes zero, the remaining DEST elements are already in place; this
+     is more or less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+        if (dest->elems[is] > dest->elems[id])
+          {
+            /* Copy from the top.  */
+            dest->elems[id + delta--] = dest->elems[is--];
+            if (delta == 0)
+              break;
+          }
+        else
+          {
+            /* Slide from the bottom.  */
+            dest->elems[id + delta] = dest->elems[id];
+            if (--id < 0)
+              break;
+          }
+      }
+
+  /* Copy the DELTA staged items still pending to the bottom of DEST.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as the union of SRC1 and SRC2, either of which may be
+   NULL or empty.  Return REG_ESPACE if malloc fails, else REG_NOERROR.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+			const re_node_set *src2)
+{
+  int a = 0, b = 0, out = 0;
+  int have1 = src1 != NULL && src1->nelem > 0;
+  int have2 = src2 != NULL && src2->nelem > 0;
+
+  /* With fewer than two non-empty operands the union is a plain copy.  */
+  if (!have1 || !have2)
+    {
+      if (have1 || have2)
+	return re_node_set_init_copy (dest, have1 ? src1 : src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Emit the smaller head each time; a value in both is emitted once.  */
+  while (a < src1->nelem && b < src2->nelem)
+    {
+      if (src1->elems[a] > src2->elems[b])
+	dest->elems[out++] = src2->elems[b++];
+      else
+	{
+	  b += src1->elems[a] == src2->elems[b];
+	  dest->elems[out++] = src1->elems[a++];
+	}
+    }
+  /* At most one input has elements left; append them in bulk.  */
+  if (a < src1->nelem)
+    {
+      memcpy (dest->elems + out, src1->elems + a,
+	      (src1->nelem - a) * sizeof (int));
+      out += src1->nelem - a;
+    }
+  else if (b < src2->nelem)
+    {
+      memcpy (dest->elems + out, src2->elems + b,
+	      (src2->nelem - b) * sizeof (int));
+      out += src2->nelem - b;
+    }
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Merge SRC into DEST so that DEST holds the union of both sets.  Return
+   REG_ESPACE if DEST cannot be grown, REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Stage just below SBASE, scanning both sets from their last elements
+     down, the items of SRC that are not found in DEST.  Binary search?  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+        is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+        dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+        --id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1;
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  DELTA counts the staged items not yet placed; when it
+     becomes zero, the remaining DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+        {
+	  /* Copy from the top.  */
+          dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+        {
+          /* Slide from the bottom.  */
+          dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy the staged items still pending to the bottom.  */
+	      memcpy (dest->elems, dest->elems + sbase,
+	              delta * sizeof (int));
+	      break;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the sorted re_node_set SET.
+   SET should not already have ELEM.
+   Return -1 if an error occurred (SET keeps its elements), 1 otherwise.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+  /* In case the set has no storage yet.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+	return 1;
+      else
+	return -1;
+    }
+
+  if (BE (set->nelem == 0, 0))
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  Record the doubled capacity only once the
+     realloc has succeeded, so a failure leaves alloc matching elems.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      for (idx = set->nelem; idx > 0; idx--)
+	set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+	set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM to the re_node_set* SET.
+   SET should not already have any element greater than or equal to ELEM.
+   Return -1 if an error occurred, return 1 otherwise.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Grow if full; alloc is updated only after realloc succeeds.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Tell whether SET1 and SET2 hold the same elements at the same positions.
+   Return 1 if so; return 0 if they differ or if either pointer is NULL.  */
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int pos;
+  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
+    return 0;
+  pos = set1->nelem;
+  while (pos-- > 0)
+    if (set1->elems[pos] != set2->elems[pos])
+      return 0;
+  return 1;
+}
+
+/* Binary-search SET for ELEM.  Return the position of ELEM plus one if
+   SET contains it, return 0 otherwise.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int lo = 0, hi;
+  if (set->nelem <= 0)
+    return 0;
+  /* Halve [lo, hi] until a single candidate position remains.  */
+  for (hi = set->nelem - 1; lo < hi; )
+    {
+      unsigned int probe = (lo + hi) / 2;
+      if (set->elems[probe] < elem)
+	lo = probe + 1;
+      else
+	hi = probe;
+    }
+  if (set->elems[lo] != elem)
+    return 0;
+  return lo + 1;
+}
+
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  /* Drop the element at IDX and close the gap it leaves.  An IDX that
+     is out of range is silently ignored.  */
+  if (idx >= 0 && idx < set->nelem)
+    for (--set->nelem; idx < set->nelem; ++idx)
+      set->elems[idx] = set->elems[idx + 1];
+}
+
+
+/* Append TOKEN to dfa->nodes, doubling the per-node arrays when full.
+   Return the index of the new node, or -1 if an error occurred.  */
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  int type = token.type;
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+      /* Avoid overflows.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
+	return -1;
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+	return -1;
+      dfa->nodes = new_nodes;
+      /* Store each array back as soon as it has been grown: realloc may
+	 have freed the old block, so a later failure must neither leave
+	 DFA pointing at freed memory nor lose the new block.  */
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      dfa->nexts = new_nexts != NULL ? new_nexts : dfa->nexts;
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      dfa->org_indices = new_indices != NULL ? new_indices : dfa->org_indices;
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      dfa->edests = new_edests != NULL ? new_edests : dfa->edests;
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+      dfa->eclosures = new_eclosures != NULL ? new_eclosures : dfa->eclosures;
+      if (BE (new_nexts == NULL || new_indices == NULL
+	      || new_edests == NULL || new_eclosures == NULL, 0))
+	return -1;
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+static inline unsigned int internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  /* Plain additive hash over the element count, CONTEXT and elements.  */
+  unsigned int sum = nodes->nelem + context;
+  int left = nodes->nelem;
+  while (left-- > 0)
+    sum += nodes->elems[left];
+  return sum;
+}
+
+/* Look up the state whose node set equals NODES, creating it on a miss.
+   Return the state found or created.
+   NULL is returned in two cases:
+	 - NODES is empty; *ERR is set to REG_NOERROR, because NULL doubles
+	   as the invalid state.
+	 - The new state could not be created; *ERR is set to REG_ESPACE.
+   *ERR is left untouched whenever a non-NULL state is returned.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+		  const re_node_set *nodes)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *found;
+  unsigned int hash;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, 0);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  /* Compare the cheap hash first, the full node set only on a hit.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      found = bucket->array[slot];
+      if (found->hash == hash && re_node_set_compare (&found->nodes, nodes))
+	return found;
+    }
+
+  /* Nothing matched: build a context-independent state for NODES.  */
+  found = create_ci_newstate (dfa, nodes, hash);
+  if (BE (found == NULL, 0))
+    *err = REG_ESPACE;
+
+  return found;
+}
+
+/* Look up the state whose entrance node set equals NODES and whose
+   context equals CONTEXT, creating it on a miss.
+   Return the state found or created.  NULL is returned when NODES is
+   empty (*ERR is set to REG_NOERROR, because NULL doubles as the invalid
+   state) and when creation fails (*ERR is set to REG_ESPACE).
+   *ERR is left untouched whenever a non-NULL state is returned.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+			  const re_node_set *nodes, unsigned int context)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *found;
+  unsigned int hash;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, context);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      found = bucket->array[slot];
+      if (found->hash != hash || found->context != context)
+	continue;
+      if (re_node_set_compare (found->entrance_nodes, nodes))
+	return found;
+    }
+
+  /* Nothing matched: build a state specific to CONTEXT.  */
+  found = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (found == NULL, 0))
+    *err = REG_ESPACE;
+
+  return found;
+}
+
+/* Complete NEWSTATE and file it in DFA's state table: store HASH, collect
+   the non-epsilon members of newstate->nodes into non_eps_nodes, and append
+   the state to the bucket HASH selects.  Return REG_ESPACE on failure.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+		unsigned int hash)
+{
+  struct re_state_table_entry *bucket
+    = dfa->state_table + (hash & dfa->state_hash_mask);
+  const re_node_set *members = &newstate->nodes;
+  int pos;
+
+  newstate->hash = hash;
+  if (BE (re_node_set_alloc (&newstate->non_eps_nodes, members->nelem)
+	  != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (pos = 0; pos < members->nelem; ++pos)
+    {
+      int node = members->elems[pos];
+      if (!IS_EPSILON_NODE (dfa->nodes[node].type))
+	re_node_set_insert_last (&newstate->non_eps_nodes, node);
+    }
+  /* Make room in the bucket before appending.  */
+  if (BE (bucket->alloc <= bucket->num, 0))
+    {
+      int grown = 2 * bucket->num + 2;
+      re_dfastate_t **slots = re_realloc (bucket->array, re_dfastate_t *,
+					   grown);
+      if (BE (slots == NULL, 0))
+	return REG_ESPACE;
+      bucket->array = slots;
+      bucket->alloc = grown;
+    }
+  bucket->array[bucket->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Free STATE and all it owns; entrance_nodes may be NULL (half-built).  */
+static void free_state (re_dfastate_t *state)
+{
+  if (state->entrance_nodes != NULL && state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create a new state that does not depend on any context.
+   Return the new state on success, otherwise return NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+		    unsigned int hash)
+{
+  int pos;
+  re_dfastate_t *state = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+
+  if (BE (state == NULL, 0))
+    return NULL;
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      re_free (state);
+      return NULL;
+    }
+  state->entrance_nodes = &state->nodes;
+
+  /* Derive the state's flags from the nodes it contains.  */
+  for (pos = 0; pos < nodes->nelem; ++pos)
+    {
+      const re_token_t *tok = dfa->nodes + nodes->elems[pos];
+      re_token_type_t kind = tok->type;
+      /* Plain unconstrained characters contribute nothing.  */
+      if (kind == CHARACTER && !tok->constraint)
+	continue;
+#ifdef RE_ENABLE_I18N
+      state->accept_mb |= tok->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* A state holding the END_OF_RE node is a halt state.  */
+      if (kind == END_OF_RE)
+	state->halt = 1;
+      else if (kind == OP_BACK_REF)
+	state->has_backref = 1;
+      else if (kind == ANCHOR || tok->constraint)
+	state->has_constraint = 1;
+    }
+
+  /* If registration fails, the state is released again.  */
+  if (BE (register_state (dfa, state, hash) != REG_NOERROR, 0))
+    {
+      free_state (state);
+      return NULL;
+    }
+  return state;
+}
+
+/* Create the new state which depends on the context CONTEXT.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+		    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+      if (type == CHARACTER && !constraint)
+	continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+	newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+	newstate->has_backref = 1;
+      else if (type == ANCHOR)
+	constraint = node->opr.ctx_type;
+
+      if (constraint)
+	{
+	  if (newstate->entrance_nodes == &newstate->nodes)
+	    {
+	      /* Keep the unfiltered NODES as the entrance set.  The copy
+		 is attached only once complete, so that free_state never
+		 sees a NULL or uninitialized entrance_nodes.  */
+	      re_node_set *entrance = re_malloc (re_node_set, 1);
+	      if (BE (entrance == NULL
+		      || (re_node_set_init_copy (entrance, nodes)
+			  != REG_NOERROR), 0))
+		{
+		  re_free (entrance);
+		  free_state (newstate);
+		  return NULL;
+		}
+	      newstate->entrance_nodes = entrance;
+	      nctx_nodes = 0;
+	      newstate->has_constraint = 1;
+	    }
+	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+	    {
+	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+	      ++nctx_nodes;
+	    }
+	}
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regcomp.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+					  size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+				     const re_dfastate_t *init_state,
+				     char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+			       reg_errcode_t (fn (void *, bin_tree_t *)),
+			       void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+				reg_errcode_t (fn (void *, bin_tree_t *)),
+				void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+				 bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+				   unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+					 int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+			 reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+			reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+			  reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+				 re_token_t *token, reg_syntax_t syntax,
+				 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+				     re_token_t *token, reg_syntax_t syntax,
+				     int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+				 re_dfa_t *dfa, re_token_t *token,
+				 reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+				      re_token_t *token, reg_syntax_t syntax,
+				      reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+					    re_string_t *regexp,
+					    re_token_t *token, int token_len,
+					    re_dfa_t *dfa,
+					    reg_syntax_t syntax,
+					    int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+					  re_string_t *regexp,
+					  re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					re_charset_t *mbcset,
+					int *equiv_class_alloc,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      re_charset_t *mbcset,
+				      int *char_class_alloc,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#else  /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+				       RE_TRANSLATE_TYPE trans,
+				       const unsigned char *class_name,
+				       const unsigned char *extra,
+				       int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+				bin_tree_t *left, bin_tree_t *right,
+				re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+				      bin_tree_t *left, bin_tree_t *right,
+				      const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be same as there.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?
+
+   All messages live back to back in one char array.  Every message
+   except the last is followed by an explicit "\0"; the last one relies
+   on the terminator of the string literal itself.  Each REG_xxx_IDX
+   macro is the byte offset of its message: the previous offset plus
+   `sizeof' of the previous message text, which counts that message's
+   terminating NUL.  The text inside each `sizeof' must therefore stay
+   byte-for-byte identical to the message it follows.
+   (This layout assumes gettext_noop expands to its bare string
+   argument so that adjacent literals are concatenated -- confirm
+   against the definition of gettext_noop.)  */
+
+const char __re_error_msgid[] attribute_hidden =
+  {
+#define REG_NOERROR_IDX	0
+    gettext_noop ("Success")	/* REG_NOERROR */
+    "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+    gettext_noop ("No match")	/* REG_NOMATCH */
+    "\0"
+#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
+    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+    "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+    "\0"
+#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+    "\0"
+#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
+    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+    "\0"
+#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
+    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+    "\0"
+#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
+    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
+    "\0"
+#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+    "\0"
+#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+    "\0"
+#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
+    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+    "\0"
+#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+    gettext_noop ("Invalid range end")	/* REG_ERANGE */
+    "\0"
+#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
+    gettext_noop ("Memory exhausted") /* REG_ESPACE */
+    "\0"
+#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
+    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+    "\0"
+#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+    gettext_noop ("Premature end of regular expression") /* REG_EEND */
+    "\0"
+#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
+    gettext_noop ("Regular expression too big") /* REG_ESIZE */
+    "\0"
+#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
+    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+  };
+
+const size_t __re_error_msgid_idx[] attribute_hidden =
+  { /* Entry N is the byte offset, inside __re_error_msgid, of the
+       message for error code N.  The entries are listed in the same
+       order as the REG_xxx_IDX macros are defined, and the array is
+       indexed with a reg_errcode_t value (or regerror's ERRCODE).  */
+    REG_NOERROR_IDX,
+    REG_NOMATCH_IDX,
+    REG_BADPAT_IDX,
+    REG_ECOLLATE_IDX,
+    REG_ECTYPE_IDX,
+    REG_EESCAPE_IDX,
+    REG_ESUBREG_IDX,
+    REG_EBRACK_IDX,
+    REG_EPAREN_IDX,
+    REG_EBRACE_IDX,
+    REG_BADBR_IDX,
+    REG_ERANGE_IDX,
+    REG_ESPACE_IDX,
+    REG_BADRPT_IDX,
+    REG_EEND_IDX,
+    REG_ESIZE_IDX,
+    REG_ERPAREN_IDX
+  };
+
+/* Entry points for GNU code.  */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+   Returns 0 (a null pointer) if the pattern was valid, otherwise an
+   error string: the __re_error_msgid entry for the error code, passed
+   through gettext.
+
+   The syntax used is the current value of the global
+   re_syntax_options.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.  */
+
+const char *
+re_compile_pattern (const char *pattern, size_t length,
+		    struct re_pattern_buffer *bufp)
+{
+  reg_errcode_t ret;
+
+  /* And GNU code determines whether or not to get register information
+     by passing null for the REGS argument to re_match, etc., not by
+     setting no_sub, unless RE_NO_SUB is set.  */
+  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
+
+  /* Match anchors at newline.  */
+  bufp->newline_anchor = 1;
+
+  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  /* REG_NOERROR is zero, so a zero RET means success.  */
+  if (!ret)
+    return NULL;
+  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  It is stored in the global re_syntax_options;
+   the value that was stored there before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Fill BUFP->fastmap, a table of SBC_MAX flags (one per byte value),
+   from the compiled pattern in BUFP->buffer.  The table is cleared,
+   then re_compile_fastmap_iter is run on the DFA's initial state and
+   on each of the word / newline / buffer-start initial states that is
+   a different object from it.  Sets BUFP->fastmap_accurate and always
+   returns 0.
+
+   BUFP->fastmap must already point to at least SBC_MAX bytes and
+   BUFP->buffer to a compiled re_dfa_t; neither is checked here.  */
+
+int
+re_compile_fastmap (struct re_pattern_buffer *bufp)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *fastmap = bufp->fastmap;
+
+  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+  /* The context-specific initial states may alias init_state; only
+     scan the ones that are distinct.  */
+  if (dfa->init_state != dfa->init_state_word)
+    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+  if (dfa->init_state != dfa->init_state_nl)
+    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+  if (dfa->init_state != dfa->init_state_begbuf)
+    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Flag byte CH in FASTMAP.  When ICASE is nonzero, also flag
+   tolower (CH).  */
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.
+
+   Every node in INIT_STATE's node set contributes to FASTMAP:
+     CHARACTER        its byte (and, under RE_ICASE, a case variant);
+     SIMPLE_BRACKET   every byte whose bit is set in the node's bitset;
+     COMPLEX_BRACKET  (RE_ENABLE_I18N only) the first byte of each
+		      listed multibyte character, plus a locale-dependent
+		      set of lead bytes when the set is negated or uses
+		      collating symbols, equivalence classes, ranges or
+		      character classes;
+     OP_PERIOD, OP_UTF8_PERIOD, END_OF_RE
+		      every byte is flagged and the scan stops; END_OF_RE
+		      additionally sets BUFP->can_be_null.  */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+			 char *fastmap)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+#ifdef RE_ENABLE_I18N
+  /* Scratch space for one multibyte character.  It is obtained with
+     alloca at most once per call and then reused: alloca storage lives
+     until this function returns, so allocating it afresh for every
+     CHARACTER node would make stack use grow with the number of nodes
+     in the initial state.  */
+  unsigned char *mb_buf = NULL;
+#endif
+
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+	{
+	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+	    {
+	      unsigned char *p;
+	      wchar_t wc;
+	      mbstate_t state;
+
+	      if (mb_buf == NULL)
+		mb_buf = alloca (dfa->mb_cur_max);
+
+	      /* Reassemble the multibyte character: this node's byte
+		 followed by the bytes of the directly following
+		 CHARACTER nodes that are marked mb_partial.  NODE is a
+		 local copy, so advancing it does not disturb the outer
+		 loop.  */
+	      p = mb_buf;
+	      *p++ = dfa->nodes[node].opr.c;
+	      while (++node < dfa->nodes_len
+		     &&	dfa->nodes[node].type == CHARACTER
+		     && dfa->nodes[node].mb_partial)
+		*p++ = dfa->nodes[node].opr.c;
+	      memset (&state, '\0', sizeof (state));
+	      /* Also flag the first byte of the lower-case form.  */
+	      if (mbrtowc (&wc, (const char *) mb_buf, p - mb_buf,
+			   &state) == p - mb_buf
+		  && (__wcrtomb ((char *) mb_buf, towlower (wc), &state)
+		      != (size_t) -1))
+		re_set_fastmap (fastmap, 0, mb_buf[0]);
+	    }
+#endif
+	}
+      else if (type == SIMPLE_BRACKET)
+	{
+	  int i, ch;
+	  /* CH runs over all byte values in step with the (word, bit)
+	     position inside the bitset.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    {
+	      int j;
+	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+		if (w & ((bitset_word_t) 1 << j))
+		  re_set_fastmap (fastmap, icase, ch);
+	    }
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+	{
+	  int i;
+	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+	      || cset->nranges || cset->nchar_classes)
+	    {
+# ifdef _LIBC
+	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+		{
+		  /* In this case we want to catch the bytes which are
+		     the first byte of any collation elements.
+		     e.g. In da_DK, we want to catch 'a' since "aa"
+			  is a valid collation element, and don't catch
+			  'b' since 'b' is the only collation element
+			  which starts from 'b'.  */
+		  const int32_t *table = (const int32_t *)
+		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+		  for (i = 0; i < SBC_MAX; ++i)
+		    if (table[i] < 0)
+		      re_set_fastmap (fastmap, icase, i);
+		}
+# else
+	      /* Flag every byte that is not a complete character on its
+		 own (__btowc yields WEOF for it).  */
+	      if (dfa->mb_cur_max > 1)
+		for (i = 0; i < SBC_MAX; ++i)
+		  if (__btowc (i) == WEOF)
+		    re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+	    }
+	  /* Flag the first byte of every explicitly listed multibyte
+	     character, and of its lower-case form under RE_ICASE.  */
+	  for (i = 0; i < cset->nmbchars; ++i)
+	    {
+	      char buf[256];
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+		{
+		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+		      != (size_t) -1)
+		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+		}
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+	       || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+	       || type == END_OF_RE)
+	{
+	  /* Every byte is flagged, so the remaining nodes cannot add
+	     anything.  */
+	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+	  if (type == END_OF_RE)
+	    bufp->can_be_null = 1;
+	  return;
+	}
+    }
+}
+
+/* Entry point for POSIX code.  */
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
+{
+  reg_errcode_t ret;
+  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
+			 : RE_SYNTAX_POSIX_BASIC);
+
+  /* PREG arrives uninitialized: start from an empty pattern buffer so
+     that re_compile_internal allocates the DFA itself.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+  preg->no_sub = !!(cflags & REG_NOSUB);
+  preg->translate = NULL;
+
+  /* PATTERN is a NUL-terminated string here, so its length is taken
+     with strlen.  */
+  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN)
+    ret = REG_EPAREN;
+
+  /* We have already checked preg->fastmap != NULL.  */
+  if (BE (ret == REG_NOERROR, 1))
+    /* Compute the fastmap now, since regexec cannot modify the pattern
+       buffer.  This function never fails in this implementation.  */
+    (void) re_compile_fastmap (preg);
+  else
+    {
+      /* Some error occurred while compiling the expression.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+
+  return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Copy the message for ERRCODE (a value returned by regcomp or
+   regexec) into ERRBUF, which holds ERRBUF_SIZE bytes, truncating it
+   if necessary and always NUL-terminating it when ERRBUF_SIZE is not
+   zero.  Returns the size needed for the whole message, terminating
+   NUL included.  PREG is not used.  An ERRCODE outside the message
+   table aborts the program.  */
+
+size_t
+regerror (
+    int errcode,
+    const regex_t *__restrict preg,
+    char *__restrict errbuf,
+    size_t errbuf_size)
+{
+  const int n_codes = (int) (sizeof (__re_error_msgid_idx)
+			     / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* Only error codes returned by the rest of the code should be passed
+     to this routine.  Anything else means the program has a bug, so
+     dump core to make it visible.  */
+  if (BE (errcode < 0 || errcode >= n_codes, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1; /* Counts the terminating NUL.  */
+
+  /* Nothing can be stored in a zero-sized buffer.  */
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    memcpy (errbuf, text, needed);
+  else
+    {
+      /* Message too long: keep the first ERRBUF_SIZE - 1 bytes and
+	 terminate them.  */
+#if defined HAVE_MEMPCPY || defined _LIBC
+      *((char *) __mempcpy (errbuf, text, errbuf_size - 1)) = '\0';
+#else
+      memcpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+#endif
+    }
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+   UTF-8 is used.  Otherwise we would allocate memory just to initialize
+   it the same all the time.  UTF-8 is the preferred encoding so this is
+   a worthwhile optimization.
+
+   init_dfa points dfa->sb_char at this shared constant for UTF-8
+   locales, and free_dfa_content compares sb_char against it so that
+   the constant is never handed to re_free.  */
+static const bitset_t utf8_sb_map =
+{
+  /* Set the first 128 bits.  The `[first ... last]' range designator is
+     a GNU C extension; the words it does not name are left zero.  */
+  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#endif
+
+
+/* Release every allocation owned by DFA and then DFA itself: the
+   per-node tokens and node sets, the arrays that held them, all states
+   in the state hash table, the single-byte map (unless it is the
+   shared utf8_sb_map), the subexpression map and, under DEBUG, the
+   saved pattern string.  */
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int n, k;
+
+  if (dfa->nodes != NULL)
+    {
+      for (n = 0; n < dfa->nodes_len; n++)
+	free_token (&dfa->nodes[n]);
+    }
+  re_free (dfa->nexts);
+
+  /* The three node-set arrays are allocated independently, so each is
+     tested on its own.  */
+  for (n = 0; n < dfa->nodes_len; n++)
+    {
+      if (dfa->eclosures != NULL)
+	re_node_set_free (&dfa->eclosures[n]);
+      if (dfa->inveclosures != NULL)
+	re_node_set_free (&dfa->inveclosures[n]);
+      if (dfa->edests != NULL)
+	re_node_set_free (&dfa->edests[n]);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  /* The hash table has state_hash_mask + 1 buckets.  */
+  if (dfa->state_table != NULL)
+    {
+      for (n = 0; n <= dfa->state_hash_mask; n++)
+	{
+	  struct re_state_table_entry *bucket = &dfa->state_table[n];
+	  for (k = 0; k < bucket->num; k++)
+	    free_state (bucket->array[k]);
+	  re_free (bucket->array);
+	}
+    }
+  re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG: the compiled DFA held
+   in `buffer' (when there is one), the fastmap and the translate
+   table.  The three pointers are reset to NULL and `allocated' to 0
+   afterwards.  */
+
+void
+regfree (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->buffer = NULL;
+  preg->allocated = 0;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+
+  re_free (preg->translate);
+  preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into the single static pattern buffer re_comp_buf, using
+   the syntax in re_syntax_options.
+
+   When S is a null pointer nothing is compiled: the result is 0 if a
+   pattern has been compiled before, otherwise an error message.
+   Otherwise any previously compiled pattern is freed first (its
+   fastmap buffer is kept for reuse), and the result is NULL on success
+   or the gettext-translated message for the error code on failure.  */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec above without link errors.  */
+weak_function
+# endif
+re_comp (const char *s)
+{
+  reg_errcode_t ret;
+  char *fastmap;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+	return gettext ("No previous regular expression");
+      return 0;
+    }
+
+  if (re_comp_buf.buffer)
+    {
+      /* Detach the fastmap so that __regfree does not release it, wipe
+	 the buffer, then put the fastmap back for reuse.  */
+      fastmap = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = fastmap;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+	return (char *) gettext (__re_error_msgid
+				 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+  if (!ret)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+libc_freeres_fn (free_mem) /* Body releases the static BSD pattern buffer.  */
+{
+  __regfree (&re_comp_buf); /* Frees the DFA, fastmap and translate table.  */
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH.
+   SYNTAX indicate regular expression's syntax.
+
+   The compiled automaton (a re_dfa_t) is kept in PREG->buffer, which is
+   (re)allocated here when PREG->allocated is smaller than a re_dfa_t.
+   Returns REG_NOERROR on success.  If that (re)allocation fails,
+   REG_ESPACE is returned and PREG->buffer is left as it was; on any
+   later failure the DFA is released, PREG->buffer is set to NULL and
+   PREG->allocated to 0 before the error code is returned.  */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+		     reg_syntax_t syntax)
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+	 enough space.  This loses if buffer's address is bogus, but
+	 that is the user's responsibility.  If ->buffer is NULL this
+	 is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+	return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Keep a NUL-terminated copy of the pattern for debugging.
+     Note: length+1 will not overflow since it is checked in init_dfa.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  if (BE (dfa->re_str == NULL, 0))
+    {
+      /* Neither the lock nor REGEXP exists yet, so only the DFA has to
+	 be released, exactly as for an init_dfa failure.  */
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return REG_ESPACE;
+    }
+  /* PATTERN is only guaranteed to hold LENGTH bytes, so copy exactly
+     that many and add the terminator ourselves rather than reading
+     pattern[length].  */
+  memcpy (dfa->re_str, pattern, length);
+  dfa->re_str[length] = '\0';
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+			     syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit for every step that runs while REGEXP and
+	 the compile-time work areas are alive; the later steps jump
+	 here with ERR already set.  */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa.  */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.
+
+   DFA is zeroed first.  Then the node array (PAT_LEN + 1 entries) and
+   the state hash table are allocated, the locale properties
+   (mb_cur_max, is_utf8, map_notascii) are recorded and, with
+   RE_ENABLE_I18N in a multibyte locale, the single-byte character map
+   sb_char is set up.
+
+   Returns REG_NOERROR on success, or REG_ESPACE when PAT_LEN is too
+   large or an allocation fails.  On failure DFA may hold some of the
+   allocations; the visible caller, re_compile_internal, releases them
+   with free_dfa_content.  */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  unsigned int table_size;
+  size_t max_object_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows.  Two things must hold for the code below:
+     - (pat_len + 1) * sizeof (element) must not wrap around in the
+       array allocations; the "/ 2" leaves room for one doubling;
+     - the power of two just above pat_len must be representable in
+       TABLE_SIZE, otherwise the doubling loop below would wrap to zero
+       and never terminate.
+     The first test also rejects pat_len == SIZE_MAX.  */
+  max_object_size = sizeof (re_token_t);
+  if (max_object_size < sizeof (struct re_state_table_entry))
+    max_object_size = sizeof (struct re_state_table_entry);
+  if (max_object_size < sizeof (re_node_set))
+    max_object_size = sizeof (re_node_set);
+  if (BE (pat_len >= SIZE_MAX / max_object_size / 2
+	  || pat_len > ((unsigned int) -1) / 2, 0))
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /* table_size = the smallest power of two greater than pat_len.  */
+  for (table_size = 1; ; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (table_size, sizeof (struct re_state_table_entry));
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+		       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  /* No nl_langinfo: take the part after the '.' of the first non-empty
+     one of LC_ALL, LC_CTYPE and LANG.  */
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII.  */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+	/* Shared read-only map; free_dfa_content knows not to free it.  */
+	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+      else
+	{
+	  int i, j, ch;
+
+	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+	  if (BE (dfa->sb_char == NULL, 0))
+	    return REG_ESPACE;
+
+	  /* Set the bits corresponding to single byte chars.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+	      {
+		wint_t wch = __btowc (ch);
+		if (wch != WEOF)
+		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+		if (isascii (ch) && wch != ch)
+		  dfa->map_notascii = 1;
+# endif
+	      }
+	}
+    }
+#endif
+
+  /* The two allocations made at the top are only checked here.  */
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Fill DFA->word_char, the bitset of "word" characters: a byte's bit is
+   set when the byte is alphanumeric (isalnum) or '_'.  These are the
+   characters that operators such as "\<" and "\>" treat as part of a
+   word.  Also records in DFA->word_ops_used that the table is in
+   use.  */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  int word, bit;
+  int ch = 0;
+
+  dfa->word_ops_used = 1;
+  for (word = 0; word < BITSET_WORDS; ++word)
+    {
+      /* Collect the bits of this word, then merge them in one go.  */
+      bitset_word_t mask = 0;
+      for (bit = 0; bit < BITSET_WORD_BITS; ++bit, ++ch)
+	{
+	  if (ch == '_' || isalnum (ch))
+	    mask |= (bitset_word_t) 1 << bit;
+	}
+      dfa->word_char[word] |= mask;
+    }
+}
+
+/* Release the memory that is needed only while PREG is being compiled:
+   the chain of parse-tree storage blocks and the org_indices array.
+   The matching DFA fields are reset so that a second call is
+   harmless.  */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  while (chunk != NULL)
+    {
+      /* Fetch the link before the block that holds it is freed.  */
+      bin_tree_storage_t *following = chunk->next;
+      re_free (chunk);
+      chunk = following;
+    }
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+  dfa->str_tree = NULL;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.
+
+   The node set of the initial state is the epsilon closure of the
+   first node of the expression, extended through back-references as
+   described below.  From it DFA->init_state is acquired, and the word,
+   newline and buffer-start variants too when the state carries a
+   constraint; otherwise those three simply alias init_state.
+
+   Returns REG_NOERROR on success.  When a node-set operation fails its
+   error code is returned; when a state cannot be acquired the value
+   stored in ERR by re_acquire_state_context is returned.  The temporary
+   node set is released on every path once it has been created.  */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+	int node_idx = init_nodes.elems[i];
+	int clexp_idx, dest_idx;
+
+	if (dfa->nodes[node_idx].type != OP_BACK_REF)
+	  continue;
+
+	/* Only follow the back-reference if the close-subexpression
+	   node with the same index is itself in the initial set.  */
+	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+	  {
+	    re_token_t *clexp_node;
+	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+	    if (clexp_node->type == OP_CLOSE_SUBEXP
+		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+	      break;
+	  }
+	if (clexp_idx == init_nodes.nelem)
+	  continue;
+
+	dest_idx = dfa->edests[node_idx].elems[0];
+	if (!re_node_set_contains (&init_nodes, dest_idx))
+	  {
+	    err = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+	    if (BE (err != REG_NOERROR, 0))
+	      goto free_return;
+	    /* The set changed: restart the scan (the loop increment
+	       runs before the next test).  */
+	    i = 0;
+	  }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+						       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+						     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+							 &init_nodes,
+							 CONTEXT_NEWLINE
+							 | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	      || dfa->init_state_begbuf == NULL, 0))
+	goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  err = REG_NOERROR;
+
+ free_return:
+  /* The node set is a temporary on the success path as well, so it is
+     released on every exit.  */
+  re_node_set_free (&init_nodes);
+  return err;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Switch DFA to single-byte searching when the pattern allows it:
+   dfa->mb_cur_max becomes 1, is_utf8 is cleared and the nodes that
+   need it are adjusted.  The function returns without changing
+   anything as soon as it meets a node it cannot handle: an anchor
+   other than the line/buffer start and end ones, a COMPLEX_BRACKET, or
+   a SIMPLE_BRACKET that contains a byte of 0x80 or above.  Any node
+   type not listed in the switch aborts the program.  */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+  int idx, word;
+  int saw_high_byte = 0, saw_period = 0;
+
+  /* First pass: look for obstacles and note what must be patched.  */
+  for (idx = 0; idx < dfa->nodes_len; ++idx)
+    {
+      re_token_t *tok = dfa->nodes + idx;
+
+      switch (tok->type)
+	{
+	case CHARACTER:
+	  if (tok->opr.c >= 0x80)
+	    saw_high_byte = 1;
+	  break;
+
+	case ANCHOR:
+	  /* Word anchors etc. cannot be handled.  */
+	  if (tok->opr.idx != LINE_FIRST && tok->opr.idx != LINE_LAST
+	      && tok->opr.idx != BUF_FIRST && tok->opr.idx != BUF_LAST)
+	    return;
+	  break;
+
+	case OP_PERIOD:
+	  saw_period = 1;
+	  break;
+
+	case OP_BACK_REF:
+	case OP_ALT:
+	case END_OF_RE:
+	case OP_DUP_ASTERISK:
+	case OP_OPEN_SUBEXP:
+	case OP_CLOSE_SUBEXP:
+	  break;
+
+	case COMPLEX_BRACKET:
+	  return;
+
+	case SIMPLE_BRACKET:
+	  /* Just double check.  The non-ASCII range starts at 0x80.  */
+	  assert (0x80 % BITSET_WORD_BITS == 0);
+	  for (word = 0x80 / BITSET_WORD_BITS; word < BITSET_WORDS; ++word)
+	    {
+	      if (tok->opr.sbcset[word])
+		return;
+	    }
+	  break;
+
+	default:
+	  abort ();
+	}
+    }
+
+  /* Second pass, only when there is something to patch.  */
+  if (saw_high_byte || saw_period)
+    {
+      for (idx = 0; idx < dfa->nodes_len; ++idx)
+	{
+	  re_token_t *tok = dfa->nodes + idx;
+
+	  if (tok->type == CHARACTER && tok->opr.c >= 0x80)
+	    tok->mb_partial = 0;
+	  else if (tok->type == OP_PERIOD)
+	    tok->type = OP_UTF8_PERIOD;
+	}
+    }
+
+  /* The search can be in single byte locale.  */
+  dfa->mb_cur_max = 1;
+  dfa->is_utf8 = 0;
+  dfa->has_mb_node = dfa->nbackref > 0 || saw_period;
+}
+#endif
+
+/* Analyze the structure tree of PREG's DFA and compute the per-node
+   tables "first", "next", "edest" and "eclosure", plus "inveclosure"
+   when it will be needed.  Returns REG_NOERROR, REG_ESPACE when an
+   allocation fails, or the error reported by one of the passes.  */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t status;
+
+  /* Allocate the per-node arrays.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL
+	  || dfa->edests == NULL || dfa->eclosures == NULL, 0))
+    return REG_ESPACE;
+
+  /* The subexpression map is optional: when it cannot be allocated the
+     optimize_subexps pass is simply skipped.  */
+  dfa->subexp_map = re_malloc (int, preg->re_nsub);
+  if (dfa->subexp_map != NULL)
+    {
+      int i;
+      int is_identity = 1;
+
+      for (i = 0; i < preg->re_nsub; i++)
+	dfa->subexp_map[i] = i;
+      preorder (dfa->str_tree, optimize_subexps, dfa);
+
+      /* A map that still sends every index to itself carries no
+	 information, so drop it.  */
+      for (i = 0; is_identity && i < preg->re_nsub; i++)
+	{
+	  if (dfa->subexp_map[i] != i)
+	    is_identity = 0;
+	}
+      if (is_identity)
+	{
+	  free (dfa->subexp_map);
+	  dfa->subexp_map = NULL;
+	}
+    }
+
+  /* Run the tree passes in order, stopping at the first failure.  */
+  status = postorder (dfa->str_tree, lower_subexps, preg);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  status = postorder (dfa->str_tree, calc_first, dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  preorder (dfa->str_tree, calc_next, dfa);
+
+  status = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  status = calc_eclosure (dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* We only need this during the prune_impossible_nodes pass in regexec.c;
+     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
+  if (dfa->nbackref
+      || (!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match))
+    {
+      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+      if (BE (dfa->inveclosures == NULL, 0))
+	return REG_ESPACE;
+      status = calc_inveclosure (dfa);
+    }
+
+  return status;
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+   implement parse tree visits.  Instead, we use parent pointers and
+   some hairy code in these two functions; both stop at FN's first error.  */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	   void *extra)
+{
+  bin_tree_t *node, *prev;
+  /* Visit children before parents; ends after the parentless node.  */
+  for (node = root; ; )
+    {
+      /* Descend down the tree, preferably to the left (or to the right
+	 if that's the only child).  */
+      while (node->left || node->right)
+	if (node->left)
+          node = node->left;
+        else
+          node = node->right;
+
+      do
+	{
+	  reg_errcode_t err = fn (extra, node);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+          if (node->parent == NULL)
+	    return REG_NOERROR;
+	  prev = node;
+	  node = node->parent;
+	}
+      /* Go up while we have a node that is reached from the right.  */
+      while (node->right == prev || node->right == NULL);
+      node = node->right;
+    }
+}
+
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	  void *extra)
+{
+  bin_tree_t *node;
+  /* Call FN on each node before its children, left subtree first.  */
+  for (node = root; ; )
+    {
+      reg_errcode_t err = fn (extra, node);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	node = node->left;
+      else
+	{
+	  bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      if (!node)	/* Climbed past the top: visit is complete.  */
+	        return REG_NOERROR;
+	    }
+	  node = node->right;
+	}
+    }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
+   backreferences as well.  Requires a preorder visit; never fails.  */
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+    {
+      int idx = node->token.opr.idx;
+      node->token.opr.idx = dfa->subexp_map[idx];
+      dfa->used_bkref_map |= 1 << node->token.opr.idx;
+    }
+  /* A SUBEXP directly wrapping another SUBEXP: splice the inner one out.  */
+  else if (node->token.type == SUBEXP
+           && node->left && node->left->token.type == SUBEXP)
+    {
+      int other_idx = node->left->token.opr.idx;
+
+      node->left = node->left->left;
+      if (node->left)
+        node->left->parent = node;
+      /* Alias the inner index to the outer one and clear its backref bit.  */
+      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+      if (other_idx < BITSET_WORD_BITS)
+	  dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+    }
+
+  return REG_NOERROR;
+}
+
+/* Lowering pass (postorder callback): replace each SUBEXP child of NODE by
+   OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+  bin_tree_t **children[2];
+  reg_errcode_t status = REG_NOERROR;
+  int side;
+
+  children[0] = &node->left;
+  children[1] = &node->right;
+  for (side = 0; side < 2; ++side)
+    {
+      bin_tree_t **slot = children[side];
+      if (*slot == NULL || (*slot)->token.type != SUBEXP)
+	continue;
+      *slot = lower_subexp (&status, (regex_t *) extra, *slot);
+      if (*slot != NULL)
+	(*slot)->parent = node;
+    }
+
+  return status;
+}
+
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body = node->left;
+  bin_tree_t *op, *cls, *tree1, *tree;
+  /* Return the bare body when no_sub is set and used_bkref_map skips it.  */
+  if (preg->no_sub
+      /* We do not optimize empty subexpressions, because otherwise we may
+	 have bad CONCAT nodes with NULL children.  This is obviously not
+	 very common, so we do not lose much.  An example that triggers
+	 this case is the sed "script" /\(\)/x.  */
+      && node->left != NULL
+      && (node->token.opr.idx >= BITSET_WORD_BITS
+	  || !(dfa->used_bkref_map
+	       & ((bitset_word_t) 1 << node->token.opr.idx))))
+    return node->left;
+
+  /* Convert the SUBEXP node to the concatenation of an OP_OPEN_SUBEXP, the
+     contents, and an OP_CLOSE_SUBEXP; on failure set *ERR and return NULL.  */
+  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
+  tree = create_tree (dfa, op, tree1, CONCAT);
+  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  /* Both markers take the index and the opt_subexp flag of the SUBEXP.  */
+  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
+  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
+  return tree;
+}
+
+/* First NFA-building pass (postorder callback): record FIRST for NODE and
+   create an unlinked automaton node for everything that is not a CONCAT.  */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *automaton = (re_dfa_t *) extra;
+
+  if (node->token.type != CONCAT)
+    {
+      /* Anything else begins at itself and gets a fresh automaton node.  */
+      node->first = node;
+      node->node_idx = re_dfa_add_node (automaton, node->token);
+      return BE (node->node_idx == -1, 0) ? REG_ESPACE : REG_NOERROR;
+    }
+
+  /* A concatenation begins wherever its left operand begins.  */
+  node->first = node->left->first;
+  node->node_idx = node->left->node_idx;
+  return REG_NOERROR;
+}
+
+/* Pass 2 (preorder callback): hand each child of NODE its NEXT node.  */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+  bin_tree_t *lhs = node->left, *rhs = node->right;
+
+  if (node->token.type == OP_DUP_ASTERISK)
+    lhs->next = node;		/* The operand loops back to the star.  */
+  else if (node->token.type == CONCAT)
+    {
+      lhs->next = rhs->first;	/* Left flows into the start of right.  */
+      rhs->next = node->next;
+    }
+  else
+    {
+      if (lhs != NULL)
+	lhs->next = node->next;
+      if (rhs != NULL)
+	rhs->next = node->next;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
+static reg_errcode_t
+link_nfa_nodes (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  int idx = node->node_idx;
+  reg_errcode_t err = REG_NOERROR;
+  /* Epsilon nodes fill edests[idx], others nexts[idx]; OP_BACK_REF both.  */
+  switch (node->token.type)
+    {
+    case CONCAT:
+      break;
+
+    case END_OF_RE:
+      assert (node->next == NULL);
+      break;
+
+    case OP_DUP_ASTERISK:
+    case OP_ALT:
+      {
+	int left, right;
+	dfa->has_plural_match = 1;
+	if (node->left != NULL)
+	  left = node->left->first->node_idx;
+	else
+	  left = node->next->node_idx;
+	if (node->right != NULL)
+	  right = node->right->first->node_idx;
+	else
+	  right = node->next->node_idx;
+	assert (left > -1);
+	assert (right > -1);
+	err = re_node_set_init_2 (dfa->edests + idx, left, right);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+      break;
+
+    case OP_BACK_REF:
+      dfa->nexts[idx] = node->next->node_idx;
+      /* Report a failed set initialization instead of dropping it.  */
+      err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+      break;
+
+    default:
+      assert (!IS_EPSILON_NODE (node->token.type));
+      dfa->nexts[idx] = node->next->node_idx;
+      break;
+    }
+
+  return err;
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE, following the
+   original nodes from TOP_ORG_NODE and wiring the copies from TOP_CLONE_NODE.
+   Duplicated nodes have constraint INIT_CONSTRAINT in addition to their
+   own constraint.  Return REG_NOERROR, or REG_ESPACE if out of memory.  */
+static reg_errcode_t
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+			int root_node, unsigned int init_constraint)
+{
+  int org_node, clone_node, ret;
+  unsigned int constraint = init_constraint;
+  for (org_node = top_org_node, clone_node = top_clone_node;;)
+    {
+      int org_dest, clone_dest;
+      if (dfa->nodes[org_node].type == OP_BACK_REF)
+	{
+	  /* If the back reference epsilon-transits, its destination must
+	     also have the constraint.  Then duplicate the epsilon closure
+	     of the destination of the back reference, and store it in
+	     edests of the back reference.  */
+	  org_dest = dfa->nexts[org_node];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else if (dfa->edests[org_node].nelem == 0)
+	{
+	  /* In case the node can't epsilon-transit, don't duplicate the
+	     destination and store the original destination as the
+	     destination of the node.  */
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  break;
+	}
+      else if (dfa->edests[org_node].nelem == 1)
+	{
+	  /* In case the node can epsilon-transit, and it has only one
+	     destination.  */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  if (dfa->nodes[org_node].type == ANCHOR)
+	    {
+	      /* In case the node has another constraint, append it.  */
+	      if (org_node == root_node && clone_node != org_node)
+		{
+		  /* ...but if the node is root_node itself, it means the
+		     epsilon closure has a loop, then tie it to the
+		     destination of the root_node.  */
+		  ret = re_node_set_insert (dfa->edests + clone_node,
+					    org_dest);
+		  if (BE (ret < 0, 0))
+		    return REG_ESPACE;
+		  break;
+		}
+	      constraint |= dfa->nodes[org_node].opr.ctx_type;
+	    }
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else /* dfa->edests[org_node].nelem == 2 */
+	{
+	  /* In case the node can epsilon-transit, and it has two
+	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  /* Search for a duplicated node which satisfies the constraint.  */
+	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+	  if (clone_dest == -1)
+	    {
+	      /* There is no such duplicated node, create a new one.  */
+	      reg_errcode_t err;
+	      clone_dest = duplicate_node (dfa, org_dest, constraint);
+	      if (BE (clone_dest == -1, 0))
+		return REG_ESPACE;
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
+					    root_node, constraint);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  else
+	    {
+	      /* There is a duplicated node which satisfies the constraint,
+		 use it to avoid infinite loop.  */
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	    }
+
+	  org_dest = dfa->edests[org_node].elems[1];
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      org_node = org_dest;
+      clone_node = clone_dest;
+    }
+  return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+   satisfies the constraint CONSTRAINT, among the duplicated nodes at the
+   end of DFA->nodes.  Return its index, or -1 if there is none.  */
+static int
+search_duplicated_node (const re_dfa_t *dfa, int org_node,
+			unsigned int constraint)
+{
+  int idx;	/* Range-checked before it is used to index dfa->nodes.  */
+  for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+    {
+      if (org_node == dfa->org_indices[idx]
+	  && constraint == dfa->nodes[idx].constraint)
+	return idx; /* Found.  */
+    }
+  return -1; /* Not found.  */
+}
+
+/* Append a copy of node ORG_IDX to DFA, tagged as a duplicate that carries
+   CONSTRAINT (plus the original's own context type when it is an ANCHOR).
+   Return the copy's index, or -1 if insufficient storage is available.  */
+
+static int
+duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+{
+  int new_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+  re_token_t *copy;
+  if (!BE (new_idx != -1, 1))
+    return new_idx;
+
+  copy = dfa->nodes + new_idx;
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    constraint |= dfa->nodes[org_idx].opr.ctx_type;
+  copy->constraint = constraint;
+  copy->duplicated = 1;
+  dfa->org_indices[new_idx] = org_idx;	/* Remember where it came from.  */
+  return new_idx;
+}
+
+static reg_errcode_t
+calc_inveclosure (re_dfa_t *dfa)
+{
+  int from, k;
+
+  /* Reset every inverse closure, then invert each epsilon closure.  */
+  for (k = 0; k < dfa->nodes_len; ++k)
+    re_node_set_init_empty (&dfa->inveclosures[k]);
+  for (from = 0; from < dfa->nodes_len; ++from)
+    {
+      const re_node_set *closure = &dfa->eclosures[from];
+      for (k = 0; k < closure->nelem; ++k)
+	{
+	  re_node_set *inverse = &dfa->inveclosures[closure->elems[k]];
+	  if (BE (re_node_set_insert_last (inverse, from) == -1, 0))
+	    return REG_ESPACE;
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate "eclosure" for all the nodes in DFA, rescanning from node 0
+   until no closure is left incomplete.  Return REG_NOERROR or an error.  */
+static reg_errcode_t
+calc_eclosure (re_dfa_t *dfa)
+{
+  int node_idx, incomplete;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  incomplete = 0;
+  /* For each node, calculate epsilon closure.  */
+  for (node_idx = 0; ; ++node_idx)
+    {
+      reg_errcode_t err;
+      re_node_set eclosure_elem;
+      if (node_idx == dfa->nodes_len)
+	{
+	  if (!incomplete)
+	    break;
+	  incomplete = 0;
+	  node_idx = 0;
+	}
+
+#ifdef DEBUG
+      assert (dfa->eclosures[node_idx].nelem != -1);
+#endif
+
+      /* If we have already calculated, skip it.  */
+      if (dfa->eclosures[node_idx].nelem != 0)
+	continue;
+      /* Calculate epsilon closure of `node_idx'.  */
+      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      if (dfa->eclosures[node_idx].nelem == 0)
+	{
+	  incomplete = 1;
+	  re_node_set_free (&eclosure_elem);
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE into *NEW_SET; ROOT is nonzero for the
+   top-level call.  Return REG_NOERROR, or an error such as REG_ESPACE.  */
+static reg_errcode_t
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete;
+  re_node_set eclosure;
+  incomplete = 0;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+		? dfa->nodes[node].opr.ctx_type : 0);
+  /* If the current node has constraints, duplicate all nodes.
+     Since they must inherit the constraints.  */
+  if (constraint
+      && dfa->edests[node].nelem
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+	re_node_set eclosure_elem;
+	int edest = dfa->edests[node].elems[i];
+	/* If calculating the epsilon closure of `edest' is in progress,
+	   return intermediate result.  */
+	if (dfa->eclosures[edest].nelem == -1)
+	  {
+	    incomplete = 1;
+	    continue;
+	  }
+	/* If we haven't calculated the epsilon closure of `edest' yet,
+	   calculate now. Otherwise use calculated epsilon closure.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+	    if (BE (err != REG_NOERROR, 0))
+	      return err;
+	  }
+	else
+	  eclosure_elem = dfa->eclosures[edest];
+	/* Merge it; an incomplete `edest' closure leaves ours incomplete too.  */
+	err = re_node_set_merge (&eclosure, &eclosure_elem);
+	if (BE (err != REG_NOERROR, 0))
+	  return err;
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    incomplete = 1;
+	    re_node_set_free (&eclosure_elem);
+	  }
+      }
+  /* Epsilon closures include itself.  */
+  if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+    return REG_ESPACE;
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;	/* Left for a later rescan.  */
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+}
+
+/* Functions for token which are used in the parser.  */
+
+/* Fetch a token from INPUT into RESULT, then step INPUT over it.
+   We must not use this function inside bracket expressions.  */
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+  int token_len = peek_token (result, input, syntax);
+  re_string_skip_bytes (input, token_len);
+}
+
+/* Peek a token from INPUT into TOKEN, and return the length of the token
+   (0 at the end of INPUT, 2 for a backslash escape, otherwise 1).
+   We must not use this function inside bracket expressions.  */
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+  token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+  token->mb_partial = 0;
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+	{
+	  token->type = BACK_SLASH;
+	  return 1;
+	}
+
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+	{
+	  wint_t wc = re_string_wchar_at (input,
+					  re_string_cur_idx (input) + 1);
+	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+	}
+      else
+#endif
+	token->word_char = IS_WORD_CHAR (c2) != 0;
+      /* Escapes that SYNTAX treats as operators override CHARACTER.  */
+      switch (c2)
+	{
+	case '|':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+	    token->type = OP_ALT;
+	  break;
+	case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	  if (!(syntax & RE_NO_BK_REFS))
+	    {
+	      token->type = OP_BACK_REF;
+	      token->opr.idx = c2 - '1';
+	    }
+	  break;
+	case '<':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_FIRST;
+	    }
+	  break;
+	case '>':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  break;
+	case 'b':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_DELIM;
+	    }
+	  break;
+	case 'B':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = NOT_WORD_DELIM;
+	    }
+	  break;
+	case 'w':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_WORD;
+	  break;
+	case 'W':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTWORD;
+	  break;
+	case 's':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_SPACE;
+	  break;
+	case 'S':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTSPACE;
+	  break;
+	case '`':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_FIRST;
+	    }
+	  break;
+	case '\'':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_LAST;
+	    }
+	  break;
+	case '(':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_OPEN_SUBEXP;
+	  break;
+	case ')':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_CLOSE_SUBEXP;
+	  break;
+	case '+':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_PLUS;
+	  break;
+	case '?':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_QUESTION;
+	  break;
+	case '{':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_OPEN_DUP_NUM;
+	  break;
+	case '}':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_CLOSE_DUP_NUM;
+	  break;
+	default:
+	  break;
+	}
+      return 2;
+    }
+  /* Unescaped byte: a CHARACTER unless SYNTAX makes it special below.  */
+  token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+    }
+  else
+#endif
+    token->word_char = IS_WORD_CHAR (token->opr.c);
+
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+	token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+	token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+	  re_string_cur_idx (input) != 0)
+	{
+	  char prev = re_string_peek_byte (input, -1);
+	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+	    break;	/* Stays a plain CHARACTER.  */
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_FIRST;
+      break;
+    case '$':
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+	  re_string_cur_idx (input) + 1 != re_string_length (input))
+	{
+	  re_token_t next;
+	  re_string_skip_bytes (input, 1);
+	  peek_token (&next, input, syntax);
+	  re_string_skip_bytes (input, -1);
+	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+	    break;	/* Stays a plain CHARACTER.  */
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Peek a token from INPUT into TOKEN, and return the length of the token.
+   Note that a backslash escape advances INPUT past the backslash itself.
+   We must not use this function out of bracket expressions.  */
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escapes a character.  */
+      unsigned char c2;
+      re_string_skip_bytes (input, 1);
+      c2 = re_string_peek_byte (input, 0);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len;
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+	c2 = re_string_peek_byte (input, 1);
+      else
+	c2 = 0;
+      token->opr.c = c2;
+      token_len = 2;
+      switch (c2)
+	{
+	case '.':
+	  token->type = OP_OPEN_COLL_ELEM;
+	  break;
+	case '=':
+	  token->type = OP_OPEN_EQUIV_CLASS;
+	  break;
+	case ':':
+	  if (syntax & RE_CHAR_CLASSES)
+	    {
+	      token->type = OP_OPEN_CHAR_CLASS;
+	      break;
+	    }
+	  /* else fall through.  */
+	default:
+	  token->type = CHARACTER;
+	  token->opr.c = c;
+	  token_len = 1;
+	  break;
+	}
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error has occurred, ERR is set to the error code and NULL is returned.
+   This function builds the following tree, from regular expression <reg_exp>:
+	   CAT
+	   / \
+	  /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.  */
+
+static bin_tree_t *
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+       reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *eor, *root;
+  re_token_t current_token;
+  dfa->syntax = syntax;
+  fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+  eor = create_tree (dfa, NULL, NULL, END_OF_RE);
+  if (tree != NULL)
+    root = create_tree (dfa, tree, eor, CONCAT);
+  else
+    root = eor;	/* No expression tree: EOR alone is the root.  */
+  if (BE (eor == NULL || root == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return root;
+}
+
+/* This function builds the following tree, from regular expression
+   <branch1>|<branch2>:
+	   ALT
+	   / \
+	  /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.  An empty
+   alternative becomes a NULL child.  On failure return NULL and set *ERR.  */
+static bin_tree_t *
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *branch = NULL;
+  tree = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+      if (token->type != OP_ALT && token->type != END_OF_RE
+	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+	{
+	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
+	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
+	    return NULL;
+	}
+      else
+	branch = NULL;
+      tree = create_tree (dfa, tree, branch, OP_ALT);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+    }
+  return tree;
+}
+
+/* This function builds the following tree, from regular expression
+   <exp1><exp2>:
+	CAT
+	/ \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.  Expressions that yield no tree are skipped.
+   On failure return NULL with *ERR set.  */
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	      reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  bin_tree_t *tree, *exp;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  tree = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type != OP_ALT && token->type != END_OF_RE
+	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+    {
+      exp = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && exp == NULL, 0))
+	{
+	  return NULL;
+	}
+      if (tree != NULL && exp != NULL)
+	{
+	  tree = create_tree (dfa, tree, exp, CONCAT);
+	  if (tree == NULL)
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else if (tree == NULL)
+	tree = exp;
+      /* Otherwise exp == NULL, we don't need to create new tree.  */
+    }
+  return tree;
+}
+
+/* This function build the following tree, from regular expression a*:
+	 *
+	 |
+	 a
+*/
+
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+		  reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  switch (token->type)
+    {
+    case CHARACTER:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (!re_string_eoi (regexp)
+		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+	    {
+	      bin_tree_t *mbc_remain;
+	      fetch_token (token, regexp, syntax);
+	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+	      if (BE (mbc_remain == NULL || tree == NULL, 0))
+		{
+		  *err = REG_ESPACE;
+		  return NULL;
+		}
+	    }
+	}
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_OPEN_BRACKET:
+      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_BACK_REF:
+      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+	{
+	  *err = REG_ESUBREG;
+	  return NULL;
+	}
+      dfa->used_bkref_map |= 1 << token->opr.idx;
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      ++dfa->nbackref;
+      dfa->has_mb_node = 1;
+      break;
+    case OP_OPEN_DUP_NUM:
+      if (syntax & RE_CONTEXT_INVALID_DUP)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      /* FALLTHROUGH */
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+      if (syntax & RE_CONTEXT_INVALID_OPS)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      else if (syntax & RE_CONTEXT_INDEP_OPS)
+	{
+	  fetch_token (token, regexp, syntax);
+	  return parse_expression (regexp, preg, token, syntax, nest, err);
+	}
+      /* else fall through  */
+    case OP_CLOSE_SUBEXP:
+      if ((token->type == OP_CLOSE_SUBEXP) &&
+	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+	{
+	  *err = REG_ERPAREN;
+	  return NULL;
+	}
+      /* else fall through  */
+    case OP_CLOSE_DUP_NUM:
+      /* We treat it as a normal character.  */
+
+      /* Then we can these characters as normal characters.  */
+      token->type = CHARACTER;
+      /* mb_partial and word_char bits should be initialized already
+	 by peek_token.  */
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      break;
+    case ANCHOR:
+      if ((token->opr.ctx_type
+	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+	  && dfa->word_ops_used == 0)
+	init_word_char (dfa);
+      if (token->opr.ctx_type == WORD_DELIM
+          || token->opr.ctx_type == NOT_WORD_DELIM)
+	{
+	  bin_tree_t *tree_first, *tree_last;
+	  if (token->opr.ctx_type == WORD_DELIM)
+	    {
+	      token->opr.ctx_type = WORD_FIRST;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = WORD_LAST;
+            }
+          else
+            {
+	      token->opr.ctx_type = INSIDE_WORD;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = INSIDE_NOTWORD;
+            }
+	  tree_last = create_token_tree (dfa, NULL, NULL, token);
+	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else
+	{
+	  tree = create_token_tree (dfa, NULL, NULL, token);
+	  if (BE (tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      /* We must return here, since ANCHORs can't be followed
+	 by repetition operators.
+	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
+      fetch_token (token, regexp, syntax);
+      return tree;
+    case OP_PERIOD:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      if (dfa->mb_cur_max > 1)
+	dfa->has_mb_node = 1;
+      break;
+    case OP_WORD:
+    case OP_NOTWORD:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "alnum",
+				 (const unsigned char *) "_",
+				 token->type == OP_NOTWORD, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_SPACE:
+    case OP_NOTSPACE:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "space",
+				 (const unsigned char *) "",
+				 token->type == OP_NOTSPACE, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_ALT:
+    case END_OF_RE:
+      return NULL;
+    case BACK_SLASH:
+      *err = REG_EESCAPE;
+      return NULL;
+    default:
+      /* Must not happen?  */
+#ifdef DEBUG
+      assert (0);
+#endif
+      return NULL;
+    }
+  fetch_token (token, regexp, syntax);
+
+  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+    {
+      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      /* In BRE consecutive duplications are not allowed.  */
+      if ((syntax & RE_CONTEXT_INVALID_DUP)
+	  && (token->type == OP_DUP_ASTERISK
+	      || token->type == OP_OPEN_DUP_NUM))
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+    }
+
+  return tree;
+}
+
+/* This function builds the following tree from the regular expression
+   (<reg_exp>):
+	 SUBEXP
+	    |
+	<reg_exp>
+
+   The next token is fetched first, so on a successful return TOKEN is
+   the OP_CLOSE_SUBEXP that ends the group.  PREG->re_nsub is incremented
+   unconditionally; its previous value becomes the index stored in the
+   SUBEXP node.  An empty group yields a SUBEXP node without a child.
+
+   Returns the SUBEXP node, or NULL with *ERR set: REG_EPAREN when the
+   group is not closed, REG_ESPACE when the SUBEXP node cannot be
+   created, or whatever error parse_reg_exp reported.  */
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+	{
+	  /* The group was never closed.  The partial tree is dropped
+	     here, so release it the same way parse_dup_op discards a
+	     tree before reporting the error.  */
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  *err = REG_EPAREN;
+	}
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+    }
+
+  /* Only indices 0..8 (the first nine groups) are tracked in the map.  */
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  tree = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  tree->token.opr.idx = cur_nsub;
+  return tree;
+}
+
+/* Parse a repetition operator ("*", "+", "?" or an interval such as
+   "{1,3}") and apply it to ELEM.
+
+   TOKEN is the operator token on entry.  START and END hold the
+   repetition bounds; END == -1 means "no upper bound".  For intervals
+   the bounds come from fetch_number, whose results are used here as
+   -1 for "no number present" and -2 for "invalid sequence".
+
+   Return value:
+   - the tree implementing the repetition on success;
+   - ELEM itself, with the input position and TOKEN restored and TOKEN
+     turned into a CHARACTER, when the interval is invalid and
+     RE_INVALID_INTERVAL_ORD is set in SYNTAX;
+   - NULL with *ERR untouched when ELEM is NULL or both bounds are 0
+     (ELEM is released in the latter case);
+   - NULL with *ERR set to REG_BADBR, REG_EBRACE or REG_ESPACE on
+     error.  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+	{
+	  if (token->type == CHARACTER && token->opr.c == ',')
+	    start = 0; /* We treat "{,m}" as "{0,m}".  */
+	  else
+	    {
+	      *err = REG_BADBR; /* <re>{} is invalid.  */
+	      return NULL;
+	    }
+	}
+      if (BE (start != -2, 1))
+	{
+	  /* We treat "{n}" as "{n,n}".  */
+	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+		 : ((token->type == CHARACTER && token->opr.c == ',')
+		    ? fetch_number (regexp, token, syntax) : -2));
+	}
+      if (BE (start == -2 || end == -2, 0))
+	{
+	  /* Invalid sequence.  */
+	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+	    {
+	      if (token->type == END_OF_RE)
+		*err = REG_EBRACE;
+	      else
+		*err = REG_BADBR;
+
+	      return NULL;
+	    }
+
+	  /* If the syntax bit is set, rollback.  */
+	  re_string_set_index (regexp, start_idx);
+	  *token = start_token;
+	  token->type = CHARACTER;
+	  /* mb_partial and word_char bits should be already initialized by
+	     peek_token.  */
+	  return elem;
+	}
+
+      if (BE (end != -1 && start > end, 0))
+	{
+	  /* First number greater than second.  */
+	  *err = REG_BADBR;
+	  return NULL;
+	}
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      /* Zero repetitions: the operand disappears from the tree.  */
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+	{
+	  elem = duplicate_tree (elem, dfa);
+	  tree = create_tree (dfa, tree, elem, CONCAT);
+	  if (BE (elem == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
+
+      if (start == end)
+	return tree;
+
+      /* Duplicate ELEM before it is marked optional.  The copy is
+	 dereferenced right below, so a failed duplication must be
+	 caught here.  */
+      elem = duplicate_tree (elem, dfa);
+      if (BE (elem == NULL, 0))
+	goto parse_dup_op_espace;
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+        goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    {
+      /* Prepend the mandatory copies.  A NULL result with *ERR left at
+	 REG_NOERROR would be taken by the caller for an empty
+	 expression, so report the allocation failure explicitly.  */
+      tree = create_tree (dfa, old_tree, tree, CONCAT);
+      if (BE (tree == NULL, 0))
+	goto parse_dup_op_espace;
+    }
+
+  return tree;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size of the buffers that hold the name of a collating symbol,
+   equivalence class or character class in a bracket expression,
+   terminating NUL included.  parse_bracket_symbol returns REG_EBRACK
+   for names that do not fit.  The value is a guess by the original
+   author ("I'm not sure, but maybe enough").  */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.
+
+     Returns REG_NOERROR on success, REG_ERANGE when an endpoint is an
+     equivalence class or character class or when the start sorts after
+     the end, REG_ECOLLATE for a collating element longer than one
+     character or (with RE_ENABLE_I18N) an endpoint __btowc maps to WEOF,
+     and REG_ESPACE when the range arrays cannot be grown.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+		 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	  0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+	   && strlen ((char *) start_elem->opr.name) > 1)
+	  || (end_elem->type == COLL_SYM
+	      && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    /* Three one-character wide strings at offsets 0, 2 and 4 (start,
+       candidate, end), each followed by its own terminator.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+		? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+	      ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+        /* Check the space of the arrays.  */
+        if (BE (*range_alloc == mbcset->nranges, 0))
+          {
+	    /* There is not enough space, need realloc.  */
+	    wchar_t *new_array_start, *new_array_end;
+	    int new_nranges;
+
+	    /* +1 in case of mbcset->nranges is 0.  */
+	    new_nranges = 2 * mbcset->nranges + 1;
+	    /* Use realloc since mbcset->range_starts and mbcset->range_ends
+	       are NULL if *range_alloc == 0.
+	       Each array is stored back as soon as it has been resized:
+	       after a successful realloc the old pointer is no longer
+	       valid, so it must not stay in MBCSET if the second
+	       realloc fails.  */
+	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+				          new_nranges);
+	    if (BE (new_array_start == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_starts = new_array_start;
+
+	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+				        new_nranges);
+	    if (BE (new_array_end == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_ends = new_array_end;
+
+	    /* Only now do both arrays have room for NEW_NRANGES entries.  */
+	    *range_alloc = new_nranges;
+          }
+
+        mbcset->range_starts[mbcset->nranges] = start_wc;
+        mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+	cmp_buf[2] = wc;
+	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+	    && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+	  bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch  && ch <= end_ch)
+	bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Non-_LIBC helper for parse_bracket_exp: add the collating symbol NAME
+   to the set being built.  Only a NAME of exactly one byte is accepted;
+   its bit is set in SBCSET and REG_NOERROR is returned.  Any other
+   length yields REG_ECOLLATE.  MBCSET and COLL_SYM_ALLOC exist only with
+   RE_ENABLE_I18N, mirror the parameters of the _LIBC variant, and are
+   not touched here.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  const int is_single_byte = (strlen ((const char *) name) == 1);
+
+  /* Anything but a one-byte name is rejected.  */
+  if (BE (!is_single_byte, 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* This function parses a bracket expression like "[abc]", "[a-c]",
+   "[[.a-a.]]" etc.
+
+   Single byte members are collected in the bitset SBCSET; with
+   RE_ENABLE_I18N the remaining members (multibyte characters, ranges,
+   collating symbols, equivalence and character classes) are collected
+   in MBCSET.  The result is a SIMPLE_BRACKET token tree, a
+   COMPLEX_BRACKET token tree, or an OP_ALT node joining both.
+
+   Returns the tree on success.  On failure returns NULL with *ERR set:
+   REG_ESPACE on allocation failure, REG_BADPAT when the pattern ends
+   right after the opening of the list, REG_EBRACK when it ends before
+   the closing bracket, or the error reported by one of the helpers.
+   Both sets are released on every failure path.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+		   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+	 const unsigned char *name;
+	 size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+	{
+	  int32_t second = hash % (table_size - 2) + 1;
+
+	  do
+	    {
+	      /* First compare the hashing value.  */
+	      if (symb_table[2 * elem] == hash
+		  /* Compare the length of the name.  */
+		  && name_len == extra[symb_table[2 * elem + 1]]
+		  /* Compare the name.  */
+		  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+			     name_len) == 0)
+		{
+		  /* Yep, this is the entry.  */
+		  break;
+		}
+
+	      /* Next entry.  */
+	      elem += second;
+	    }
+	  while (symb_table[2 * elem] != 0);
+	}
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+	 bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+	{
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    return collseqmb[br_elem->opr.ch];
+	  else
+	    {
+	      wint_t wc = __btowc (br_elem->opr.ch);
+	      return __collseq_table_lookup (collseqwc, wc);
+	    }
+	}
+      else if (br_elem->type == MB_CHAR)
+	{
+	  return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+	}
+      else if (br_elem->type == COLL_SYM)
+	{
+	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+	  if (nrules != 0)
+	    {
+	      int32_t elem, idx;
+	      elem = seek_collating_symbol_entry (br_elem->opr.name,
+						  sym_name_len);
+	      if (symb_table[2 * elem] != 0)
+		{
+		  /* We found the entry.  */
+		  idx = symb_table[2 * elem + 1];
+		  /* Skip the name of collating element name.  */
+		  idx += 1 + extra[idx];
+		  /* Skip the byte sequence of the collating element.  */
+		  idx += 1 + extra[idx];
+		  /* Adjust for the alignment.  */
+		  idx = (idx + 3) & ~3;
+		  /* Skip the multibyte collation sequence value.  */
+		  idx += sizeof (unsigned int);
+		  /* Skip the wide char sequence of the collating element.  */
+		  idx += sizeof (unsigned int) *
+		    (1 + *(unsigned int *) (extra + idx));
+		  /* Return the collation sequence value.  */
+		  return *(unsigned int *) (extra + idx);
+		}
+	      else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+		{
+		  /* No valid character.  Match it as a single byte
+		     character.  */
+		  return collseqmb[br_elem->opr.name[0]];
+		}
+	    }
+	  else if (sym_name_len == 1)
+	    return collseqmb[br_elem->opr.name[0]];
+	}
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+	 re_charset_t *mbcset;
+	 int *range_alloc;
+	 bitset_t sbcset;
+	 bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+	 start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	      0))
+	return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+	return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+	return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+	 However, if we have no collation elements, and the character set
+	 is single byte, the single byte character set that we
+	 build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+	{
+          /* Check the space of the arrays.  */
+          if (BE (*range_alloc == mbcset->nranges, 0))
+	    {
+	      /* There is not enough space, need realloc.  */
+	      uint32_t *new_array_start;
+	      uint32_t *new_array_end;
+	      int new_nranges;
+
+	      /* +1 in case of mbcset->nranges is 0.  */
+	      new_nranges = 2 * mbcset->nranges + 1;
+	      /* Store each array back as soon as it has been resized;
+		 after a successful realloc the old pointer is no longer
+		 valid and must not stay in MBCSET if the second
+		 realloc fails.  */
+	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					    new_nranges);
+	      if (BE (new_array_start == NULL, 0))
+	        return REG_ESPACE;
+	      mbcset->range_starts = new_array_start;
+
+	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+				          new_nranges);
+	      if (BE (new_array_end == NULL, 0))
+	        return REG_ESPACE;
+	      mbcset->range_ends = new_array_end;
+
+	      *range_alloc = new_nranges;
+	    }
+
+          mbcset->range_starts[mbcset->nranges] = start_collseq;
+          mbcset->range_ends[mbcset->nranges++] = end_collseq;
+	}
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+	{
+	  uint32_t ch_collseq;
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    ch_collseq = collseqmb[ch];
+	  else
+	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	    bitset_set (sbcset, ch);
+	}
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is a
+     pointer argument since we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+	 re_charset_t *mbcset;
+	 int *coll_sym_alloc;
+	 bitset_t sbcset;
+	 const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+	{
+	  elem = seek_collating_symbol_entry (name, name_len);
+	  if (symb_table[2 * elem] != 0)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	    }
+	  else if (symb_table[2 * elem] == 0 && name_len == 1)
+	    {
+	      /* No valid character, treat it as a normal
+		 character.  */
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	  else
+	    return REG_ECOLLATE;
+
+	  /* Got valid collation sequence, add it as a new entry.  */
+	  /* Check the space of the arrays.  */
+	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+	    {
+	      /* Not enough, realloc it.  */
+	      /* +1 in case of mbcset->ncoll_syms is 0.  */
+	      int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	      /* Use realloc since mbcset->coll_syms is NULL
+		 if *alloc == 0.  */
+	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+						   new_coll_sym_alloc);
+	      if (BE (new_coll_syms == NULL, 0))
+		return REG_ESPACE;
+	      mbcset->coll_syms = new_coll_syms;
+	      *coll_sym_alloc = new_coll_sym_alloc;
+	    }
+	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+	  return REG_NOERROR;
+	}
+      else
+	{
+	  if (BE (name_len != 1, 0))
+	    return REG_ECOLLATE;
+	  else
+	    {
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	}
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      /* One of the two allocations may have succeeded; release it.
+	 MBCSET is still all zeros here, so freeing the struct itself
+	 is sufficient.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+      non_match = 1;
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set (sbcset, '\0');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_BADPAT;
+	  goto parse_bracket_exp_free_return;
+	}
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  /* One iteration per list member (a single element or a range).  */
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+				   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+	{
+	  *err = ret;
+	  goto parse_bracket_exp_free_return;
+	}
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+	{
+	  if (BE (token->type == END_OF_RE, 0))
+	    {
+	      *err = REG_EBRACK;
+	      goto parse_bracket_exp_free_return;
+	    }
+	  if (token->type == OP_CHARSET_RANGE)
+	    {
+	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
+	      if (BE (token2.type == END_OF_RE, 0))
+		{
+		  *err = REG_EBRACK;
+		  goto parse_bracket_exp_free_return;
+		}
+	      if (token2.type == OP_CLOSE_BRACKET)
+		{
+		  /* We treat the last '-' as a normal character.  */
+		  re_string_skip_bytes (regexp, -token_len);
+		  token->type = CHARACTER;
+		}
+	      else
+		is_range_exp = 1;
+	    }
+	}
+
+      if (is_range_exp == 1)
+	{
+	  end_elem.opr.name = end_name_buf;
+	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+				       dfa, syntax, 1);
+	  if (BE (ret != REG_NOERROR, 0))
+	    {
+	      *err = ret;
+	      goto parse_bracket_exp_free_return;
+	    }
+
+	  token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
+				  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+	  *err = build_range_exp (sbcset,
+				  dfa->mb_cur_max > 1 ? mbcset : NULL,
+				  &range_alloc, &start_elem, &end_elem);
+# else
+	  *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* RE_ENABLE_I18N */
+	  if (BE (*err != REG_NOERROR, 0))
+	    goto parse_bracket_exp_free_return;
+	}
+      else
+	{
+	  switch (start_elem.type)
+	    {
+	    case SB_CHAR:
+	      bitset_set (sbcset, start_elem.opr.ch);
+	      break;
+#ifdef RE_ENABLE_I18N
+	    case MB_CHAR:
+	      /* Check whether the array has enough space.  */
+	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+		{
+		  wchar_t *new_mbchars;
+		  /* Not enough, realloc it.  */
+		  /* +1 in case of mbcset->nmbchars is 0.  */
+		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+		  /* Use realloc since array is NULL if *alloc == 0.  */
+		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+					    mbchar_alloc);
+		  if (BE (new_mbchars == NULL, 0))
+		    goto parse_bracket_exp_espace;
+		  mbcset->mbchars = new_mbchars;
+		}
+	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+	      break;
+#endif /* RE_ENABLE_I18N */
+	    case EQUIV_CLASS:
+	      *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+					mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+					start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case COLL_SYM:
+	      *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+					     mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+					     start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case CHAR_CLASS:
+	      *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+				      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+				      start_elem.opr.name, syntax);
+	      if (BE (*err != REG_NOERROR, 0))
+	       goto parse_bracket_exp_free_return;
+	      break;
+	    default:
+	      assert (0);
+	      break;
+	    }
+	}
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_EBRACK;
+	  goto parse_bracket_exp_free_return;
+	}
+      if (token->type == OP_CLOSE_BRACKET)
+	break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+						     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+	if (sbcset[sbc_idx])
+	  break;
+      /* If there are no bits set in sbcset, there is no point
+	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+	{
+          /* Build a tree for simple bracket.  */
+          br_token.type = SIMPLE_BRACKET;
+          br_token.opr.sbcset = sbcset;
+          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+
+          /* Then join them by ALT node.  */
+          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+	}
+      else
+	{
+	  re_free (sbcset);
+	  work_tree = mbc_tree;
+	}
+    }
+  else
+#endif /* not RE_ENABLE_I18N */
+    {
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+#ifdef RE_ENABLE_I18N
+      /* MBCSET is not part of the result.  Release it only after the
+	 last point that can jump to the error path, because that path
+	 calls free_charset on MBCSET as well.  */
+      free_charset (mbcset);
+#endif
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression into ELEM.
+
+   TOKEN and TOKEN_LEN describe the token at the current position of
+   REGEXP, which has not been consumed yet.  With RE_ENABLE_I18N a
+   character occupying more than one byte becomes an MB_CHAR element.
+   Otherwise the token is consumed; the openers of collating elements,
+   character classes and equivalence classes are handed on to
+   parse_bracket_symbol, and everything else becomes an SB_CHAR element.
+   While ACCEPT_HYPHEN is zero, a '-' is accepted only directly before
+   the closing bracket and yields REG_ERANGE anywhere else.
+   DFA is not used.  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+		       re_token_t *token, int token_len, re_dfa_t *dfa,
+		       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      /* A multibyte character is taken whole, whatever TOKEN says.  */
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      elem->type = MB_CHAR;
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  re_string_skip_bytes (regexp, token_len); /* Consume the token.  */
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+
+    case OP_CHARSET_RANGE:
+      if (!accept_hyphen)
+	{
+	  /* A '-' must only appear as anything but a range indicator
+	     before the closing bracket.  Everything else is an error.
+	     The error value is not standardized since the whole case
+	     is undefined, but ERANGE makes good sense.  */
+	  re_token_t next_token;
+	  (void) peek_token_bracket (&next_token, regexp, syntax);
+	  if (next_token.type != OP_CLOSE_BRACKET)
+	    return REG_ERANGE;
+	}
+      break;
+
+    default:
+      break;
+    }
+
+  /* Plain single byte character.  */
+  elem->opr.ch = token->opr.c;
+  elem->type = SB_CHAR;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
+   [:<character_class>:], [.<collating_element>.] and
+   [=<equivalent_class>=].
+
+   The bytes up to the closing delimiter (the character stored in TOKEN
+   followed by ']') are copied into ELEM->opr.name and NUL-terminated,
+   and ELEM->type is set from TOKEN->type.  Returns REG_NOERROR on
+   success and REG_EBRACK when the input ends first or the name does not
+   fit into BRACKET_NAME_BUF_SIZE bytes including the terminator.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+		      re_token_t *token)
+{
+  const unsigned char closer = token->opr.c;
+  const int is_char_class = (token->type == OP_OPEN_CHAR_CLASS);
+  int len;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+
+  for (len = 0; len < BRACKET_NAME_BUF_SIZE; ++len)
+    {
+      unsigned char byte;
+
+      /* Character class names are read through the _case fetcher.  */
+      if (is_char_class)
+	byte = re_string_fetch_byte_case (regexp);
+      else
+	byte = re_string_fetch_byte (regexp);
+
+      if (re_string_eoi(regexp))
+	return REG_EBRACK;
+      if (byte == closer && re_string_peek_byte (regexp, 0) == ']')
+	break;
+      elem->opr.name[len] = byte;
+    }
+
+  /* The loop ran to completion: no room is left for the terminator.  */
+  if (len >= BRACKET_NAME_BUF_SIZE)
+    return REG_EBRACK;
+
+  re_string_skip_bytes (regexp, 1); /* Skip the ']'.  */
+  elem->opr.name[len] = '\0';
+
+  if (token->type == OP_OPEN_COLL_ELEM)
+    elem->type = COLL_SYM;
+  else if (token->type == OP_OPEN_EQUIV_CLASS)
+    elem->type = EQUIV_CLASS;
+  else if (is_char_class)
+    elem->type = CHAR_CLASS;
+
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the equivalence class which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes;
+     it is a pointer argument since we may update it.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+		   int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function (presumably findidx).  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+					       _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+	/* No index was found, or the lookup did not consume all of NAME.  */
+	return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];	/* Compared against weights[idx2] below.  */
+      for (ch = 0; ch < SBC_MAX; ++ch)
+	{
+	  char_buf[0] = ch;
+	  cp = char_buf;
+	  idx2 = findidx (&cp);
+/*
+	  idx2 = table[ch];
+*/
+	  if (idx2 == 0)
+	    /* This isn't a valid character.  */
+	    continue;
+	  if (len == weights[idx2])
+	    {
+	      int cnt = 0;
+	      while (cnt <= len &&
+		     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+		++cnt;
+
+	      if (cnt > len)	/* Every compared weight entry matched.  */
+		bitset_set (sbcset, ch);
+	    }
+	}
+      /* Check whether the array has enough space.  */
+      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->nequiv_classes is 0.  */
+	  int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+	  /* Use realloc since the array is NULL if *alloc == 0.  */
+	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+						   int32_t,
+						   new_equiv_class_alloc);
+	  if (BE (new_equiv_classes == NULL, 0))
+	    return REG_ESPACE;
+	  mbcset->equiv_classes = new_equiv_classes;
+	  *equiv_class_alloc = new_equiv_class_alloc;
+	}
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC */
+    {
+      if (BE (strlen ((const char *) name) != 1, 0))
+	return REG_ECOLLATE;	/* Only one-byte names are accepted here.  */
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the character class which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes;
+     it is a pointer argument since we may update it.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 re_charset_t *mbcset, int *char_class_alloc,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* Under RE_ICASE, "upper" and "lower" are both widened to "alpha" so
+     that either class matches letters of both cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Make room in mbcset->char_classes, then record NAME's wctype.  */
+  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case mbcset->nchar_classes is 0.  */
+      int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+					       new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+	return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func)	\
+  do {						\
+    if (BE (trans != NULL, 0))			\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, trans[i]);	\
+      }						\
+    else					\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, i);		\
+      }						\
+  } while (0)	/* Sets each accepted byte (via TRANS if non-NULL) in SBCSET.  */
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;	/* NAME is not one of the classes above.  */
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for a class operator: the class CLASS_NAME plus
+   every byte of the string EXTRA, complemented when NON_MATCH is set.
+   Return the tree, or NULL with *ERR set if something fails.  */
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+		    const unsigned char *class_name,
+		    const unsigned char *extra, int non_match,
+		    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  /* Allocate the sets one at a time so a partial failure can be undone.  */
+  sbcset = (re_bitset_ptr_t) calloc (1, sizeof (bitset_t));
+  if (BE (sbcset == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (1, sizeof (re_charset_t));
+  if (BE (mbcset == NULL, 0))
+    {
+      /* Do not leak the single-byte set allocated just above.  */
+      re_free (sbcset);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+			 mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+			 class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* Test the joined TREE, not MBC_TREE (known non-NULL here), so a
+	 failed join falls through to the REG_ESPACE path below.  */
+      if (BE (tree != NULL, 1))
+	return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  /* Out of memory: release both sets and report REG_ESPACE.  */
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for expressions like "a{1,3}".
+   Fetch a number from `input', and return the number.
+   Return -1 if the number field is empty, as in "{,1}".
+   Return -2 if an error occurred.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int value = -1;	/* -1: no digit seen yet; -2: invalid.  */
+  unsigned char ch;
+  for (;;)
+    {
+      fetch_token (token, input, syntax);
+      ch = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+	return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || ch == ',')
+	return value;
+      value = ((value != -2 && token->type == CHARACTER && '0' <= ch && ch <= '9')
+	       ? (value == -1 ? 0 : value * 10) + (ch - '0') : -2);
+      if (value > RE_DUP_MAX)
+	value = -2;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+static void
+free_charset (re_charset_t *cset)
+{
+# ifdef _LIBC
+  re_free (cset->range_ends);		/* Members freed only under _LIBC.  */
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  re_free (cset->char_classes);
+  re_free (cset->mbchars);
+  re_free (cset);			/* The container itself goes last.  */
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Create a tree node of TYPE with children LEFT and RIGHT.  */
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+	     re_token_type_t type)
+{
+  re_token_t t;
+  memset (&t, 0, sizeof t);	/* Don't hand over indeterminate fields.  */
+  t.type = type;
+  return create_token_tree (dfa, left, right, &t);
+}
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+		   const re_token_t *token)
+{
+  bin_tree_t *node;
+
+  /* The current storage chunk is exhausted: chain a fresh one in front.  */
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      bin_tree_storage_t *chunk = re_malloc (bin_tree_storage_t, 1);
+      if (chunk == NULL)
+	return NULL;
+      chunk->next = dfa->str_tree_storage;
+      dfa->str_tree_storage_idx = 0;
+      dfa->str_tree_storage = chunk;
+    }
+  /* Take the next free slot of the chunk and initialise it.  */
+  node = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+  node->token = *token;
+  node->token.duplicated = 0;
+  node->token.opt_subexp = 0;
+  node->node_idx = -1;
+  node->first = NULL;
+  node->next = NULL;
+  node->parent = NULL;
+  node->left = left;
+  node->right = right;
+
+  if (left != NULL)
+    left->parent = node;	/* Adopt the children.  */
+  if (right != NULL)
+    right->parent = node;
+  return node;
+}
+
+/* Mark the tree SRC as an optional subexpression.
+   To be called from preorder or postorder.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  const int wanted = (int) (long) extra;	/* Index passed as a pointer.  */
+  re_token_t *tok = &node->token;
+  if (tok->type == SUBEXP && tok->opr.idx == wanted)
+    tok->opt_subexp = 1;
+  return REG_NOERROR;
+}
+
+/* Free the memory owned by NODE's token; duplicated tokens own nothing.  */
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+#ifdef RE_ENABLE_I18N
+  else if (node->type == COMPLEX_BRACKET)
+    free_charset (node->opr.mbcset);
+#endif /* RE_ENABLE_I18N */
+}
+
+/* Worker function for tree walking.  Free the memory held by NODE's own
+   token; EXTRA is unused and REG_NOERROR is always returned.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  free_token (&node->token);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the node SRC, and return new node.  This is a preorder
+   visit similar to the one implemented by the generic visitor, but
+   we need more infrastructure to maintain two parallel trees --- so,
+   it's easier to duplicate.  */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+  const bin_tree_t *node;
+  bin_tree_t *dup_root;
+  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+  for (node = root; ; )
+    {
+      /* Create a new tree and link it back to the current parent.  */
+      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+      if (*p_new == NULL)
+	return NULL;	/* create_token_tree could not get storage.  */
+      (*p_new)->parent = dup_node;
+      (*p_new)->token.duplicated = 1;	/* free_token skips such tokens.  */
+      dup_node = *p_new;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	{
+	  node = node->left;
+	  p_new = &dup_node->left;
+	}
+      else
+	{
+	  const bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;	/* Remember which child we climb up from.  */
+	      node = node->parent;
+	      dup_node = dup_node->parent;
+	      if (!node)	/* NOTE(review): relies on a NULL parent above ROOT.  */
+	        return dup_root;
+	    }
+	  node = node->right;
+	  p_new = &dup_node->right;
+	}
+    }
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regexec.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+				     int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+					  int str_idx, int from, int to)
+     internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+     internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+					   int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+						   int node, int str_idx)
+     internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+			   re_dfastate_t **limited_sts, int last_node,
+			   int last_str_idx)
+     internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+					 const char *string, int length,
+					 int start, int range, int stop,
+					 size_t nmatch, regmatch_t pmatch[],
+					 int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+			     const char *string1, int length1,
+			     const char *string2, int length2,
+			     int start, int range, struct re_registers *regs,
+			     int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+			   const char *string, int length, int start,
+			   int range, int stop, struct re_registers *regs,
+			   int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+			      int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+     internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+			   int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+				     const re_dfastate_t *state, int idx)
+     internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+			 regmatch_t *prev_idx_match, int cur_node,
+			 int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+				      int str_idx, int dest_node, int nregs,
+				      regmatch_t *regs,
+				      re_node_set *eps_via_nodes)
+     internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+			       const re_match_context_t *mctx,
+			       size_t nmatch, regmatch_t *pmatch,
+			       int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+     internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+				re_sift_context_t *sctx,
+				int node_idx, int str_idx, int max_str_idx)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+					   re_sift_context_t *sctx)
+     internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+					  re_sift_context_t *sctx, int str_idx,
+					  re_node_set *cur_dest)
+     internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+					      re_sift_context_t *sctx,
+					      int str_idx,
+					      re_node_set *dest_nodes)
+     internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+					    re_node_set *dest_nodes,
+					    const re_node_set *candidates)
+     internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+			     re_node_set *limits,
+			     int dst_node, int dst_idx, int src_node,
+			     int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+					int boundaries, int subexp_idx,
+					int from_node, int bkref_idx)
+     internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+				      int limit, int subexp_idx,
+				      int node, int str_idx,
+				      int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+					  re_node_set *dest_nodes,
+					  const re_node_set *candidates,
+					  re_node_set *limits,
+					  struct re_backref_cache_entry *bkref_ents,
+					  int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+					re_sift_context_t *sctx,
+					int str_idx, const re_node_set *candidates)
+     internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+					re_dfastate_t **dst,
+					re_dfastate_t **src, int num)
+     internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+					 re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+				     re_match_context_t *mctx,
+				     re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+					    re_match_context_t *mctx,
+					    re_dfastate_t *next_state)
+     internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+						re_node_set *cur_nodes,
+						int str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+					re_match_context_t *mctx,
+					re_dfastate_t *pstate)
+     internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+				       re_dfastate_t *pstate)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+					  const re_node_set *nodes)
+     internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+				 int bkref_node, int bkref_str_idx)
+     internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+				     const re_sub_match_top_t *sub_top,
+				     re_sub_match_last_t *sub_last,
+				     int bkref_node, int bkref_str)
+     internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+			     int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+				    state_array_t *path, int top_node,
+				    int top_str, int last_node, int last_str,
+				    int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+						   int str_idx,
+						   re_node_set *cur_nodes,
+						   re_node_set *next_nodes)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+					       re_node_set *cur_nodes,
+					       int ex_subexp, int type)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+						   re_node_set *dst_nodes,
+						   int target, int ex_subexp,
+						   int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+					 re_node_set *cur_nodes, int cur_str,
+					 int subexp_num, int type)
+     internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+			  re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+				    const re_string_t *input, int idx)
+     internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+						   size_t name_len)
+     internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+				       const re_dfastate_t *state,
+				       re_node_set *states_node,
+				       bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+			      const re_token_t *node, int idx)
+     internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+     internal_function;
+
+/* Entry point for POSIX code.  */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  */
+
+int
+regexec (const regex_t *__restrict preg,
+	 const char *__restrict string,
+	 size_t nmatch,
+	 regmatch_t pmatch[],
+	 int eflags)
+{
+  reg_errcode_t err;
+  int start, length;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+    return REG_BADPAT;
+
+  /* With REG_STARTEND the caller passes the search bounds in pmatch[0].  */
+  if (eflags & REG_STARTEND)
+    {
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  if (preg->no_sub)
+    err = re_search_internal (preg, string, length, start, length - start,
+			      length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+			      length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+		  const char *__restrict string, size_t nmatch,
+		  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+		  eflags & (REG_NOTBOL | REG_NOTEOL)); /* Other bits dropped.  */
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on the concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+   the first STOP characters of the concatenation of the strings should be
+   considered.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
+   computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+int
+re_match (struct re_pattern_buffer *bufp,
+	  const char *string,
+	  int length, int start,
+	  struct re_registers *regs)
+{
+  /* RANGE 0: try START only; RET_LEN 1: return the match length.  */
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (struct re_pattern_buffer *bufp,
+	   const char *string,
+	   int length, int start, int range,
+	   struct re_registers *regs)
+{
+  /* RET_LEN 0: return the position where the match starts.  */
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (struct re_pattern_buffer *bufp,
+	    const char *string1, int length1,
+	    const char *string2, int length2,
+	    int start, struct re_registers *regs, int stop)
+{
+  /* RANGE 0: try START only; RET_LEN 1: return the match length.  */
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, 0, regs, stop, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (struct re_pattern_buffer *bufp,
+	     const char *string1, int length1,
+	     const char *string2, int length2,
+	     int start, int range, struct re_registers *regs, int stop)
+{
+  /* RET_LEN 0: return the position where the match starts.  */
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+static int
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+		  int length1, const char *string2, int length2, int start,
+		  int range, struct re_registers *regs, int stop, int ret_len)
+{
+  const char *str;
+  int rval;
+  int len;
+  int free_str = 0;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+  /* Both lengths are non-negative, so the unsigned sum cannot wrap;
+     reject totals that do not fit in an int instead of overflowing.  */
+  if (BE ((unsigned int) length1 + length2 > (unsigned int) -1 / 2, 0))
+    return -2;
+  len = length1 + length2;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    if (length1 > 0)
+      {
+	char *s = re_malloc (char, len);
+	if (BE (s == NULL, 0))
+	  return -2;
+#ifdef _LIBC
+	memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+	memcpy (s, string1, length1);
+	memcpy (s + length1, string2, length2);
+#endif
+	str = s;
+	free_str = 1;
+      }
+    else
+      str = string2;
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
+  if (free_str)
+    re_free ((char *) str);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.  */
+
+static int
+re_search_stub (struct re_pattern_buffer *bufp,
+		const char *string,
+		int length, int start, int range, int stop,
+		struct re_registers *regs,
+		int ret_len)
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  if (BE (start + range > length, 0))
+    range = length - start;
+  else if (BE (start + range < 0, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+	       regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+	{
+	  /* Nothing can be copied to regs.  */
+	  regs = NULL;
+	  nregs = 1;
+	}
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+			       nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    rval = -1;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+					   bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+	rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+	{
+	  assert (pmatch[0].rm_so == start);
+	  rval = pmatch[0].rm_eo - start;
+	}
+      else
+	rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+static unsigned
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+	      int regs_allocated)
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;	/* +1 for the `-1' marker GNU code uses.  */
+
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* Not allocated yet: get both arrays with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+	{
+	  /* Free whichever array was obtained; leave no stale pointers.  */
+	  re_free (regs->start);
+	  re_free (regs->end);
+	  regs->start = regs->end = NULL;
+	  return REGS_UNALLOCATED;
+	}
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Allocated: grow them if too small, else leave them alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+	{
+	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
+	  regoff_t *new_end;
+	  if (BE (new_start == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  /* The old block may be gone: store the new one before END can fail.  */
+	  regs->start = new_start;
+	  new_end = re_realloc (regs->end, regoff_t, need_regs);
+	  if (BE (new_end == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  regs->end = new_end;
+	  regs->num_regs = need_regs;
+	}
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  /* NUM_REGS == 0: forget any register arrays and let the next match
+     allocate its own.  */
+  if (num_regs == 0)
+    {
+      regs->start = (regoff_t *) 0;
+      regs->end = (regoff_t *) 0;
+      regs->num_regs = 0;
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      return;
+    }
+
+  /* Otherwise adopt the caller's arrays; they may be reallocated later.  */
+  regs->start = starts;
+  regs->end = ends;
+  regs->num_regs = num_regs;
+  bufp->regs_allocated = REGS_REALLOCATE;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+     const char *s;
+{
+  /* regexec returns 0 on a match; this entry point returns nonzero
+     exactly in that case.  */
+  int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.  */
+
+/* Search for the compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings as for regexec.  START and RANGE have the same meanings
+   as for re_search.  STOP is stored as the `stop' and `raw_stop'
+   limits of the input.
+   Return REG_NOERROR if we find a match, REG_NOMATCH if not, and
+   otherwise the error code.
+   Note: We assume the front-end functions already checked the ranges
+   (START + RANGE >= 0 && START + RANGE <= LENGTH).  */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+		    eflags)
+    const regex_t *preg;
+    const char *string;
+    int length, start, range, stop, eflags;
+    size_t nmatch;
+    regmatch_t pmatch[];
+{
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int extra_nmatch;
+  int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  re_match_context_t mctx = { .dfa = dfa };
+#else
+  re_match_context_t mctx;
+#endif
+  /* The fastmap is only used when it is present and accurate, RANGE is
+     nonzero, and the pattern cannot match the empty string.  */
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+  mctx.dfa = dfa;
+#endif
+
+  /* Registers beyond re_nsub + 1 are never set by the matcher: remember
+     how many there are (they are filled with -1 at the end) and work
+     only on the remaining NMATCH registers.  */
+  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+  nmatch -= extra_nmatch;
+
+  /* Check whether the DFA has not been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	  || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  /* If initial states with non-begbuf contexts have no elements,
+     the regex must be anchored.  If preg->newline_anchor is set,
+     we'll never use init_state_nl, so do not check it.  */
+  if (dfa->init_state->nodes.nelem == 0
+      && dfa->init_state_word->nodes.nelem == 0
+      && (dfa->init_state_nl->nodes.nelem == 0
+	  || !preg->newline_anchor))
+    {
+      /* Only position 0 can match; give up unless it is one end of the
+	 requested range, and then try only that position.  */
+      if (start != 0 && start + range != 0)
+        return REG_NOMATCH;
+      start = range = 0;
+    }
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+			    preg->translate, preg->syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  mctx.input.stop = stop;
+  mctx.input.raw_stop = stop;
+  mctx.input.newline_anchor = preg->newline_anchor;
+
+  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa pass,
+     if nmatch > 1, or this dfa has "multibyte node", which is a
+     back-reference or a node which can accept multibyte character or
+     multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+	{
+	  err = REG_ESPACE;
+	  goto free_return;
+	}
+    }
+  else
+    mctx.state_log = NULL;
+
+  match_first = start;
+  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+  /* Check incrementally whether or not the input string matches.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = dfa->mb_cur_max == 1;
+  /* Select the scanning strategy used by the switch below.  Without a
+     fastmap the value is 8.  Otherwise it is a bit mask: 4 when the
+     bytes of STRING can be looked up directly (single-byte, or neither
+     RE_ICASE nor a translate table), 2 when searching forward, and 1
+     when a translate table is present.  */
+  match_kind =
+    (fastmap
+     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+	| (range >= 0 ? 2 : 0)
+	| (t != NULL ? 1 : 0))
+     : 8);
+
+  /* Try each candidate start position in turn; the loop is left by
+     `break' on a match and by `goto free_return' otherwise.  */
+  for (;; match_first += incr)
+    {
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+	goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+	 find a plausible place to start matching.  This may be done
+	 with varying efficiency, so there are various possibilities:
+	 only the most common of them are specialized, in order to
+	 save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+	{
+	case 8:
+	  /* No fastmap.  */
+	  break;
+
+	case 7:
+	  /* Fastmap with single-byte translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[t[(unsigned char) string[match_first]]])
+	    ++match_first;
+	  goto forward_match_found_start_or_reached_end;
+
+	case 6:
+	  /* Fastmap without translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[(unsigned char) string[match_first]])
+	    ++match_first;
+
+	forward_match_found_start_or_reached_end:
+	  /* At the right limit the byte has not been tested yet; a
+	     position at or past LENGTH is treated as a '\0' byte.  */
+	  if (BE (match_first == right_lim, 0))
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (!fastmap[t ? t[ch] : ch])
+		goto free_return;
+	    }
+	  break;
+
+	case 4:
+	case 5:
+	  /* Fastmap without multi-byte translation, match backwards.  */
+	  while (match_first >= left_lim)
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (fastmap[t ? t[ch] : ch])
+		break;
+	      --match_first;
+	    }
+	  if (match_first < left_lim)
+	    goto free_return;
+	  break;
+
+	default:
+	  /* In this case, we can't determine easily the current byte,
+	     since it might be a component byte of a multibyte
+	     character.  Then we use the constructed buffer instead.  */
+	  for (;;)
+	    {
+	      /* If MATCH_FIRST is out of the valid range, reconstruct the
+		 buffers.  */
+	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+		{
+		  err = re_string_reconstruct (&mctx.input, match_first,
+					       eflags);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto free_return;
+
+		  offset = match_first - mctx.input.raw_mbs_idx;
+		}
+	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+		 Note that MATCH_FIRST must not be smaller than 0.  */
+	      ch = (match_first >= length
+		    ? 0 : re_string_byte_at (&mctx.input, offset));
+	      if (fastmap[ch])
+		break;
+	      match_first += incr;
+	      if (match_first < left_lim || match_first > right_lim)
+	        {
+	          err = REG_NOMATCH;
+	          goto free_return;
+	        }
+	    }
+	  break;
+	}
+
+      /* Reconstruct the buffers so that the matcher can assume that
+	 the matching starts from the beginning of the buffer.  */
+      err = re_string_reconstruct (&mctx.input, match_first, eflags);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+#ifdef RE_ENABLE_I18N
+     /* Don't consider this char as a possible match start if it is part,
+	yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+	continue;
+#endif
+
+      /* It seems to be appropriate one, then use the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      /* MATCH_FIRST is only handed to the matcher (so that it can skip
+	 ahead after a failure) when searching forward.  */
+      match_last = check_matching (&mctx, fl_longest_match,
+				   range >= 0 ? &match_first : NULL);
+      if (match_last != -1)
+	{
+	  /* -2 is the matcher's error indication.  */
+	  if (BE (match_last == -2, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  else
+	    {
+	      mctx.match_last = match_last;
+	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+		{
+		  re_dfastate_t *pstate = mctx.state_log[match_last];
+		  mctx.last_node = check_halt_state_context (&mctx, pstate,
+							     match_last);
+		}
+	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+		  || dfa->nbackref)
+		{
+		  /* REG_NOMATCH from the pruning step rejects this start
+		     position only; any other failure is fatal.  */
+		  err = prune_impossible_nodes (&mctx);
+		  if (err == REG_NOERROR)
+		    break;
+		  if (BE (err != REG_NOMATCH, 0))
+		    goto free_return;
+		  match_last = -1;
+		}
+	      else
+		break; /* We found a match.  */
+	    }
+	}
+
+      match_ctx_clean (&mctx);
+    }
+
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
+  /* Set pmatch[] if we need.  */
+  if (nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where matching start/end.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+	{
+	  err = set_regs (preg, &mctx, nmatch, pmatch,
+			  dfa->has_plural_match && dfa->nbackref > 0);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	}
+
+      /* At last, add the offset to each register, since we slid
+	 the buffers so that we could assume that the matching starts
+	 from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+	if (pmatch[reg_idx].rm_so != -1)
+	  {
+#ifdef RE_ENABLE_I18N
+	    /* When an offsets table is in use, convert buffer positions
+	       through it first.  */
+	    if (BE (mctx.input.offsets_needed != 0, 0))
+	      {
+		pmatch[reg_idx].rm_so =
+		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+		pmatch[reg_idx].rm_eo =
+		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+	      }
+#else
+	    assert (mctx.input.offsets_needed == 0);
+#endif
+	    pmatch[reg_idx].rm_so += match_first;
+	    pmatch[reg_idx].rm_eo += match_first;
+	  }
+      /* The registers set aside at the top are reported as unused.  */
+      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+	{
+	  pmatch[nmatch + reg_idx].rm_so = -1;
+	  pmatch[nmatch + reg_idx].rm_eo = -1;
+	}
+
+      /* Registers whose subexp_map entry names a different
+	 subexpression take over that subexpression's offsets.  */
+      if (dfa->subexp_map)
+        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+          if (dfa->subexp_map[reg_idx] != reg_idx)
+            {
+              pmatch[reg_idx + 1].rm_so
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+              pmatch[reg_idx + 1].rm_eo
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+            }
+    }
+
+  /* Common exit: release everything owned by the match context and
+     return whatever ERR holds at this point.  */
+ free_return:
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&mctx.input);
+  return err;
+}
+
+/* Replace MCTX->state_log by a sifted copy computed backwards from the
+   halt node MCTX->last_node at index MCTX->match_last.
+
+   When the DFA has back references, the sifting is repeated with
+   earlier halt states from the state log until either the sifted or the
+   limited state at index 0 is non-NULL; MCTX->last_node and
+   MCTX->match_last are updated to the halt position finally used.
+
+   Return REG_NOERROR on success, REG_NOMATCH if no earlier halt state is
+   left to try, REG_ESPACE if an array cannot be allocated, or the error
+   reported by the sifting/merging helpers.  On failure MCTX->state_log
+   is left untouched.  */
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+     re_match_context_t *mctx;
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+	{
+	  ret = REG_ESPACE;
+	  goto free_return;
+	}
+      while (1)
+	{
+	  /* Each attempt starts from an all-NULL LIM_STATES array.  */
+	  memset (lim_states, '\0',
+		  sizeof (re_dfastate_t *) * (match_last + 1));
+	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+			 match_last);
+	  ret = sift_states_backward (mctx, &sctx);
+	  re_node_set_free (&sctx.limits);
+	  if (BE (ret != REG_NOERROR, 0))
+	      goto free_return;
+	  /* Something survived at index 0: this halt position is kept.  */
+	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
+	    break;
+	  /* Otherwise step back to the previous logged halt state, if
+	     there is one, and try again from there.  */
+	  do
+	    {
+	      --match_last;
+	      if (match_last < 0)
+		{
+		  ret = REG_NOMATCH;
+		  goto free_return;
+		}
+	    } while (mctx->state_log[match_last] == NULL
+		     || !mctx->state_log[match_last]->halt);
+	  halt_node = check_halt_state_context (mctx,
+						mctx->state_log[match_last],
+						match_last);
+	}
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+			       match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+    }
+  else
+    {
+      /* No back references: a single sifting pass (LIM_STATES is NULL
+	 here) and no retry.  */
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+      ret = sift_states_backward (mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+    }
+  /* Hand the sifted array over to MCTX; clearing the local pointer keeps
+     the cleanup below from freeing it.  */
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Return the initial DFA state to use when matching starts at IDX.
+   When the initial state carries constraints (such as "\<" or "^"), the
+   choice depends on the context of the character preceding IDX; the
+   rarely needed begbuf-only state is built on demand, with any failure
+   reported through ERR.  */
+
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+			    int idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  unsigned int context;
+
+  /* Unconstrained initial state: the context is irrelevant.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+  if (IS_NEWLINE_CONTEXT (context))
+    return (IS_BEGBUF_CONTEXT (context)
+	    ? dfa->init_state_begbuf : dfa->init_state_nl);
+  if (IS_BEGBUF_CONTEXT (context))
+    /* It is relatively rare case, then calculate on demand.  */
+    return re_acquire_state_context (err, dfa,
+				     dfa->init_state->entrance_nodes,
+				     context);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string held in
+   MCTX, and return the index where the match ends.  Return -1 if there
+   is no match, or -2 in case of an error (never a reg_errcode_t value,
+   since the caller interprets every other result as a string index).
+   FL_LONGEST_MATCH means we want the POSIX longest match.
+   If P_MATCH_FIRST is not NULL, and the match fails, it is advanced to the
+   next place where we may want to try matching.
+   Note that the matcher assumes that the matching starts from the current
+   index of the buffer.  */
+
+static int
+internal_function
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+		int *p_match_first)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+  re_dfastate_t *cur_state;
+  int at_init_state = p_match_first != NULL;
+  int next_start_idx = cur_str_idx;
+
+  err = REG_NOERROR;
+  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+  /* An initial state must not be NULL (invalid).  */
+  if (BE (cur_state == NULL, 0))
+    {
+      assert (err == REG_ESPACE);
+      return -2;
+    }
+
+  if (mctx->state_log != NULL)
+    {
+      mctx->state_log[cur_str_idx] = cur_state;
+
+      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+	 later.  E.g. Processing back references.  */
+      if (BE (dfa->nbackref, 0))
+	{
+	  at_init_state = 0;
+	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+	  /* Report failures as -2: returning ERR itself would hand the
+	     caller a positive value that looks like a match position.  */
+	  if (BE (err != REG_NOERROR, 0))
+	    return -2;
+
+	  if (cur_state->has_backref)
+	    {
+	      err = transit_state_bkref (mctx, &cur_state->nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		return -2;
+	    }
+	}
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (BE (cur_state->halt, 0))
+    {
+      if (!cur_state->has_constraint
+	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
+	{
+	  if (!fl_longest_match)
+	    return cur_str_idx;
+	  else
+	    {
+	      match_last = cur_str_idx;
+	      match = 1;
+	    }
+	}
+    }
+
+  while (!re_string_eoi (&mctx->input))
+    {
+      re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      /* Make room before stepping past the end of the buffers, or past
+	 the valid part of an input that has not been read completely.  */
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
+	      && mctx->input.valid_len < mctx->input.len))
+	{
+	  err = extend_buffers (mctx);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      assert (err == REG_ESPACE);
+	      return -2;
+	    }
+	}
+
+      cur_state = transit_state (&err, mctx, cur_state);
+      if (mctx->state_log != NULL)
+	cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+      if (cur_state == NULL)
+	{
+	  /* Reached the invalid state or an error.  Try to recover a valid
+	     state using the state log, if available and if we have not
+	     already found a valid (even if not the longest) match.  */
+	  if (BE (err != REG_NOERROR, 0))
+	    return -2;
+
+	  if (mctx->state_log == NULL
+	      || (match && !fl_longest_match)
+	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
+	    break;
+	}
+
+      /* While the automaton stays in its initial state, a later start
+	 position would behave the same, so remember how far the caller
+	 may skip ahead.  */
+      if (BE (at_init_state, 0))
+	{
+	  if (old_state == cur_state)
+	    next_start_idx = next_char_idx;
+	  else
+	    at_init_state = 0;
+	}
+
+      if (cur_state->halt)
+	{
+	  /* Reached a halt state.
+	     Check the halt state can satisfy the current context.  */
+	  if (!cur_state->has_constraint
+	      || check_halt_state_context (mctx, cur_state,
+					   re_string_cur_idx (&mctx->input)))
+	    {
+	      /* We found an appropriate halt state.  */
+	      match_last = re_string_cur_idx (&mctx->input);
+	      match = 1;
+
+	      /* We found a match, do not modify match_first below.  */
+	      p_match_first = NULL;
+	      if (!fl_longest_match)
+		break;
+	    }
+	}
+    }
+
+  if (p_match_first)
+    *p_match_first += next_start_idx;
+
+  return match_last;
+}
+
+/* Return 1 if NODE is an END_OF_RE node whose constraint (if any) is
+   satisfied by CONTEXT, and 0 otherwise.  */
+
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+{
+  unsigned int constraint;
+
+  /* Only END_OF_RE nodes can halt.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  constraint = dfa->nodes[node].constraint;
+  if (constraint && NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
+    return 0;
+
+  return 1;
+}
+
+/* Check whether the halt state STATE matches the context at index IDX.
+   Return the first node of STATE that is a halt node satisfied by that
+   context, or 0 if there is none.  */
+
+static int
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+			  const re_dfastate_t *state, int idx)
+{
+  unsigned int context;
+  int pos = 0;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  while (pos < state->nodes.nelem)
+    {
+      int node = state->nodes.elems[pos++];
+      if (check_halt_node_context (mctx->dfa, node, context))
+	return node;
+    }
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA), for the string index *PIDX.
+   Return the destination node.  EPS_VIA_NODES collects the nodes passed
+   through without consuming input and is emptied when input is consumed,
+   in which case *PIDX is advanced.  If FS is not NULL, alternative
+   epsilon destinations are pushed on it for later backtracking.
+   Return -1 if there is no valid destination, and -2 if memory could
+   not be allocated.  */
+
+static int
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+		   int *pidx, int node, re_node_set *eps_via_nodes,
+		   struct re_fail_stack_t *fs)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i, err;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+	return -2;
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+	{
+	  int candidate = edests->elems[i];
+	  /* Only destinations present in the logged state are valid.  */
+	  if (!re_node_set_contains (cur_nodes, candidate))
+	    continue;
+          if (dest_node == -1)
+	    dest_node = candidate;
+
+          else
+	    {
+	      /* In order to avoid infinite loop like "(a*)*", return the second
+	         epsilon-transition if the first was already considered.  */
+	      if (re_node_set_contains (eps_via_nodes, dest_node))
+	        return candidate;
+
+	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
+	      else if (fs != NULL
+		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+				           eps_via_nodes))
+		return -2;
+
+	      /* We know we are going to exit.  */
+	      break;
+	    }
+	}
+      return dest_node;
+    }
+  else
+    {
+      /* Number of input bytes this node consumes; 0 means "not yet
+	 determined", in which case check_node_accept decides below.  */
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+	{
+	  /* A back reference consumes as many bytes as the referenced
+	     subexpression matched.
+	     NOTE(review): REGS is indexed with SUBEXP_IDX without comparing
+	     it against NREGS here -- confirm that callers guarantee the
+	     register exists.  */
+	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
+	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+	  if (fs != NULL)
+	    {
+	      /* When backtracking, the referenced text must be complete
+		 and must reappear at *PIDX.  */
+	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+		return -1;
+	      else if (naccepted)
+		{
+		  char *buf = (char *) re_string_get_buffer (&mctx->input);
+		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+			      naccepted) != 0)
+		    return -1;
+		}
+	    }
+
+	  /* An empty back reference is handled like an epsilon node.  */
+	  if (naccepted == 0)
+	    {
+	      int dest_node;
+	      err = re_node_set_insert (eps_via_nodes, node);
+	      if (BE (err < 0, 0))
+		return -2;
+	      dest_node = dfa->edests[node].elems[0];
+	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					dest_node))
+		return dest_node;
+	    }
+	}
+
+      if (naccepted != 0
+	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
+	{
+	  int dest_node = dfa->nexts[node];
+	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+	  /* When backtracking, the destination must lie within the match
+	     and be present in the logged state at the new index.  */
+	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					       dest_node)))
+	    return -1;
+	  re_node_set_empty (eps_via_nodes);
+	  return dest_node;
+	}
+    }
+  return -1;
+}
+
+/* Push a backtracking point on the fail stack FS: the string index
+   STR_IDX, the node DEST_NODE to resume from, a private copy of the NREGS
+   registers in REGS, and a copy of EPS_VIA_NODES.
+
+   Return REG_NOERROR on success, or REG_ESPACE (or the error reported
+   while copying EPS_VIA_NODES) on failure.  FS->num is advanced only once
+   the new entry is completely initialized, so that after a failure
+   free_fail_stack_return never visits a half-built entry.  */
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  reg_errcode_t err;
+  int num = fs->num;
+
+  /* Grow the array as soon as this push would fill it.  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+				       * fs->alloc * 2));
+      if (new_array == NULL)
+	return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+
+  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+  if (fs->stack[num].regs == NULL)
+    return REG_ESPACE;
+  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+
+  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* The entry is not going to be published; release its registers.  */
+      re_free (fs->stack[num].regs);
+      fs->stack[num].regs = NULL;
+      return err;
+    }
+
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dest_node;
+  /* Publish the entry only now that every field is valid.  */
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the most recent entry of the fail stack FS: restore its string
+   index into *PIDX and its NREGS registers into REGS, replace
+   *EPS_VIA_NODES (freeing the old set) by the saved one, and return the
+   saved node.  The stack must not be empty.  */
+static int
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+		regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  struct re_fail_stack_ent_t *top;
+  int num = --fs->num;
+  assert (num >= 0);
+  top = &fs->stack[num];
+
+  *pidx = top->idx;
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  re_node_set_free (eps_via_nodes);
+  re_free (top->regs);
+  /* Ownership of the saved node set moves to the caller.  */
+  *eps_via_nodes = top->eps_via_nodes;
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start/end to the registers
+   PMATCH, by walking the NFA from the initial node along the logged
+   states.  If FL_BACKTRACK is nonzero a fail stack is used so that
+   alternative epsilon transitions can be retried.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
+
+   Return REG_NOERROR on success, REG_NOMATCH if no path to the last
+   node is found, and REG_ESPACE if memory runs out.  The fail stack is
+   never popped while empty: at the end of the match that accepts the
+   registers as they are, elsewhere it yields REG_NOMATCH.  */
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+	  regmatch_t *pmatch, int fl_backtrack)
+{
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int idx, cur_node;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+  int prev_idx_match_malloced = 0;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+	return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  /* Scratch copy of the registers used by update_regs; kept on the stack
+     when small enough.  */
+  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+  else
+    {
+      prev_idx_match = re_malloc (regmatch_t, nmatch);
+      if (prev_idx_match == NULL)
+	{
+	  free_fail_stack_return (fs);
+	  return REG_ESPACE;
+	}
+      prev_idx_match_malloced = 1;
+    }
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+	{
+	  int reg_idx;
+	  if (fs)
+	    {
+	      /* Look for a register that was opened but never closed.  */
+	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+		  break;
+	      /* Done if all registers are consistent, or if no alternative
+		 is left to backtrack to.  */
+	      if (reg_idx == nmatch || fs->num == 0)
+		{
+		  re_node_set_free (&eps_via_nodes);
+		  if (prev_idx_match_malloced)
+		    re_free (prev_idx_match);
+		  return free_fail_stack_return (fs);
+		}
+	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+					 &eps_via_nodes);
+	    }
+	  else
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      return REG_NOERROR;
+	    }
+	}
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+				    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+	{
+	  if (BE (cur_node == -2, 0))
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      free_fail_stack_return (fs);
+	      return REG_ESPACE;
+	    }
+	  /* Dead end: retry a saved alternative if there is one.  */
+	  if (fs && fs->num > 0)
+	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+				       &eps_via_nodes);
+	  else
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      /* A no-op when FS is NULL.  */
+	      free_fail_stack_return (fs);
+	      return REG_NOMATCH;
+	    }
+	}
+    }
+  re_node_set_free (&eps_via_nodes);
+  if (prev_idx_match_malloced)
+    re_free (prev_idx_match);
+  return free_fail_stack_return (fs);
+}
+
+/* Release every entry of the fail stack FS and the stack array itself.
+   FS may be NULL.  Always return REG_NOERROR, so that callers can write
+   `return free_fail_stack_return (fs);'.  */
+static reg_errcode_t
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
+{
+  int ent;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  for (ent = 0; ent < fs->num; ++ent)
+    {
+      re_node_set_free (&fs->stack[ent].eps_via_nodes);
+      re_free (fs->stack[ent].regs);
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* Update the NMATCH registers in PMATCH for the node CUR_NODE reached at
+   string index CUR_IDX.  Only OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP nodes
+   whose register number is below NMATCH have any effect.
+   PREV_IDX_MATCH holds the last accepted register contents, used to undo
+   empty iterations of optional subexpressions.  */
+static void
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
+{
+  int type = dfa->nodes[cur_node].type;
+  int reg_num;
+
+  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+    return;
+
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+
+  if (type == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+      return;
+    }
+
+  /* We are at the last node of this sub expression.  */
+  if (pmatch[reg_num].rm_so < cur_idx)
+    {
+      /* This is a non-empty match or we are not inside an optional
+	 subexpression.  Accept this right away.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+    }
+  else if (dfa->nodes[cur_node].opt_subexp
+	   && prev_idx_match[reg_num].rm_so != -1)
+    {
+      /* We transited through an empty match for an optional
+	 subexpression, like (a?)*, and this is not the subexp's
+	 first match.  Copy back the old content of the registers
+	 so that matches of an inner subexpression are undone as
+	 well, like in ((a?))*.  */
+      memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+    }
+  else
+    {
+      /* We completed a subexpression, but it may be part of
+	 an optional one, so do not update PREV_IDX_MATCH.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+    }
+}
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+   and sift the nodes in each states according to the following rules.
+   The updated state_log will be written to STATE_LOG.
+
+   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+	the LAST_NODE, we throw away the node `a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+	string `s' and transit to `b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+	   away the node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+	    thrown away, we throw away the node `a'.
+     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+	   node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+	    we throw away the node `a'.  */
+
+/* Nonzero if STATE is not NULL and its node set contains NODE.  STATE is
+   expanded twice, so it must not have side effects.  */
+#define STATE_NODE_CONTAINS(state,node) \
+  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+/* Sift the logged states from index SCTX->last_str_idx down to 0, writing
+   the result to SCTX->sifted_states.  Return REG_NOERROR on success, or
+   the error reported by one of the helpers.  */
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+  reg_errcode_t err;
+  int null_run = 0;
+  int pos = sctx->last_str_idx;
+  re_node_set cur_dest;
+
+#ifdef DEBUG
+  assert (mctx->state_log != NULL && mctx->state_log[pos] != NULL);
+#endif
+
+  /* Build sifted state_log[pos].  It has the nodes which can epsilon
+     transit to the last_node and the last_node itself.  */
+  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  err = update_cur_sifted_state (mctx, sctx, pos, &cur_dest);
+
+  /* Then check each state in the state_log, from the back.  */
+  while (err == REG_NOERROR && pos > 0)
+    {
+      /* Count consecutive empty sifted states.  */
+      if (sctx->sifted_states[pos] == NULL)
+	++null_run;
+      else
+	null_run = 0;
+
+      /* Once the run is longer than max_mb_elem_len, clear all the
+	 earlier sifted states and stop; ERR is REG_NOERROR here.  */
+      if (null_run > mctx->max_mb_elem_len)
+	{
+	  memset (sctx->sifted_states, '\0',
+		  sizeof (re_dfastate_t *) * pos);
+	  break;
+	}
+
+      re_node_set_empty (&cur_dest);
+      --pos;
+
+      if (mctx->state_log[pos])
+	{
+	  err = build_sifted_states (mctx, sctx, pos, &cur_dest);
+	  if (BE (err != REG_NOERROR, 0))
+	    break;
+	}
+
+      /* Add all the nodes which satisfy the following conditions:
+	 - It can epsilon transit to a node in CUR_DEST.
+	 - It is in CUR_SRC.
+	 And update state_log.  */
+      err = update_cur_sifted_state (mctx, sctx, pos, &cur_dest);
+    }
+
+  re_node_set_free (&cur_dest);
+  return err;
+}
+/* Insert into CUR_DEST the state_log[STR_IDX] nodes that survive sifting.  */
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		     int str_idx, re_node_set *cur_dest)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+  int i;
+
+  /* Then build the next sifted state.
+     We build the next sifted state on `cur_dest', and update
+     `sifted_states[str_idx]' with `cur_dest'.
+     Note:
+     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+     `cur_src' points to the node_set of the old `state_log[str_idx]'
+     (with the epsilon nodes pre-filtered out).  */
+  for (i = 0; i < cur_src->nelem; i++)
+    {
+      int prev_node = cur_src->elems[i];
+      int naccepted = 0;  /* Bytes PREV_NODE is taken to accept; 0 = none.  */
+      int ret;
+
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[prev_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'.  */
+      if (dfa->nodes[prev_node].accept_mb)
+	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+					 str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+      /* We don't check backreferences here.
+	 See update_cur_sifted_state().  */
+      if (!naccepted
+	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+				  dfa->nexts[prev_node]))
+	naccepted = 1;  /* Single-byte step into a surviving successor.  */
+
+      if (naccepted == 0)
+	continue;  /* PREV_NODE is sifted out.  */
+
+      if (sctx->limits.nelem)
+	{
+	  int to_idx = str_idx + naccepted;
+	  if (check_dst_limits (mctx, &sctx->limits,
+				dfa->nexts[prev_node], to_idx,
+				prev_node, str_idx))
+	    continue;  /* The step breaks a limitation in SCTX->limits.  */
+	}
+      ret = re_node_set_insert (cur_dest, prev_node);
+      if (BE (ret == -1, 0))
+	return REG_ESPACE;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions.  */
+
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+{
+  const int old_top = mctx->state_log_top;  /* Sampled before any growth.  */
+  const int past_bufs = next_state_log_idx >= mctx->input.bufs_len;
+  const int past_valid = (next_state_log_idx >= mctx->input.valid_len
+			  && mctx->input.valid_len < mctx->input.len);
+
+  /* Index past bufs_len, or past valid_len while valid_len < len: extend.  */
+  if (past_bufs || past_valid)
+    {
+      reg_errcode_t grow_err = extend_buffers (mctx);
+      if (BE (grow_err != REG_NOERROR, 0))
+	return grow_err;
+    }
+  if (old_top >= next_state_log_idx)
+    return REG_NOERROR;  /* The log already reaches the target index.  */
+  /* NULL out slots old_top + 1 .. next_state_log_idx and raise the top.  */
+  memset (&mctx->state_log[old_top + 1], '\0',
+	  sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+  mctx->state_log_top = next_state_log_idx;
+  return REG_NOERROR;
+}
+/* Merge the first NUM entries of SRC into DST, slot by slot.  */
+static reg_errcode_t
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+		   re_dfastate_t **src, int num)
+{
+  int slot;
+  for (slot = 0; slot < num; ++slot)
+    {
+      reg_errcode_t status;
+      re_node_set both;  /* Union of the two states' node sets.  */
+      if (dst[slot] == NULL || src[slot] == NULL)  /* At most one set.  */
+	{
+	  dst[slot] = (dst[slot] == NULL) ? src[slot] : dst[slot];
+	  continue;
+	}
+      status = re_node_set_init_union (&both, &dst[slot]->nodes,
+				       &src[slot]->nodes);
+      if (BE (status != REG_NOERROR, 0))
+	return status;
+      dst[slot] = re_acquire_state (&status, dfa, &both);
+      re_node_set_free (&both);
+      if (BE (status != REG_NOERROR, 0))
+	return status;
+    }
+  return REG_NOERROR;
+}
+/* Set sifted_states[STR_IDX] from DEST_NODES, then sift back references.  */
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+			 re_sift_context_t *sctx, int str_idx,
+			 re_node_set *dest_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  const re_node_set *candidates;  /* Nodes of state_log[STR_IDX], or NULL.  */
+  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
+		: &mctx->state_log[str_idx]->nodes);
+
+  if (dest_nodes->nelem == 0)
+    sctx->sifted_states[str_idx] = NULL;  /* Nothing survived here.  */
+  else
+    {
+      if (candidates)
+	{
+	  /* At first, add the nodes which can epsilon transit to a node in
+	     DEST_NODES.  */
+	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+
+	  /* Then, check the limitations in the current sift_context.  */
+	  if (sctx->limits.nelem)
+	    {
+	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+					 mctx->bkref_ents, str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	}
+
+      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  if (candidates && mctx->state_log[str_idx]->has_backref)
+    {
+      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+/* Add to DEST_NODES the CANDIDATES nodes that can epsilon-transit into it.  */
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+  reg_errcode_t err = REG_NOERROR;
+  int i;
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  if (!state->inveclosure.alloc)  /* Union not built for STATE yet.  */
+    {
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+      if (BE (err != REG_NOERROR, 0))
+        return REG_ESPACE;
+      for (i = 0; i < dest_nodes->nelem; i++)
+	if (BE (re_node_set_merge (&state->inveclosure,
+				   dfa->inveclosures + dest_nodes->elems[i])
+		!= REG_NOERROR, 0))
+	  return REG_ESPACE;  /* A failed merge used to be ignored.  */
+    }
+  return re_node_set_add_intersect (dest_nodes, candidates,
+				    &state->inveclosure);
+}
+/* Remove NODE's inverse epsilon closure from DEST_NODES, with exceptions.  */
+static reg_errcode_t
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+    int ecl_idx;
+    reg_errcode_t err;
+    re_node_set *inv_eclosure = dfa->inveclosures + node;
+    re_node_set except_nodes;  /* Closure members that must be kept.  */
+    re_node_set_init_empty (&except_nodes);
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (cur_node == node)
+	  continue;
+	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+	  {
+	    int edst1 = dfa->edests[cur_node].elems[0];
+	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
+			 ? dfa->edests[cur_node].elems[1] : -1);  /* -1: none.  */
+	    if ((!re_node_set_contains (inv_eclosure, edst1)
+		 && re_node_set_contains (dest_nodes, edst1))
+		|| (edst2 > 0
+		    && !re_node_set_contains (inv_eclosure, edst2)
+		    && re_node_set_contains (dest_nodes, edst2)))
+	      {  /* CUR_NODE reaches DEST_NODES outside NODE's closure.  */
+		err = re_node_set_add_intersect (&except_nodes, candidates,
+						 dfa->inveclosures + cur_node);
+		if (BE (err != REG_NOERROR, 0))
+		  {
+		    re_node_set_free (&except_nodes);
+		    return err;
+		  }
+	      }
+	  }
+      }
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (!re_node_set_contains (&except_nodes, cur_node))
+	  {
+	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+	    re_node_set_remove_at (dest_nodes, idx);  /* NOTE(review): idx < 0?  */
+	  }
+      }
+    re_node_set_free (&except_nodes);
+    return REG_NOERROR;
+}
+/* Return 1 if some limitation in LIMITS places SRC and DST differently.  */
+static int
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+		  int dst_node, int dst_idx, int src_node, int src_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  /* Back-reference cache positions for the two string indices, looked up
+     once and reused for every limitation.  */
+  const int dst_cache = search_cur_bkref_entry (mctx, dst_idx);
+  const int src_cache = search_cur_bkref_entry (mctx, src_idx);
+  int k;
+
+  for (k = 0; k < limits->nelem; ++k)
+    {
+      const int limit = limits->elems[k];
+      const int subexp = dfa->nodes[mctx->bkref_ents[limit].node].opr.idx;
+      /* Classify the destination first, then the source, relative to the
+	 subexpression of this limitation.  */
+      const int where_dst = check_dst_limits_calc_pos (mctx, limit, subexp,
+						       dst_node, dst_idx,
+						       dst_cache);
+      const int where_src = check_dst_limits_calc_pos (mctx, limit, subexp,
+						       src_node, src_idx,
+						       src_cache);
+
+      /* Equal positions arise for
+	   <src> <dst> ( <subexp> )
+	   ( <subexp> ) <src> <dst>
+	   ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+	 and make the limitation unrelated; any difference violates it.  */
+      if (where_src != where_dst)
+	return 1;
+    }
+  return 0;
+}
+/* Scan FROM_NODE's epsilon closure for the edges of subexp SUBEXP_IDX.  */
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+			     int subexp_idx, int from_node, int bkref_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *eclosures = dfa->eclosures + from_node;
+  int node_idx;
+
+  /* Else, we are on the boundary: examine the nodes on the epsilon
+     closure.  */
+  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+    {
+      int node = eclosures->elems[node_idx];
+      switch (dfa->nodes[node].type)
+	{
+	case OP_BACK_REF:
+	  if (bkref_idx != -1)
+	    {
+	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+	      do
+	        {
+		  int dst, cpos;
+
+		  if (ent->node != node)
+		    continue;  /* Goes to the `while' test, advancing ENT.  */
+
+		  if (subexp_idx < BITSET_WORD_BITS
+		      && !(ent->eps_reachable_subexps_map
+			   & ((bitset_word_t) 1 << subexp_idx)))
+		    continue;  /* Bit for SUBEXP_IDX is clear in the map.  */
+
+		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
+		     OP_CLOSE_SUBEXP cases below.  But, if the
+		     destination node is the same node as the source
+		     node, don't recurse because it would cause an
+		     infinite loop: a regex that exhibits this behavior
+		     is ()\1*\1*  */
+		  dst = dfa->edests[node].elems[0];
+		  if (dst == from_node)
+		    {
+		      if (boundaries & 1)
+		        return -1;
+		      else /* if (boundaries & 2) */
+		        return 0;
+		    }
+
+		  cpos =
+		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+						 dst, bkref_idx);
+		  if (cpos == -1 /* && (boundaries & 1) */)
+		    return -1;
+		  if (cpos == 0 && (boundaries & 2))
+		    return 0;
+
+		  if (subexp_idx < BITSET_WORD_BITS)
+		    ent->eps_reachable_subexps_map
+		      &= ~((bitset_word_t) 1 << subexp_idx);  /* Clear the bit.  */
+	        }
+	      while (ent++->more);
+	    }
+	  break;
+
+	case OP_OPEN_SUBEXP:
+	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return -1;
+	  break;
+
+	case OP_CLOSE_SUBEXP:
+	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return 0;
+	  break;
+
+	default:
+	    break;
+	}
+    }
+
+  return (boundaries & 2) ? 1 : 0;  /* No decisive node in the closure.  */
+}
+/* Place (FROM_NODE, STR_IDX) relative to limitation LIMIT: -1, 0 or 1.  */
+static int
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+			   int subexp_idx, int from_node, int str_idx,
+			   int bkref_idx)
+{
+  const struct re_backref_cache_entry *entry = &mctx->bkref_ents[limit];
+  int edges = 0;
+
+  /* Strictly before the subexpression span: -1; strictly after it: 1.  */
+  if (str_idx < entry->subexp_from)
+    return -1;
+  else if (str_idx > entry->subexp_to)
+    return 1;
+
+  /* Bit 0 marks the opening edge of the span, bit 1 the closing edge.  */
+  if (str_idx == entry->subexp_from)
+    edges |= 1;
+  if (str_idx == entry->subexp_to)
+    edges |= 2;
+
+  /* Strictly inside the span: 0.  On an edge the epsilon closure decides.  */
+  return (edges == 0 ? 0
+	  : check_dst_limits_calc_pos_1 (mctx, edges, subexp_idx, from_node,
+					 bkref_idx));
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+   which are against limitations from DEST_NODES. */
+
+static reg_errcode_t
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		     const re_node_set *candidates, re_node_set *limits,
+		     struct re_backref_cache_entry *bkref_ents, int str_idx)
+{
+  reg_errcode_t err;
+  int node_idx, lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int subexp_idx;
+      struct re_backref_cache_entry *ent;
+      ent = bkref_ents + limits->elems[lim_idx];
+
+      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+	continue; /* This is an unrelated limitation.  */
+
+      subexp_idx = dfa->nodes[ent->node].opr.idx;
+      if (ent->subexp_to == str_idx)
+	{
+	  int ops_node = -1;  /* Last matching OP_OPEN_SUBEXP seen, or -1.  */
+	  int cls_node = -1;  /* Last matching OP_CLOSE_SUBEXP seen, or -1.  */
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_OPEN_SUBEXP
+		  && subexp_idx == dfa->nodes[node].opr.idx)
+		ops_node = node;
+	      else if (type == OP_CLOSE_SUBEXP
+		       && subexp_idx == dfa->nodes[node].opr.idx)
+		cls_node = node;
+	    }
+
+	  /* Check the limitation of the open subexpression.  */
+	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
+	  if (ops_node >= 0)
+	    {
+	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+					   candidates);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+
+	  /* Check the limitation of the close subexpression.  */
+	  if (cls_node >= 0)
+	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	      {
+		int node = dest_nodes->elems[node_idx];
+		if (!re_node_set_contains (dfa->inveclosures + node,
+					   cls_node)
+		    && !re_node_set_contains (dfa->eclosures + node,
+					      cls_node))
+		  {
+		    /* It is against this limitation.
+		       Remove it from the current sifted state.  */
+		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+						 candidates);
+		    if (BE (err != REG_NOERROR, 0))
+		      return err;
+		    --node_idx;  /* Revisit this slot after the removal.  */
+		  }
+	      }
+	}
+      else /* (ent->subexp_to != str_idx)  */
+	{
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+		{
+		  if (subexp_idx != dfa->nodes[node].opr.idx)
+		    continue;
+		  /* It is against this limitation.
+		     Remove it from the current sifted state.  */
+		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+					       candidates);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+		}
+	    }
+	}
+    }
+  return REG_NOERROR;
+}
+/* Re-run the backward sift once per usable back-reference cache entry.  */
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		   int str_idx, const re_node_set *candidates)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int node_idx, node;
+  re_sift_context_t local_sctx;
+  int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+  if (first_idx == -1)
+    return REG_NOERROR;  /* No cache entry found for STR_IDX.  */
+
+  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
+
+  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+    {
+      int enabled_idx;  /* Index into bkref_ents kept in step with ENTRY.  */
+      re_token_type_t type;
+      struct re_backref_cache_entry *entry;
+      node = candidates->elems[node_idx];
+      type = dfa->nodes[node].type;
+      /* Avoid infinite loop for the REs like "()\1+".  */
+      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+	continue;
+      if (type != OP_BACK_REF)
+	continue;
+
+      entry = mctx->bkref_ents + first_idx;
+      enabled_idx = first_idx;
+      do
+	{
+	  int subexp_len;
+	  int to_idx;
+	  int dst_node;
+	  int ret;
+	  re_dfastate_t *cur_state;
+
+	  if (entry->node != node)
+	    continue;  /* Goes to the `while' test, advancing both cursors.  */
+	  subexp_len = entry->subexp_to - entry->subexp_from;
+	  to_idx = str_idx + subexp_len;
+	  dst_node = (subexp_len ? dfa->nexts[node]
+		      : dfa->edests[node].elems[0]);
+
+	  if (to_idx > sctx->last_str_idx
+	      || sctx->sifted_states[to_idx] == NULL
+	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+	      || check_dst_limits (mctx, &sctx->limits, node,
+				   str_idx, dst_node, to_idx))
+	    continue;
+
+	  if (local_sctx.sifted_states == NULL)
+	    {
+	      local_sctx = *sctx;  /* Shares SCTX's sifted_states array.  */
+	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.last_node = node;
+	  local_sctx.last_str_idx = str_idx;
+	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+	  if (BE (ret < 0, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  cur_state = local_sctx.sifted_states[str_idx];  /* Restored below.  */
+	  err = sift_states_backward (mctx, &local_sctx);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	  if (sctx->limited_states != NULL)
+	    {
+	      err = merge_state_array (dfa, sctx->limited_states,
+				       local_sctx.sifted_states,
+				       str_idx + 1);  /* Slots 0..str_idx.  */
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.sifted_states[str_idx] = cur_state;
+	  re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+	  /* mctx->bkref_ents may have changed, reload the pointer.  */
+          entry = mctx->bkref_ents + enabled_idx;
+	}
+      while (enabled_idx++, entry++->more);
+    }
+  err = REG_NOERROR;
+ free_return:
+  if (local_sctx.sifted_states != NULL)
+    {
+      re_node_set_free (&local_sctx.limits);  /* Only the copy is freed.  */
+    }
+
+  return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+static int
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		     int node_idx, int str_idx, int max_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  /* Number of bytes NODE_IDX accepts at STR_IDX, as reported by
+     check_node_accept_bytes.  */
+  const int nbytes = check_node_accept_bytes (dfa, node_idx, &mctx->input,
+					      str_idx);
+  const int end_idx = str_idx + nbytes;
+  /* Non-positive counts and ends beyond MAX_STR_IDX are returned as is.  */
+  if (nbytes <= 0 || end_idx > max_str_idx)
+    return nbytes;
+  /* The successor node was sifted out at the end position: reject.  */
+  if (!STATE_NODE_CONTAINS (sctx->sifted_states[end_idx],
+			    dfa->nexts[node_idx]))
+    return 0;
+  return nbytes;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition.  */
+
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte, and update STATE_LOG if necessary.
+   If STATE can accept a multibyte char/collating element/back reference
+   update the destination of STATE_LOG.  */
+
+static re_dfastate_t *
+internal_function
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+	       re_dfastate_t *state)
+{
+  re_dfastate_t **trtable;
+  unsigned char ch;
+
+#ifdef RE_ENABLE_I18N
+  /* If the current state can accept multibyte.  */
+  if (BE (state->accept_mb, 0))
+    {
+      *err = transit_state_mb (mctx, state);
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* Then decide the next state with the single byte.  */
+#if 0
+  if (0)
+    /* don't use transition table  */
+    return transit_state_sb (err, mctx, state);
+#endif
+
+  /* Use transition table  */
+  ch = re_string_fetch_byte (&mctx->input);  /* Presumably advances input.  */
+  for (;;)
+    {
+      trtable = state->trtable;
+      if (BE (trtable != NULL, 1))
+	return trtable[ch];
+
+      trtable = state->word_trtable;
+      if (BE (trtable != NULL, 1))
+        {
+	  unsigned int context;
+	  context
+	    = re_string_context_at (&mctx->input,
+				    re_string_cur_idx (&mctx->input) - 1,
+				    mctx->eflags);
+	  if (IS_WORD_CONTEXT (context))
+	    return trtable[ch + SBC_MAX];  /* Second half: word context.  */
+	  else
+	    return trtable[ch];
+	}
+
+      if (!build_trtable (mctx->dfa, state))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+
+      /* Retry, we now have a transition table.  */
+    }
+}
+
+/* Update the state_log if needed.  */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+		      re_dfastate_t *next_state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int cur_idx = re_string_cur_idx (&mctx->input);
+
+  if (cur_idx > mctx->state_log_top)
+    {
+      mctx->state_log[cur_idx] = next_state;  /* New top of the log.  */
+      mctx->state_log_top = cur_idx;
+    }
+  else if (mctx->state_log[cur_idx] == 0)
+    {
+      mctx->state_log[cur_idx] = next_state;
+    }
+  else
+    {
+      re_dfastate_t *pstate;
+      unsigned int context;
+      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+         the destination of a multibyte char/collating element/
+         back reference.  Then the next state is the union set of
+         these destinations and the results of the transition table.  */
+      pstate = mctx->state_log[cur_idx];
+      log_nodes = pstate->entrance_nodes;
+      if (next_state != NULL)
+        {
+          table_nodes = next_state->entrance_nodes;
+          *err = re_node_set_init_union (&next_nodes, table_nodes,
+					     log_nodes);
+          if (BE (*err != REG_NOERROR, 0))
+	    return NULL;
+        }
+      else
+        next_nodes = *log_nodes;  /* Shallow copy; not freed below.  */
+      /* Note: We already add the nodes of the initial state,
+	 then we don't need to add them here.  */
+
+      context = re_string_context_at (&mctx->input,
+				      re_string_cur_idx (&mctx->input) - 1,
+				      mctx->eflags);
+      next_state = mctx->state_log[cur_idx]
+        = re_acquire_state_context (err, dfa, &next_nodes, context);
+      /* We don't need to check errors here, since the return value of
+         this function is next_state and ERR is already set.  */
+
+      if (table_nodes != NULL)
+        re_node_set_free (&next_nodes);  /* Free only the union built above.  */
+    }
+
+  if (BE (dfa->nbackref, 0) && next_state != NULL)
+    {
+      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+	 later.  We must check them here, since the back references in the
+	 next state might use them.  */
+      *err = check_subexp_matching_top (mctx, &next_state->nodes,
+					cur_idx);
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+
+      /* If the next state has back references.  */
+      if (next_state->has_backref)
+	{
+	  *err = transit_state_bkref (mctx, &next_state->nodes);
+	  if (BE (*err != REG_NOERROR, 0))
+	    return NULL;
+	  next_state = mctx->state_log[cur_idx];  /* Re-read the log slot.  */
+	}
+    }
+
+  return next_state;
+}
+
+/* Skip bytes in the input that correspond to part of a
+   multi-byte match, then look in the log for a state
+   from which to restart matching.  */
+re_dfastate_t *
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+{
+  re_dfastate_t *cur_state;
+  do
+    {
+      int max = mctx->state_log_top;
+      int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+      do
+	{
+          if (++cur_str_idx > max)
+            return NULL;  /* Ran past state_log_top: nothing to resume.  */
+          re_string_skip_bytes (&mctx->input, 1);
+	}
+      while (mctx->state_log[cur_str_idx] == NULL);
+
+      cur_state = merge_state_with_log (err, mctx, NULL);
+    }
+  while (*err == REG_NOERROR && cur_state == NULL);  /* Retry further on.  */
+  return cur_state;
+}
+
+/* Helper functions for transit_state.  */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+   OP_OPEN_SUBEXP and which have corresponding back references in the regular
+   expression, and register them so they can be used later for evaluating
+   the corresponding back references.  */
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+			   int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int pos;
+
+  /* TODO: Inefficient.  Several OP_OPEN_SUBEXP nodes may share the same
+     subexpression index (e.g. RE: (a){2}), so every node is examined.  */
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      const int node = cur_nodes->elems[pos];
+      const re_token_t *tok = &dfa->nodes[node];
+      reg_errcode_t status;
+
+      if (tok->type != OP_OPEN_SUBEXP)
+	continue;
+      /* Only indices representable in USED_BKREF_MAP can be tested.  */
+      if (tok->opr.idx >= BITSET_WORD_BITS)
+	continue;
+      if (!(dfa->used_bkref_map & ((bitset_word_t) 1 << tok->opr.idx)))
+	continue;  /* Bit for this subexpression is not set in the map.  */
+      status = match_ctx_add_subtop (mctx, node, str_idx);
+      if (BE (status != REG_NOERROR, 0))
+	return status;
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte.  */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+		  re_dfastate_t *state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  re_node_set next_nodes;  /* Accumulates closures of accepted successors.  */
+  re_dfastate_t *next_state;
+  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+  unsigned int context;
+
+  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+    {
+      int cur_node = state->nodes.elems[node_cnt];
+      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+	{
+	  *err = re_node_set_merge (&next_nodes,
+				    dfa->eclosures + dfa->nexts[cur_node]);
+	  if (BE (*err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      return NULL;
+	    }
+	}
+    }
+  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+  /* We don't need to check errors here, since the return value of
+     this function is next_state and ERR is already set.  */
+
+  re_node_set_free (&next_nodes);
+  re_string_skip_bytes (&mctx->input, 1);
+  return next_state;
+}
+#endif /* 0 -- transit_state_sb is compiled out.  */
+
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+
+  for (i = 0; i < pstate->nodes.nelem; ++i)
+    {
+      re_node_set dest_nodes, *new_nodes;
+      int cur_node_idx = pstate->nodes.elems[i];
+      int naccepted, dest_idx;
+      unsigned int context;
+      re_dfastate_t *dest_state;
+
+      if (!dfa->nodes[cur_node_idx].accept_mb)
+        continue;
+
+      if (dfa->nodes[cur_node_idx].constraint)
+	{
+	  context = re_string_context_at (&mctx->input,
+					  re_string_cur_idx (&mctx->input),
+					  mctx->eflags);
+	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+					   context))
+	    continue;
+	}
+
+      /* How many bytes the node can accept?  */
+      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+					   re_string_cur_idx (&mctx->input));
+      if (naccepted == 0)
+	continue;
+
+      /* The node can accept `naccepted' bytes.  */
+      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+			       : mctx->max_mb_elem_len);  /* Running maximum.  */
+      err = clean_state_log_if_needed (mctx, dest_idx);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+#ifdef DEBUG
+      assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+      dest_state = mctx->state_log[dest_idx];
+      if (dest_state == NULL)
+	dest_nodes = *new_nodes;  /* Shallow copy; not freed below.  */
+      else
+	{
+	  err = re_node_set_init_union (&dest_nodes,
+					dest_state->entrance_nodes, new_nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+      context = re_string_context_at (&mctx->input, dest_idx - 1,
+				      mctx->eflags);
+      mctx->state_log[dest_idx]
+	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
+      if (dest_state != NULL)
+	re_node_set_free (&dest_nodes);  /* Free only the union built above.  */
+      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+/* Record in state_log the destinations of every back reference in NODES.  */
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      const re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not.  */
+      if (node->type != OP_BACK_REF)
+	continue;
+
+      if (node->constraint)
+	{
+	  context = re_string_context_at (&mctx->input, cur_str_idx,
+					  mctx->eflags);
+	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+	    continue;
+	}
+
+      /* `node' is a backreference.
+	 Check the substring which the subexpression matched.  */
+      bkc_idx = mctx->nbkref_ents;  /* Entries added by get_subexp start here.  */
+      err = get_subexp (mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+	 the backreference to appropriate state_log.  */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+	{
+	  int subexp_len;
+	  re_dfastate_t *dest_state;
+	  struct re_backref_cache_entry *bkref_ent;
+	  bkref_ent = mctx->bkref_ents + bkc_idx;
+	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+	    continue;
+	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+	  new_dest_nodes = (subexp_len == 0
+			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+			    : dfa->eclosures + dfa->nexts[node_idx]);
+	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+			  - bkref_ent->subexp_from);
+	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+					  mctx->eflags);
+	  dest_state = mctx->state_log[dest_str_idx];
+	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+			: mctx->state_log[cur_str_idx]->nodes.nelem);
+	  /* Add `new_dest_nodes' to state_log.  */
+	  if (dest_state == NULL)
+	    {
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, new_dest_nodes,
+					    context);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  else
+	    {
+	      re_node_set dest_nodes;
+	      err = re_node_set_init_union (&dest_nodes,
+					    dest_state->entrance_nodes,
+					    new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		{
+		  re_node_set_free (&dest_nodes);
+		  goto free_return;
+		}
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
+	      re_node_set_free (&dest_nodes);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  /* We need to check recursively if the backreference can epsilon
+	     transit.  */
+	  if (subexp_len == 0
+	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+	    {
+	      err = check_subexp_matching_top (mctx, new_dest_nodes,
+					       cur_str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	      err = transit_state_bkref (mctx, new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	}
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, so we
+   delay these checks until prune_impossible_nodes().  */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+	= mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+	  return REG_NOERROR; /* We already checked it.  */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+	continue; /* It isn't related.  */
+
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+	 evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+	{
+	  int sl_str_diff;
+	  sub_last = sub_top->lasts[sub_last_idx];
+	  sl_str_diff = sub_last->str_idx - sl_str;
+	  /* The matched string by the sub expression match with the substring
+	     at the back reference?  */
+	  if (sl_str_diff > 0)
+	    {
+	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+		{
+		  /* Not enough chars for a successful match.  */
+		  if (bkref_str_off + sl_str_diff > mctx->input.len)
+		    break;
+
+		  err = clean_state_log_if_needed (mctx,
+						   bkref_str_off
+						   + sl_str_diff);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+		  buf = (const char *) re_string_get_buffer (&mctx->input);
+		}
+	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+		/* We don't need to search this sub expression any more.  */
+		break;
+	    }
+	  bkref_str_off += sl_str_diff;
+	  sl_str += sl_str_diff;
+	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+				bkref_str_idx);
+
+	  /* Reload buf, since the preceding call might have reallocated
+	     the buffer.  */
+	  buf = (const char *) re_string_get_buffer (&mctx->input);
+
+	  if (err == REG_NOMATCH)
+	    continue;
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+
+      if (sub_last_idx < sub_top->nlasts)
+	continue;
+      if (sub_last_idx > 0)
+	++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+	{
+	  int cls_node, sl_str_off;
+	  const re_node_set *nodes;
+	  sl_str_off = sl_str - sub_top->str_idx;
+	  /* The matched string by the sub expression match with the substring
+	     at the back reference?  */
+	  if (sl_str_off > 0)
+	    {
+	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+		{
+		  /* If we are at the end of the input, we cannot match.  */
+		  if (bkref_str_off >= mctx->input.len)
+		    break;
+
+		  err = extend_buffers (mctx);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+
+		  buf = (const char *) re_string_get_buffer (&mctx->input);
+		}
+	      if (buf [bkref_str_off++] != buf[sl_str - 1])
+		break; /* We don't need to search this sub expression
+			  any more.  */
+	    }
+	  if (mctx->state_log[sl_str] == NULL)
+	    continue;
+	  /* Does this state have a ')' of the sub expression?  */
+	  nodes = &mctx->state_log[sl_str]->nodes;
+	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
+				       OP_CLOSE_SUBEXP);
+	  if (cls_node == -1)
+	    continue; /* No.  */
+	  if (sub_top->path == NULL)
+	    {
+	      sub_top->path = calloc (sizeof (state_array_t),
+				      sl_str - sub_top->str_idx + 1);
+	      if (sub_top->path == NULL)
+		return REG_ESPACE;
+	    }
+	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+	     in the current context?  */
+	  err = check_arrival (mctx, sub_top->path, sub_top->node,
+			       sub_top->str_idx, cls_node, sl_str,
+			       OP_CLOSE_SUBEXP);
+	  if (err == REG_NOMATCH)
+	      continue;
+	  if (BE (err != REG_NOERROR, 0))
+	      return err;
+	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+	  if (BE (sub_last == NULL, 0))
+	    return REG_ESPACE;
+	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+				bkref_str_idx);
+	  if (err == REG_NOMATCH)
+	    continue;
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp().  */
+
+/* Helper for get_subexp(): test whether the sub expression match delimited
+   by SUB_TOP and SUB_LAST can arrive at the back reference BKREF_NODE at
+   string index BKREF_STR, and if so register it via match_ctx_add_entry().  */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+		re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  int dest_idx;
+  /* Is the back reference reachable from the end of the sub expression?  */
+  reg_errcode_t status = check_arrival (mctx, &sub_last->path,
+					sub_last->node, sub_last->str_idx,
+					bkref_node, bkref_str, OP_OPEN_SUBEXP);
+  if (status != REG_NOERROR)
+    return status;
+  status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+				sub_top->str_idx, sub_last->str_idx);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+  /* BKREF_STR plus the length of the sub expression match.  */
+  dest_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+  return clean_state_log_if_needed (mctx, dest_idx);
+}
+
+/* Return the first node in NODES whose token type is TYPE and whose sub
+   expression index is SUBEXP_IDX, or -1 if NODES holds no such node.
+   The callers in this file pass OP_OPEN_SUBEXP to look for '(' and
+   OP_CLOSE_SUBEXP to look for ')'.
+   TODO: This function isn't efficient...
+	 More than one node may have the same type and the index SUBEXP_IDX
+	 (e.g. RE: (a){2}), so all the nodes have to be checked.  */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+		  int subexp_idx, int type)
+{
+  int pos = 0;
+  while (pos < nodes->nelem)
+    {
+      const int candidate = nodes->elems[pos++];
+      const re_token_t *tok = &dfa->nodes[candidate];
+      if (tok->type != type || tok->opr.idx != subexp_idx)
+	continue;
+      return candidate;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused: a later call resumes from PATH->next_idx instead of
+   starting over at TOP_STR.  TYPE is handed to check_arrival_expand_ecl()
+   and expand_bkref_cache() together with TOP_NODE's sub expression index.
+   While working, MCTX->state_log and MCTX->input.cur_idx are temporarily
+   redirected to PATH; both are restored on every exit taken after that
+   point, including the error exits.
+   Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or an
+   error code (e.g. REG_ESPACE) if a helper fails.  */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+	       int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+	{
+	  path->alloc = old_alloc;
+	  return REG_ESPACE;
+	}
+      path->array = new_array;
+      /* Only the newly added tail needs clearing.  */
+      memset (new_array + old_alloc, '\0',
+	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  /* Resume where the previous call on PATH stopped, if there was one.  */
+  str_idx = path->next_idx ? path->next_idx : top_str;
+
+  /* Temporary modify MCTX.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set: the expanded epsilon closure of TOP_NODE when
+     starting at TOP_STR, otherwise a copy of the nodes already logged at
+     STR_IDX provided that state has a back reference (else empty).  */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+	goto restore_mctx;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_next_nodes;
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+	{
+	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto restore_mctx;
+	}
+      else
+	re_node_set_init_empty (&next_nodes);
+    }
+  /* Apply the cached back reference matches, then log the resulting state.  */
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+	{
+	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+				    subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_next_nodes;
+	}
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+	goto free_next_nodes;
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  /* Advance one index at a time; give up early once more than
+     max_mb_elem_len consecutive indices yield no state.  */
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      /* Start from what is already logged for the next index.  */
+      if (mctx->state_log[str_idx + 1])
+	{
+	  err = re_node_set_merge (&next_nodes,
+				   &mctx->state_log[str_idx + 1]->nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_next_nodes;
+	}
+      /* Add the nodes reached by consuming the input at STR_IDX.  */
+      if (cur_state)
+	{
+	  err = check_arrival_add_next_nodes (mctx, str_idx,
+					      &cur_state->non_eps_nodes,
+					      &next_nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_next_nodes;
+	}
+      ++str_idx;
+      if (next_nodes.nelem)
+	{
+	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_next_nodes;
+	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+				    subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_next_nodes;
+	}
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+	goto free_next_nodes;
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+	       : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  /* Then check the current node set has the node LAST_NODE.  */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    return REG_NOERROR;
+
+  return REG_NOMATCH;
+
+ free_next_nodes:
+  re_node_set_free (&next_nodes);
+ restore_mctx:
+  /* Error exit: undo the temporary MCTX changes before reporting ERR, so
+     the caller never sees (or frees) PATH->array as MCTX->state_log.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival.  */
+
+/* For each node of CUR_NODES that accepts the input at STR_IDX, append its
+   successor to NEXT_NODES.  When RE_ENABLE_I18N is defined, a node that
+   accepts several bytes at once additionally has its successor logged in
+   MCTX->state_log at the index just past those bytes.
+   Return REG_NOERROR, or an error code such as REG_ESPACE on failure.
+   TODO: This function is similar to the functions transit_state*(),
+	 however this function has many additional works.
+	 Can't we unify them?  */
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+			      re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  re_node_set union_set;
+  int pos, result;
+  re_node_set_init_empty (&union_set);
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      int naccepted = 0;
+      int cur_node = cur_nodes->elems[pos];
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[cur_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* Multi-byte capable node: see how many bytes it takes here.  */
+      if (dfa->nodes[cur_node].accept_mb)
+	{
+	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+					       str_idx);
+	  if (naccepted > 1)
+	    {
+	      /* Log the successor at the index just past those bytes.  */
+	      int next_idx = str_idx + naccepted;
+	      re_dfastate_t *dest_state = mctx->state_log[next_idx];
+	      re_node_set_empty (&union_set);
+	      if (dest_state)
+		{
+		  err = re_node_set_merge (&union_set, &dest_state->nodes);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto fail;
+		}
+	      result = re_node_set_insert (&union_set, dfa->nexts[cur_node]);
+	      if (BE (result < 0, 0))
+		{
+		  err = REG_ESPACE;
+		  goto fail;
+		}
+	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+							    &union_set);
+	      if (BE (mctx->state_log[next_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto fail;
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      if (naccepted
+	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+	{
+	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+	  if (BE (result < 0, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto fail;
+	    }
+	}
+    }
+  re_node_set_free (&union_set);
+  return REG_NOERROR;
+
+ fail:
+  /* ERR is already set; release the scratch set and report it.  */
+  re_node_set_free (&union_set);
+  return err;
+}
+
+/* Replace CUR_NODES by the union of the epsilon closures of its nodes.
+   Nodes of token type TYPE that belong to the sub expression numbered
+   EX_SUBEXP are treated as a boundary: a closure containing such a node
+   is not merged as a whole but rebuilt node by node by
+   check_arrival_expand_ecl_sub(), which stops walking at that node.
+   Closures without such a node are merged unchanged.
+   On success the old contents of CUR_NODES are freed and replaced; on
+   failure CUR_NODES is left untouched.
+   Return REG_NOERROR, or the error code reported by a helper.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+			  int ex_subexp, int type)
+{
+  reg_errcode_t err;
+  int idx;
+  re_node_set new_nodes;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  /* Start NEW_NODES with CUR_NODES->nelem as its allocation size.  */
+  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* Accumulate into NEW_NODES the (possibly trimmed) epsilon closure of
+     every node of CUR_NODES.  */
+  for (idx = 0; idx < cur_nodes->nelem; ++idx)
+    {
+      const int cur_node = cur_nodes->elems[idx];
+      const re_node_set *const eclosure = &dfa->eclosures[cur_node];
+
+      if (find_subexp_node (dfa, eclosure, ex_subexp, type) != -1)
+	/* The closure holds a problematic node: recompute it step by
+	   step so that the walk can stop there.  */
+	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
+					    ex_subexp, type);
+      else
+	/* Nothing problematic, take the precomputed closure as is.  */
+	err = re_node_set_merge (&new_nodes, eclosure);
+
+      if (BE (err != REG_NOERROR, 0))
+	{
+	  re_node_set_free (&new_nodes);
+	  return err;
+	}
+    }
+
+  /* Hand the new set over to the caller in place of the old one.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = new_nodes;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Walk the epsilon transitions starting at TARGET and add the visited
+   nodes to DST_NODES.  The walk stops at a node already in DST_NODES, at a
+   node without epsilon destinations, and at a node of type TYPE whose sub
+   expression index is EX_SUBEXP; that last node is itself added only when
+   TYPE is OP_CLOSE_SUBEXP.  Return REG_NOERROR or REG_ESPACE.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+			      int target, int ex_subexp, int type)
+{
+  int cur_node = target;
+  while (!re_node_set_contains (dst_nodes, cur_node))
+    {
+      const re_token_t *tok = dfa->nodes + cur_node;
+      const int at_boundary = (tok->type == type
+			       && tok->opr.idx == ex_subexp);
+
+      /* A boundary node is recorded only when TYPE is OP_CLOSE_SUBEXP;
+	 every other node always is.  */
+      if (!at_boundary || type == OP_CLOSE_SUBEXP)
+	{
+	  if (BE (re_node_set_insert (dst_nodes, cur_node) == -1, 0))
+	    return REG_ESPACE;
+	}
+      if (at_boundary || dfa->edests[cur_node].nelem == 0)
+	break;
+      if (dfa->edests[cur_node].nelem == 2)
+	{
+	  /* Explore the second destination first, recursively.  */
+	  reg_errcode_t err
+	    = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+					    dfa->edests[cur_node].elems[1],
+					    ex_subexp, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+      cur_node = dfa->edests[cur_node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For each back reference node in CUR_NODES (the nodes at CUR_STR) that has
+   an entry in MCTX->BKREF_ENTS, add its destination: into CUR_NODES itself
+   for an empty match, into MCTX->STATE_LOG otherwise.  */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+		    int cur_str, int subexp_num, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  struct re_backref_cache_entry *ent;
+
+  if (cache_idx_start == -1)
+    return REG_NOERROR;
+
+ restart:
+  ent = mctx->bkref_ents + cache_idx_start;
+  do
+    {
+      int to_idx, next_node;
+
+      /* Is this entry ENT appropriate, i.e. is its node in CUR_NODES?  */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+	continue; /* No.  */
+
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+	 to MCTX->STATE_LOG.  */
+      if (to_idx == cur_str)
+	{
+	  /* The back reference matched the empty string (an epsilon
+	     transit), so re-check all the nodes in the current state.  */
+	  re_node_set new_dests;
+	  reg_errcode_t err2, err3;
+	  next_node = dfa->edests[ent->node].elems[0];
+	  if (re_node_set_contains (cur_nodes, next_node))
+	    continue;
+	  err = re_node_set_init_1 (&new_dests, next_node);
+	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+	  err3 = re_node_set_merge (cur_nodes, &new_dests);
+	  re_node_set_free (&new_dests);
+	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+		  || err3 != REG_NOERROR, 0))
+	    {
+	      err = (err != REG_NOERROR ? err
+		     : (err2 != REG_NOERROR ? err2 : err3));
+	      return err;
+	    }
+	  /* CUR_NODES grew, so rescan.  TODO: It is still inefficient...  */
+	  goto restart;
+	}
+      else
+	{
+	  re_node_set union_set;
+	  next_node = dfa->nexts[ent->node];
+	  if (mctx->state_log[to_idx])
+	    {
+	      int ret;
+	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+					next_node))
+		continue;
+	      err = re_node_set_init_copy (&union_set,
+					   &mctx->state_log[to_idx]->nodes);
+	      ret = re_node_set_insert (&union_set, next_node);
+	      if (BE (err != REG_NOERROR || ret < 0, 0))
+		{
+		  re_node_set_free (&union_set);
+		  err = err != REG_NOERROR ? err : REG_ESPACE;
+		  return err;
+		}
+	    }
+	  else
+	    {
+	      err = re_node_set_init_1 (&union_set, next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+	  re_node_set_free (&union_set);
+	  if (BE (mctx->state_log[to_idx] == NULL
+		  && err != REG_NOERROR, 0))
+	    return err;
+	}
+    }
+  while (ent++->more); /* Presumably MORE means another entry follows.  */
+  return REG_NOERROR;
+}
+
+/* Build transition table for the state: STATE->trtable, or
+   STATE->word_trtable if needed.  Return 1 if succeeded, otherwise 0.  */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+  reg_errcode_t err;
+  int i, j, ch, need_word_trtable = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
+  int ndests; /* Number of the destination states from `state'.  */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
+
+  /* We build DFA states which corresponds to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts.  */
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+  else
+    {
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
+	return 0;
+      dests_node_malloced = true;
+    }
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
+
+  /* Initialize transition table.  */
+  state->word_trtable = state->trtable = NULL;
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations.  */
+  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+	free (dests_alloc);
+      /* Return 0 in case of an error, 1 otherwise.  */
+      if (ndests == 0)
+	{
+	  state->trtable = (re_dfastate_t **)
+	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
+	  return state->trtable != NULL; /* 0 if calloc failed.  */
+	}
+      return 0;
+    }
+
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+			 + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+    {
+      dest_states = (re_dfastate_t **)
+	malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+	{
+out_free:
+	  if (dest_states_malloced)
+	    free (dest_states);
+	  re_node_set_free (&follows);
+	  for (i = 0; i < ndests; ++i)
+	    re_node_set_free (dests_node + i);
+	  if (dests_node_malloced)
+	    free (dests_alloc);
+	  return 0;
+	}
+      dest_states_malloced = true;
+    }
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations.  */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states.  */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+	{
+	  next_node = dfa->nexts[dests_node[i].elems[j]];
+	  if (next_node != -1)
+	    {
+	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		goto out_free;
+	    }
+	}
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+	goto out_free;
+      /* If the new state has context constraint,
+	 build appropriate states for these contexts.  */
+      if (dest_states[i]->has_constraint)
+	{
+	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+							  CONTEXT_WORD);
+	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+
+	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+	    need_word_trtable = 1;
+
+	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+							CONTEXT_NEWLINE);
+	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+	}
+      else
+	{
+	  dest_states_word[i] = dest_states[i];
+	  dest_states_nl[i] = dest_states[i];
+	}
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  if (!BE (need_word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+	 character, or we are in a single-byte character set so we can
+	 discern by looking at the character code: allocate a
+	 256-entry transition table.  */
+      trtable = state->trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      if (dfa->word_char[i] & mask)
+		trtable[ch] = dest_states_word[j];
+	      else
+		trtable[ch] = dest_states[j];
+	    }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+	 character, and we are in a multi-byte character set: discern
+	 by looking at the character code: build two 256-entry
+	 transition tables, one starting at trtable[0] and one
+	 starting at trtable[SBC_MAX].  */
+      trtable = state->word_trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      trtable[ch] = dest_states[j];
+	      trtable[ch + SBC_MAX] = dest_states_word[j];
+	    }
+    }
+
+  /* new line */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (j = 0; j < ndests; ++j)
+	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+	  {
+	    /* j-th destination accepts newline character.  */
+	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
+	    if (need_word_trtable)
+	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+	    /* There must be only one destination which accepts
+	       newline.  See group_nodes_into_DFAstates.  */
+	    break;
+	  }
+    }
+
+  if (dest_states_malloced)
+    free (dest_states);
+
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+
+  if (dests_node_malloced)
+    free (dests_alloc);
+
+  return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DESTS_CH[i].  Return the number of destinations, or -1 on error.  */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+			    re_node_set *dests_node, bitset_t *dests_ch)
+{
+  reg_errcode_t err;
+  int result;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'.  */
+  bitset_t accepts; /* Characters a node can accept.  */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state',  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Enumerate all single byte characters this node can accept.  */
+      if (type == CHARACTER)
+	bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+	{
+	  bitset_merge (accepts, node->opr.sbcset);
+	}
+      else if (type == OP_PERIOD)
+	{
+#ifdef RE_ENABLE_I18N
+	  if (dfa->mb_cur_max > 1)
+	    bitset_merge (accepts, dfa->sb_char);
+	  else
+#endif
+	    bitset_set_all (accepts);
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == OP_UTF8_PERIOD)
+        {
+	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+        }
+#endif
+      else
+	continue; /* Node types not listed above are skipped.  */
+
+      /* Check `accepts' and sift out the characters which do not
+	 match the context constraint of the node.  */
+      if (constraint)
+	{
+	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
+	    {
+	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+	      bitset_empty (accepts);
+	      if (accepts_newline)
+		bitset_set (accepts, NEWLINE_CHAR);
+	      else
+		continue;
+	    }
+	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
+	    {
+	      bitset_empty (accepts);
+	      continue;
+	    }
+
+	  if (constraint & NEXT_WORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && !node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	}
+
+      /* Then divide `accepts' into DFA states, or create a new
+	 state.  Above, we make sure that accepts is not empty.  */
+      for (j = 0; j < ndests; ++j)
+	{
+	  bitset_t intersec; /* Intersection sets, see below.  */
+	  bitset_t remains;
+	  /* Flags, see below.  */
+	  bitset_word_t has_intersec, not_subset, not_consumed;
+
+	  /* Optimization, skip if this state doesn't accept the character.  */
+	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+	    continue;
+
+	  /* Enumerate the intersection set of this state and `accepts'.  */
+	  has_intersec = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+	  /* And skip if the intersection set is empty.  */
+	  if (!has_intersec)
+	    continue;
+
+	  /* Then check if this state is a subset of `accepts'.  */
+	  not_subset = not_consumed = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    {
+	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+	    }
+
+	  /* If this state isn't a subset of `accepts', create a
+	     new group state, which has the `remains'. */
+	  if (not_subset)
+	    {
+	      bitset_copy (dests_ch[ndests], remains);
+	      bitset_copy (dests_ch[j], intersec);
+	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+	      if (BE (err != REG_NOERROR, 0))
+		goto error_return;
+	      ++ndests;
+	    }
+
+	  /* Put the position in the current group. */
+	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+	  if (BE (result < 0, 0))
+	    goto error_return;
+
+	  /* If all characters are consumed, go to next node. */
+	  if (!not_consumed)
+	    break;
+	}
+      /* Some characters remain, create a new group. */
+      if (j == ndests)
+	{
+	  bitset_copy (dests_ch[ndests], accepts);
+	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto error_return;
+	  ++ndests;
+	  bitset_empty (accepts);
+	}
+    }
+  return ndests;
+ error_return:
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts at
+   STR_IDX, the current index into INPUT.  Return that number of
+   bytes, or 0 if the node accepts nothing there.
+
+   This function handles the nodes which can accept one character or
+   one collating element (OP_UTF8_PERIOD, OP_PERIOD, COMPLEX_BRACKET),
+   as opposed to the other nodes, which can only accept one byte.  */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+			 const re_string_t *input, int str_idx)
+{
+  const re_token_t *node = dfa->nodes + node_idx;
+  int char_len, elem_len;
+  int i;
+
+  if (BE (node->type == OP_UTF8_PERIOD, 0))
+    {
+      unsigned char c = re_string_byte_at (input, str_idx), d;
+      if (BE (c < 0xc2, 1))
+	return 0;
+
+      if (str_idx + 2 > input->len)
+	return 0;
+
+      d = re_string_byte_at (input, str_idx + 1);
+      if (c < 0xe0)
+	return (d < 0x80 || d > 0xbf) ? 0 : 2;
+      else if (c < 0xf0)
+	{
+	  char_len = 3;
+	  if (c == 0xe0 && d < 0xa0)
+	    return 0;
+	}
+      else if (c < 0xf8)
+	{
+	  char_len = 4;
+	  if (c == 0xf0 && d < 0x90)
+	    return 0;
+	}
+      else if (c < 0xfc)
+	{
+	  char_len = 5;
+	  if (c == 0xf8 && d < 0x88)
+	    return 0;
+	}
+      else if (c < 0xfe)
+	{
+	  char_len = 6;
+	  if (c == 0xfc && d < 0x84)
+	    return 0;
+	}
+      else
+	return 0;
+
+      if (str_idx + char_len > input->len)
+	return 0;
+
+      for (i = 1; i < char_len; ++i)
+	{
+	  d = re_string_byte_at (input, str_idx + i);
+	  if (d < 0x80 || d > 0xbf)
+	    return 0;
+	}
+      return char_len;
+    }
+
+  char_len = re_string_char_size_at (input, str_idx);
+  if (node->type == OP_PERIOD)
+    {
+      if (char_len <= 1)
+        return 0;
+      /* FIXME: I don't think this if is needed, as both '\n'
+	 and '\0' are char_len == 1.  */
+      /* '.' accepts any one character except the following two cases.  */
+      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+	   re_string_byte_at (input, str_idx) == '\n') ||
+	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
+	   re_string_byte_at (input, str_idx) == '\0'))
+	return 0;
+      return char_len;
+    }
+
+  elem_len = re_string_elem_size_at (input, str_idx);
+  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+    return 0;
+
+  if (node->type == COMPLEX_BRACKET)
+    {
+      const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+      const unsigned char *pin
+	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+      int j;
+      uint32_t nrules;
+# endif /* _LIBC */
+      int match_len = 0;
+      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+		    ? re_string_wchar_at (input, str_idx) : 0);
+
+      /* match with multibyte character?  */
+      for (i = 0; i < cset->nmbchars; ++i)
+	if (wc == cset->mbchars[i])
+	  {
+	    match_len = char_len;
+	    goto check_node_accept_bytes_match;
+	  }
+      /* match with character_class?  */
+      for (i = 0; i < cset->nchar_classes; ++i)
+	{
+	  wctype_t wt = cset->char_classes[i];
+	  if (__iswctype (wc, wt))
+	    {
+	      match_len = char_len;
+	      goto check_node_accept_bytes_match;
+	    }
+	}
+
+# ifdef _LIBC
+      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+      if (nrules != 0)
+	{
+	  unsigned int in_collseq = 0;
+	  const int32_t *table, *indirect;
+	  const unsigned char *weights, *extra;
+	  const char *collseqwc;
+	  int32_t idx;
+	  /* This #include defines a local function!  */
+#  include <locale/weight.h>
+
+	  /* match with collating_symbol?  */
+	  if (cset->ncoll_syms)
+	    extra = (const unsigned char *)
+	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+	  for (i = 0; i < cset->ncoll_syms; ++i)
+	    {
+	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
+	      /* Compare the length of input collating element and
+		 the length of current collating element.  */
+	      if (*coll_sym != elem_len)
+		continue;
+	      /* Compare each byte.  */
+	      for (j = 0; j < *coll_sym; j++)
+		if (pin[j] != coll_sym[1 + j])
+		  break;
+	      if (j == *coll_sym)
+		{
+		  /* Match if every byte is equal.  */
+		  match_len = j;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+
+	  if (cset->nranges)
+	    {
+	      if (elem_len <= char_len)
+		{
+		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+		  in_collseq = __collseq_table_lookup (collseqwc, wc);
+		}
+	      else
+		in_collseq = find_collation_sequence_value (pin, elem_len);
+	    }
+	  /* match with range expression?  */
+	  for (i = 0; i < cset->nranges; ++i)
+	    if (cset->range_starts[i] <= in_collseq
+		&& in_collseq <= cset->range_ends[i])
+	      {
+		match_len = elem_len;
+		goto check_node_accept_bytes_match;
+	      }
+
+	  /* match with equivalence_class?  */
+	  if (cset->nequiv_classes)
+	    {
+	      const unsigned char *cp = pin;
+	      table = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+	      weights = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+	      extra = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+	      indirect = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+	      idx = findidx (&cp);
+	      if (idx > 0)
+		for (i = 0; i < cset->nequiv_classes; ++i)
+		  {
+		    int32_t equiv_class_idx = cset->equiv_classes[i];
+		    size_t weight_len = weights[idx];
+		    if (weight_len == weights[equiv_class_idx])
+		      {
+			int cnt = 0;
+			while (cnt <= weight_len
+			       && (weights[equiv_class_idx + 1 + cnt]
+				   == weights[idx + 1 + cnt]))
+			  ++cnt;
+			if (cnt > weight_len)
+			  {
+			    match_len = elem_len;
+			    goto check_node_accept_bytes_match;
+			  }
+		      }
+		  }
+	    }
+	}
+      else
+# endif /* _LIBC */
+	{
+	  /* match with range expression?  */
+#if __GNUC__ >= 2
+	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+	  cmp_buf[2] = wc;
+#endif
+	  for (i = 0; i < cset->nranges; ++i)
+	    {
+	      cmp_buf[0] = cset->range_starts[i];
+	      cmp_buf[4] = cset->range_ends[i];
+	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+		{
+		  match_len = char_len;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+	}
+    check_node_accept_bytes_match:
+      if (!cset->non_match)
+	return match_len;
+      else
+	{
+	  if (match_len > 0)
+	    return 0;
+	  else
+	    return (elem_len > char_len) ? elem_len : char_len;
+	}
+    }
+  return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules == 0)
+    {
+      if (mbs_len == 1)
+	{
+	  /* No valid character.  Match it as a single byte character.  */
+	  const unsigned char *collseq = (const unsigned char *)
+	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+	  return collseq[mbs[0]];
+	}
+      return UINT_MAX;
+    }
+  else
+    {
+      int32_t idx;
+      const unsigned char *extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+      int32_t extrasize = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+
+      for (idx = 0; idx < extrasize;)
+	{
+	  int mbs_cnt, found = 0;
+	  int32_t elem_mbs_len;
+	  /* Skip the length-prefixed name of the collating element.  */
+	  idx = idx + extra[idx] + 1;
+	  elem_mbs_len = extra[idx++];
+	  if (mbs_len == elem_mbs_len)
+	    {
+	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+		  break;
+	      if (mbs_cnt == elem_mbs_len)
+		/* Every byte matched MBS: found the entry.  */
+		found = 1;
+	    }
+	  /* Skip the byte sequence of the collating element.  */
+	  idx += elem_mbs_len;
+	  /* Round IDX up to a multiple of 4 for the alignment.  */
+	  idx = (idx + 3) & ~3;
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	  /* Skip the wide char sequence of the collating element.  */
+	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+	  /* If we found the entry, return the sequence value.  */
+	  if (found)
+	    return *(uint32_t *) (extra + idx);
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	}
+      return UINT_MAX;
+    }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Decide whether NODE accepts the single byte found at position IDX
+   of the input string held by MCTX.  Returns 1 if so, 0 if not.  */
+
+static int
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+		   int idx)
+{
+  const unsigned char in_byte = re_string_byte_at (&mctx->input, idx);
+  switch (node->type)
+    {
+    case CHARACTER:
+      if (in_byte != node->opr.c)
+	return 0;
+      break;
+
+    case SIMPLE_BRACKET:
+      if (!bitset_contain (node->opr.sbcset, in_byte))
+	return 0;
+      break;
+
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+      if (in_byte > 0x7f)
+	return 0;
+      /* FALLTHROUGH */
+#endif
+    case OP_PERIOD:
+      if (in_byte == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+	return 0;
+      if (in_byte == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))
+	return 0;
+      break;
+
+    default:
+      return 0;
+    }
+
+  /* A constrained node additionally needs the context at IDX to
+     satisfy its constraint.  */
+  if (node->constraint)
+    {
+      unsigned int ctx_flags
+	= re_string_context_at (&mctx->input, idx, mctx->eflags);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, ctx_flags))
+	return 0;
+    }
+
+  return 1;
+}
+
+/* Extend the buffers, if the buffers have run out.  Doubles MCTX's input
+   buffers, regrows state_log if present, and rebuilds the buffers.  */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+  reg_errcode_t ret;
+  re_string_t *pstr = &mctx->input;
+
+  /* Double the lengths of the buffers.  */
+  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  if (mctx->state_log != NULL)
+    {
+      /* And double the length of state_log.  */
+      /* XXX We have no indication of the size of this buffer.  If this
+	 allocation fails we have no indication that the state_log array
+	 does not have the right size.  */
+      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
+					      pstr->bufs_len + 1);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      mctx->state_log = new_array;
+    }
+
+  /* Then reconstruct the buffers.  */
+  if (pstr->icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	{
+	  ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (pstr->trans != NULL)
+	    re_string_translate_buffer (pstr);
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Functions for matching context.  */
+
+/* Set up MCTX for a new match; N is the initial capacity of its arrays.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
+{
+  mctx->match_last = -1;
+  mctx->eflags = eflags;
+  if (n > 0)
+    {
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (!mctx->bkref_ents || !mctx->sub_tops, 0))
+	return REG_ESPACE;
+    }
+  /* No `else' branch and no counter resets are needed here: the
+     caller has already zero-ed *MCTX, so
+       mctx->bkref_ents is NULL when N <= 0,
+       mctx->nbkref_ents is 0, and
+       mctx->nsub_tops is 0.  */
+  mctx->asub_tops = n;
+  mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Release every per-input sub-match record held by MCTX and reset the
+   sub_tops and backreference-cache counts to zero.  Must be invoked
+   whenever the matcher moves the start index or switches the input.  */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int t;
+  for (t = 0; t < mctx->nsub_tops; ++t)
+    {
+      re_sub_match_top_t *subtop = mctx->sub_tops[t];
+      int l;
+      for (l = 0; l < subtop->nlasts; ++l)
+	{
+	  re_sub_match_last_t *sublast = subtop->lasts[l];
+	  re_free (sublast->path.array);
+	  re_free (sublast);
+	}
+      re_free (subtop->lasts);
+      if (subtop->path != NULL)
+	{
+	  re_free (subtop->path->array);
+	  re_free (subtop->path);
+	}
+      free (subtop);
+    }
+
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Release everything MCTX owns: its sub-match records and both arrays.  */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  /* The records hanging off MCTX->SUB_TOPS go first, then the arrays.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Add a new backreference entry to MCTX, doubling the cache when full.
+   Return REG_NOERROR, or REG_ESPACE if the cache cannot be enlarged.
+   The caller must never pass a duplicate entry, and STR_IDX must not
+   be smaller than that of any existing entry.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+		     int to)
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+			      mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+	/* A failed realloc leaves the old array intact and still owned by
+	   MCTX.  Do not free it here: match_ctx_free releases it later,
+	   and freeing it twice would corrupt the heap.  */
+	return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
+  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Binary-search MCTX->BKREF_ENTS, which is kept sorted by str_idx, and
+   return the index of the first entry matching STR_IDX, or -1 if none.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  const int count = mctx->nbkref_ents;
+  int lo = 0, hi = count;
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (mctx->bkref_ents[probe].str_idx >= str_idx)
+	hi = probe;
+      else
+	lo = probe + 1;
+    }
+  /* LO is the lower bound; accept it only on an exact, in-range hit.  */
+  if (lo >= count || mctx->bkref_ents[lo].str_idx != str_idx)
+    return -1;
+  return lo;
+}
+
+/* Record in MCTX that NODE, whose type is OP_OPEN_SUBEXP, matched at
+   STR_IDX.  The sub_tops array is doubled first when it is full.
+   Returns REG_NOERROR on success and REG_ESPACE when out of memory.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (BE (mctx->asub_tops == mctx->nsub_tops, 0))
+    {
+      int grown = mctx->asub_tops * 2;
+      re_sub_match_top_t **bigger
+	= re_realloc (mctx->sub_tops, re_sub_match_top_t *, grown);
+      if (BE (bigger == NULL, 0))
+	return REG_ESPACE;
+      mctx->asub_tops = grown;
+      mctx->sub_tops = bigger;
+    }
+  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
+  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
+    return REG_ESPACE;
+  mctx->sub_tops[mctx->nsub_tops]->str_idx = str_idx;
+  mctx->sub_tops[mctx->nsub_tops++]->node = node;
+  return REG_NOERROR;
+}
+
+/* Record under SUBTOP, the corresponding OP_OPEN_SUBEXP record, that
+   NODE, whose type is OP_CLOSE_SUBEXP, matched at STR_IDX.  Returns
+   the new record, or NULL if memory could not be obtained.  */
+
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
+{
+  re_sub_match_last_t *rec;
+  if (BE (subtop->alasts == subtop->nlasts, 0))
+    {
+      int grown = subtop->alasts * 2 + 1;
+      re_sub_match_last_t **bigger
+	= re_realloc (subtop->lasts, re_sub_match_last_t *, grown);
+      if (BE (bigger == NULL, 0))
+	return NULL;
+      subtop->alasts = grown;
+      subtop->lasts = bigger;
+    }
+  /* Allocate the zero-filled record and append it to SUBTOP->LASTS.  */
+  rec = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (rec == NULL, 0))
+    return NULL;
+  rec->str_idx = str_idx;
+  rec->node = node;
+  subtop->lasts[subtop->nlasts] = rec;
+  ++subtop->nlasts;
+  return rec;
+}
+
+/* Fill in SCTX from the given arguments and start with an empty limits set.  */
+static void
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
+{
+  re_node_set_init_empty (&sctx->limits);
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+}
+
+/* Binary backward compatibility.  */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/gkregex.h b/3rdParty/metis/metis-5.1.0/GKlib/gkregex.h
new file mode 100644
index 000000000..807c404ec
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/gkregex.h
@@ -0,0 +1,556 @@
+/* Definitions for data structures and routines for the regular
+   expression library.
+   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer types
+   wide enough to hold the value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t would likely be OK.  Still, the size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals.
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically,
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES.
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+     than the starting range point, as in [z-a], is invalid.
+   If not set, then when ending range point collates higher than the
+     starting range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+   If not set, and debugging was on, turn it off.
+   This only works if regex.c is compiled -DDEBUG.
+   We define this bit always, so that all that's needed to turn on
+   debugging is to recompile regex.c; the calling code can always have
+   this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+   for ^, because it is difficult to scan the regex backwards to find
+   whether ^ should be special.  */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in a BRE or
+   immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+   re_compile_pattern.  */
+#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
+   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
+   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK						\
+  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
+   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
+       | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
+   | RE_INTERVALS	    | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
+   | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
+   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+   removed and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+     characters in the string.
+   If not set, then anchors do match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+   buffer.  */
+#define REG_STARTEND (1 << 2)
+
+
+/* If any error codes are removed, changed, or added, update the
+   `re_error_msg' table in regex.c.  */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
+#endif
+
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Invalid collating element.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE unsigned char *
+#endif
+
+struct re_pattern_buffer
+{
+  /* Space that holds the compiled pattern.  It is declared as
+     `unsigned char *' because its elements are sometimes used as
+     array indexes.  */
+  unsigned char *buffer;
+
+  /* Number of bytes to which `buffer' points.  */
+  unsigned long int allocated;
+
+  /* Number of bytes actually used in `buffer'.  */
+  unsigned long int used;
+
+  /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
+     fastmap, if there is one, to skip over impossible starting points
+     for matches.  */
+  char *fastmap;
+
+  /* Either a translate table to apply to all characters before
+     comparing them, or zero for no translation.  The translation is
+     applied to a pattern when it is compiled and to a string when it
+     is matched.  */
+  RE_TRANSLATE_TYPE translate;
+
+  /* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+  /* Zero if this pattern cannot match the empty string, one otherwise.
+     Well, in truth it's used only in `re_search_2', to see whether or
+     not we should use the fastmap, so we don't set this absolutely
+     perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+     for `max (RE_NREGS, re_nsub + 1)' groups.
+     If REGS_REALLOCATE, reallocate space if necessary.
+     If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;  /* One of the three REGS_* values above.  */
+
+  /* Set to zero when `regex_compile' compiles a pattern; set to one
+     by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+  /* If set, `re_match_2' does not return information about
+     subexpressions.  */
+  unsigned no_sub : 1;
+
+  /* If set, a beginning-of-line anchor doesn't match at the beginning
+     of the string.  */
+  unsigned not_bol : 1;
+
+  /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+  /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+};
+
+typedef struct re_pattern_buffer regex_t;  /* Type taken by regcomp/regexec.  */
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  unsigned num_regs;  /* Number of registers held in `start' and `end'.  */
+  regoff_t *start;    /* Per-register start offsets (see re_set_registers).  */
+  regoff_t *end;      /* Per-register end offsets (see re_set_registers).  */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Besides using different names than
+   `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+				       struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+		      int __length, int __start, int __range,
+		      struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+			const char *__string1, int __length1,
+			const char *__string2, int __length2, int __start,
+			int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+		     int __length, int __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+		       const char *__string1, int __length1,
+		       const char *__string2, int __length2, int __start,
+		       struct re_registers *__regs, int __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+			      struct re_registers *__regs,
+			      unsigned int __num_regs,
+			      regoff_t *__starts, regoff_t *__ends);
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility.  */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+   "restrict", and "configure" may have defined "restrict".  */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+#  if defined restrict || 199901L <= __STDC_VERSION__
+#   define __restrict restrict
+#  else
+#   define __restrict
+#  endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax.  */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+     && !defined __GNUG__
+#  define __restrict_arr __restrict
+# else
+#  define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility.  */
+extern int regcomp (regex_t *__restrict __preg,
+		    const char *__restrict __pattern,
+		    int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+		    const char *__restrict __string, size_t __nmatch,
+		    regmatch_t __pmatch[__restrict_arr],
+		    int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+			char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif	/* C++ */
+
+#endif /* regex.h */
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/graph.c b/3rdParty/metis/metis-5.1.0/GKlib/graph.c
new file mode 100644
index 000000000..209581865
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/graph.c
@@ -0,0 +1,1574 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines for dealing with sparse graphs
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: graph.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Allocates a new graph object and puts it into its initial (empty) state.
+    \returns the allocated graph, exactly as gk_graph_Init() leaves it.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Create()
+{
+  gk_graph_t *g;
+
+  /* the string is the tag handed to gk_malloc for this allocation */
+  g = (gk_graph_t *)gk_malloc(sizeof(gk_graph_t), "gk_graph_Create: graph");
+  gk_graph_Init(g);
+
+  return g;
+}
+
+
+/*************************************************************************/
+/*! Resets a graph to its empty state: all bytes zeroed and nvtxs set to -1.
+    \param graph is the graph to be initialized (nothing it points to is freed).
+*/
+/*************************************************************************/
+void gk_graph_Init(gk_graph_t *graph)
+{
+  memset(graph, 0, sizeof(*graph));  /* clears every field, pointers included */
+  graph->nvtxs = -1;                 /* sentinel: not a valid vertex count */
+}
+
+
+/*************************************************************************/
+/*! Releases a graph: first the arrays it holds, then the graph object itself.
+    \param graph points to the graph pointer to be freed; a NULL graph is ignored.
+*/
+/*************************************************************************/
+void gk_graph_Free(gk_graph_t **graph)
+{
+  if (*graph != NULL) {
+    gk_graph_FreeContents(*graph);
+    gk_free((void **)graph, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Hands every array held by the graph to gk_free (documented here as
+    setting the freed fields to NULL); the graph object itself is kept.
+    \param graph is the graph whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_graph_FreeContents(gk_graph_t *graph)
+{
+  gk_free((void **)&graph->xadj, &graph->adjncy,   /* adjacency structure */
+          &graph->iadjwgt, &graph->fadjwgt,        /* edge weights        */
+          &graph->ivwgts, &graph->fvwgts,          /* vertex weights      */
+          &graph->ivsizes, &graph->fvsizes,        /* vertex sizes        */
+          &graph->vlabels,                         /* vertex labels       */
+          LTERM);
+}
+
+
+/**************************************************************************/
+/*! Reads a sparse graph from the supplied file. Lines that start with '%'
+    are skipped and the vertex numbers stored in the file are 1-based.
+    \param filename is the file that stores the data.
+    \param format is the graph format; only GK_GRAPH_FMT_METIS is supported.
+    \param isfewgts is 1 if the edge-weights should be read as floats
+    \param isfvwgts is 1 if the vertex-weights should be read as floats
+    \param isfvsizes is 1 if the vertex-sizes should be read as floats
+    \returns the graph that was read; malformed input is reported via gk_errexit.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Read(char *filename, int format, int isfewgts, 
+                int isfvwgts, int isfvsizes)
+{
+  ssize_t i, k, l;
+  size_t nvtxs, nedges, fmt, ncon, lnlen;
+  int32_t ival;
+  float fval;
+  int nfields, readsizes=0, readwgts=0, readvals=0, numbering=0;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin=NULL;
+  gk_graph_t *graph=NULL;
+
+
+  if (!gk_fexists(filename)) 
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  if (format == GK_GRAPH_FMT_METIS) {
+    fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+    do {  /* skip the '%' comment lines in front of the header */
+      if (gk_getline(&line, &lnlen, fpin) <= 0)
+        gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+    } while (line[0] == '%');
+
+    fmt = ncon = 0;
+    nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon);
+    if (nfields < 2)  /* nfields is a signed int so that sscanf's EOF is caught as well */
+      gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+    nedges *= 2;  /* adjncy is sized for two entries per edge counted in the header */
+
+    if (fmt > 111)
+      gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+    /* fmt is a three-digit flag: <vertex sizes><vertex weights><edge weights> */
+    sprintf(fmtstr, "%03zu", fmt%1000);
+    readsizes = (fmtstr[0] == '1');
+    readwgts  = (fmtstr[1] == '1');
+    readvals  = (fmtstr[2] == '1');
+    numbering = 1;
+    ncon      = (ncon == 0 ? 1 : ncon);
+  }
+  else {
+    gk_errexit(SIGERR, "Unrecognized format: %d\n", format);
+  }
+
+  graph = gk_graph_Create();
+
+  graph->nvtxs = nvtxs;
+
+  graph->xadj   = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj");
+  graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy");
+  if (readvals) {
+    if (isfewgts)
+      graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt");
+    else
+      graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt");
+  }
+
+  if (readsizes) {
+    if (isfvsizes)
+      graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes");
+    else
+      graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes");
+  }
+
+  if (readwgts) {
+    if (isfvwgts)
+      graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts");
+    else
+      graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts");
+  }
+
+
+  /*----------------------------------------------------------------------
+   * Read the sparse graph file
+   *---------------------------------------------------------------------*/
+  numbering = (numbering ? - 1 : 0);  /* offset added to every vertex id read below */
+  for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
+    do {
+      if (gk_getline(&line, &lnlen, fpin) == -1)
+        gk_errexit(SIGERR, "Premature end of input file while reading row %zd\n", i);
+    } while (line[0] == '%');
+
+    head = line;
+    tail = NULL;
+
+    /* Read vertex sizes */
+    if (readsizes) {
+      if (isfvsizes) {
+#ifdef __MSC__
+        graph->fvsizes[i] = (float)strtod(head, &tail);
+#else
+        graph->fvsizes[i] = strtof(head, &tail);
+#endif
+        if (tail == head)
+          gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+        if (graph->fvsizes[i] < 0)
+          gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+      }
+      else {
+        graph->ivsizes[i] = strtol(head, &tail, 0);
+        if (tail == head)
+          gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+        if (graph->ivsizes[i] < 0)
+          gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+      }
+      head = tail;
+    }
+
+    /* Read vertex weights */
+    if (readwgts) {
+      for (l=0; l<ncon; l++) {
+        if (isfvwgts) {
+#ifdef __MSC__
+          graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+          graph->fvwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+          if (tail == head)
+            gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                    "for the %zu constraints.\n", i+1, ncon);
+          if (graph->fvwgts[i*ncon+l] < 0)
+            gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+        }
+        else {
+          graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0);
+          if (tail == head)
+            gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                    "for the %zu constraints.\n", i+1, ncon);
+          if (graph->ivwgts[i*ncon+l] < 0)
+            gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+        }
+        head = tail;
+      }
+    }
+
+
+    /* Read the rest of the row: neighbor ids, each followed by its weight if any */
+    while (1) {
+      ival = (int)strtol(head, &tail, 0);
+      if (tail == head) 
+        break;
+      head = tail;
+      if ((size_t)k >= nedges)  /* adjncy[] (and the weight array) is already full */
+        gk_errexit(SIGERR, "Error: Row %zd holds more edges than the %zu entries implied by the header.\n", i, nedges);
+      if ((graph->adjncy[k] = ival + numbering) < 0 || (size_t)graph->adjncy[k] >= nvtxs)
+        gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+      if (readvals) {
+        if (isfewgts) {
+#ifdef __MSC__
+          fval = (float)strtod(head, &tail);
+#else
+    	  fval = strtof(head, &tail);
+#endif
+          if (tail == head)
+            gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+          graph->fadjwgt[k] = fval;
+        }
+        else {
+    	  ival = strtol(head, &tail, 0);
+          if (tail == head)
+            gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+          graph->iadjwgt[k] = ival;
+        }
+        head = tail;
+      }
+      k++;
+    }
+    graph->xadj[i+1] = k;
+  }
+
+  if (k != nedges)
+    gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
+                       "the input file. nedges=%zu, Actualnedges=%zd.\n", nedges, k);
+
+  gk_fclose(fpin);
+
+  gk_free((void **)&line, LTERM);
+
+  return graph;
+}
+
+
+/**************************************************************************/
+/*! Writes a graph as text, one header line followed by one line per vertex.
+    \param graph is the graph to be written,
+    \param filename is the name of the output file; NULL writes to stdout.
+    \param format is the format of the output file; GK_GRAPH_FMT_METIS is
+           the only value that is accepted.
+*/
+/**************************************************************************/
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format)
+{
+  ssize_t v, e;
+  int hasvwgts, hasvsizes, hasewgts;
+  FILE *out;
+
+  if (format != GK_GRAPH_FMT_METIS)
+    gk_errexit(SIGERR, "Unknown file format. %d\n", format);
+
+  /* a NULL filename selects stdout as the destination */
+  out = (filename ? gk_fopen(filename, "w", "gk_graph_Write: fpout") : stdout);
+
+  /* each optional section is present when either its integer or its
+     floating-point array is allocated */
+  hasewgts  = (graph->iadjwgt != NULL || graph->fadjwgt != NULL);
+  hasvwgts  = (graph->ivwgts != NULL || graph->fvwgts != NULL);
+  hasvsizes = (graph->ivsizes != NULL || graph->fvsizes != NULL);
+
+  /* header: #vertices, half the number of adjacency entries and, only when
+     some section is present, the three sizes/weights/edge-weights digits */
+  fprintf(out, "%d %zd", graph->nvtxs, graph->xadj[graph->nvtxs]/2);
+  if (hasvwgts || hasvsizes || hasewgts)
+    fprintf(out, " %d%d%d", hasvsizes, hasvwgts, hasewgts);
+  fprintf(out, "\n");
+
+  /* one line per vertex: [size] [weight] then (neighbor [edge-weight]) pairs */
+  for (v=0; v<graph->nvtxs; v++) {
+    if (hasvsizes) {
+      if (graph->ivsizes != NULL)
+        fprintf(out, " %d", graph->ivsizes[v]);
+      else
+        fprintf(out, " %f", graph->fvsizes[v]);
+    }
+
+    if (hasvwgts) {
+      if (graph->ivwgts != NULL)
+        fprintf(out, " %d", graph->ivwgts[v]);
+      else
+        fprintf(out, " %f", graph->fvwgts[v]);
+    }
+
+    for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
+      fprintf(out, " %d", graph->adjncy[e]+1);  /* ids are written 1-based */
+      if (!hasewgts)
+        continue;
+      if (graph->iadjwgt != NULL)
+        fprintf(out, " %d", graph->iadjwgt[e]);
+      else
+        fprintf(out, " %f", graph->fadjwgt[e]);
+    }
+    fprintf(out, "\n");
+  }
+  if (filename)
+    gk_fclose(out);
+}
+
+
+/*************************************************************************/
+/*! Builds a deep copy of a graph; arrays that are NULL in the source stay NULL.
+    \param graph is the graph to be duplicated.
+    \returns the newly created copy of the graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph)
+{
+  int n;
+  gk_graph_t *dup;
+
+  dup = gk_graph_Create();
+
+  n = graph->nvtxs;
+  dup->nvtxs = n;
+
+  /* row pointers of the adjacency structure: n+1 entries */
+  if (graph->xadj)
+    dup->xadj = gk_zcopy(n+1, graph->xadj, gk_zmalloc(n+1, "gk_graph_Dup: xadj"));
+
+  /* per-vertex arrays: n entries are copied for each one that is
+     present in the source graph */
+  if (graph->ivwgts)
+    dup->ivwgts = gk_i32copy(n, graph->ivwgts, gk_i32malloc(n, "gk_graph_Dup: ivwgts"));
+  if (graph->ivsizes)
+    dup->ivsizes = gk_i32copy(n, graph->ivsizes, gk_i32malloc(n, "gk_graph_Dup: ivsizes"));
+  if (graph->vlabels)
+    dup->vlabels = gk_i32copy(n, graph->vlabels, gk_i32malloc(n, "gk_graph_Dup: ivlabels"));
+  if (graph->fvwgts)
+    dup->fvwgts = gk_fcopy(n, graph->fvwgts, gk_fmalloc(n, "gk_graph_Dup: fvwgts"));
+  if (graph->fvsizes)
+    dup->fvsizes = gk_fcopy(n, graph->fvsizes, gk_fmalloc(n, "gk_graph_Dup: fvsizes"));
+
+  /* per-edge arrays: xadj[n] entries each (xadj is only read when such an
+     array is actually present) */
+  if (graph->adjncy)
+    dup->adjncy = gk_i32copy(graph->xadj[n], graph->adjncy,
+                             gk_i32malloc(graph->xadj[n], "gk_graph_Dup: adjncy"));
+  if (graph->iadjwgt)
+    dup->iadjwgt = gk_i32copy(graph->xadj[n], graph->iadjwgt,
+                              gk_i32malloc(graph->xadj[n], "gk_graph_Dup: iadjwgt"));
+  if (graph->fadjwgt)
+    dup->fadjwgt = gk_fcopy(graph->xadj[n], graph->fadjwgt,
+                            gk_fmalloc(graph->xadj[n], "gk_graph_Dup: fadjwgt"));
+
+  return dup;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph containing a set of consecutive vertices.
+    \param graph is the original graph (adjacency ids are copied unchanged).
+    \param vstart is the first vertex to extract; must be >= 0.
+    \param nvtxs is the number of vertices from vstart to extract.
+    \returns the new subgraph; NULL for an invalid range or a graph without xadj.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs)
+{
+  ssize_t i;
+  gk_graph_t *ngraph;
+  /* reject bad ranges up front; the subtraction form cannot overflow */
+  if (graph->xadj == NULL || vstart < 0 || nvtxs < 0 || nvtxs > graph->nvtxs-vstart)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nvtxs  = nvtxs;
+
+  /* copy the row pointers and rebase them so that the subgraph starts at 0;
+     walking downwards keeps xadj[0] intact until all other entries are fixed */
+  ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart, 
+                            gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj"));
+  for (i=nvtxs; i>=0; i--)
+    ngraph->xadj[i] -= ngraph->xadj[0];
+  ASSERT(ngraph->xadj[0] == 0);
+
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels"));
+
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes"));
+
+  /* the per-edge arrays cover the entries [xadj[vstart], xadj[vstart+nvtxs]) */
+  ASSERT(ngraph->xadj[nvtxs] == graph->xadj[vstart+nvtxs]-graph->xadj[vstart]);
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32copy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], 
+                            graph->adjncy+graph->xadj[vstart], 
+                            gk_i32malloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart],
+                                       "gk_graph_ExtractSubgraph: adjncy"));
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32copy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], 
+                            graph->iadjwgt+graph->xadj[vstart], 
+                            gk_i32malloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart],
+                                       "gk_graph_ExtractSubgraph: iadjwgt"));
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fcopy(graph->xadj[vstart+nvtxs]-graph->xadj[vstart], 
+                            graph->fadjwgt+graph->xadj[vstart], 
+                            gk_fmalloc(graph->xadj[vstart+nvtxs]-graph->xadj[vstart],
+                                       "gk_graph_ExtractSubgraph: fadjwgt"));
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Builds a new graph whose vertices are renumbered by a permutation.
+    \param[IN] graph is the graph to be re-ordered.
+    \param[IN] perm maps an original vertex to its ID in the new graph
+    \param[IN] iperm maps a vertex of the new graph to its original ID
+    \returns the re-ordered copy, or NULL if both perm and iperm are NULL.
+
+    \note Either perm or iperm can be NULL but not both.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm)
+{
+  ssize_t e, nnz, *xadj;
+  int oldv, newv, nvtxs;
+  int ownperm=0, owniperm=0;
+  int32_t *adjncy;
+  gk_graph_t *ngraph;
+
+  /* at least one direction of the permutation is required */
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  ngraph->nvtxs = nvtxs;
+
+  /* mirror the source graph: allocate only the arrays that it carries */
+  if (graph->xadj)
+    ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj");
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts");
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes");
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels");
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts");
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes");
+
+  /* per-edge arrays hold xadj[nvtxs] entries */
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32malloc(xadj[nvtxs], "gk_graph_Reorder: adjncy");
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32malloc(xadj[nvtxs], "gk_graph_Reorder: iadjwgt");
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fmalloc(xadj[nvtxs], "gk_graph_Reorder: fadjwgt");
+
+
+  /* derive whichever of perm/iperm the caller left out from the other one;
+     the derived array is private and is released before returning */
+  if (perm == NULL) {
+    ownperm = 1;
+    perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm");
+    for (newv=0; newv<nvtxs; newv++)
+      perm[iperm[newv]] = newv;
+  }
+  else if (iperm == NULL) {
+    owniperm = 1;
+    iperm = gk_i32malloc(nvtxs, "gk_graph_Reorder: iperm");
+    for (oldv=0; oldv<nvtxs; oldv++)
+      iperm[perm[oldv]] = oldv;
+  }
+
+  /* emit the vertices in their new order; nnz counts the edges written so far */
+  nnz = 0;
+  ngraph->xadj[0] = nnz;
+  for (newv=0; newv<nvtxs; newv++) {
+    oldv = iperm[newv];
+
+    /* per-vertex attributes travel with the vertex */
+    if (graph->ivwgts)
+      ngraph->ivwgts[newv] = graph->ivwgts[oldv];
+    if (graph->fvwgts)
+      ngraph->fvwgts[newv] = graph->fvwgts[oldv];
+    if (graph->ivsizes)
+      ngraph->ivsizes[newv] = graph->ivsizes[oldv];
+    if (graph->fvsizes)
+      ngraph->fvsizes[newv] = graph->fvsizes[oldv];
+    if (graph->vlabels)
+      ngraph->vlabels[newv] = graph->vlabels[oldv];
+
+    /* the adjacency list is copied with its endpoints renamed through perm */
+    for (e=xadj[oldv]; e<xadj[oldv+1]; e++) {
+      ngraph->adjncy[nnz] = perm[adjncy[e]];
+      if (graph->iadjwgt)
+        ngraph->iadjwgt[nnz] = graph->iadjwgt[e];
+      if (graph->fadjwgt)
+        ngraph->fadjwgt[nnz] = graph->fadjwgt[e];
+      nnz++;
+    }
+    ngraph->xadj[newv+1] = nnz;
+  }
+
+  /* release the helper permutation, if one was built here */
+  if (ownperm)
+    gk_free((void **)&perm, LTERM);
+  if (owniperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph.
+
+    \param graph is the graph structure
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be graph->nvtxs+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be graph->nvtxs.
+
+    \returns the number of components that it found.
+
+    \note If either cptr or cind is NULL, private scratch vectors are used
+          and only the number of connected components is returned.
+*/
+/*************************************************************************/
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind)
+{
+  ssize_t i, j, k, v, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int32_t mustfree_ccsr=0;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Use private scratch vectors unless the caller supplied both outputs */
+  if (cptr == NULL || cind == NULL) {
+    cptr = gk_i32malloc(nvtxs+1, "gk_graph_FindComponents: cptr");
+    cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind");
+    mustfree_ccsr = 1;
+  }
+
+  /* The list of vertices that have not been touched yet. 
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: todo"));
+
+  /* For a vertex that has not been touched, pos[i] is its position in the
+     todo list. Once a vertex has been touched (i.e., placed in cind[]),
+     it is no longer in the todo list and pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos"));
+
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* No vertex has been touched yet */
+  first = last = 0;  /* cind[first]...cind[last-1] holds the vertices that have
+                        been touched but whose adjacency lists have not been
+                        explored yet. */
+  while (ntodo > 0) {
+    if (first == last) { /* The current CC is exhausted; seed a new one */
+      cptr[++ncmps] = first;  /* Mark the start of the new CC */
+
+      v = todo[0];
+      ASSERT(pos[v] == 0);
+      todo[0] = todo[--ntodo];  /* the last todo entry takes over slot 0 */
+      pos[todo[0]] = 0;
+      pos[v] = -1;              /* must follow the update above (v may be todo[0]) */
+      cind[last++] = v;
+    }
+
+    i = cind[first++];  /* Get the first touched but unexplored vertex */
+
+    /* Touch every untouched neighbor v of i: take it out of the todo list by
+       moving the last todo entry into its slot (this keeps todo[] consecutive
+       and pos[] in sync) and only then overwrite pos[v] with the -1 marker,
+       because the slot number is lost once the marker has been stored. */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      v = adjncy[j];
+      if (pos[v] != -1) {
+        k = pos[v];
+        todo[k] = todo[--ntodo];
+        pos[todo[k]] = k;
+        pos[v] = -1;
+        cind[last++] = v;
+      }
+    }
+  }
+  cptr[++ncmps] = last;  /* every vertex has been touched, so cind[] is complete */
+
+  if (mustfree_ccsr)
+    gk_free((void **)&cptr, &cind, LTERM);
+
+  gk_free((void **)&pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    breadth-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality. When the frontier runs
+    empty the traversal restarts from the head of the not-yet-visited list.
+    Nothing is done (and nothing is returned) if graph->nvtxs <= 0.
+
+    \param[IN]  graph is the graph structure
+    \param[IN]  v is the starting vertex of the BFS
+    \param[OUT] r_perm receives perm; perm[i] is the ID of vertex i in the
+                re-ordered graph.
+    \param[OUT] r_iperm receives iperm; iperm[i] is the ID of the vertex that
+                corresponds to the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm can be NULL, in which case the corresponding
+          array is freed instead of returned; passing NULL for both works but
+          discards the ordering. Returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm,
+          int32_t **r_iperm)
+{
+  ssize_t j, *xadj;
+  int i, k, nvtxs, first, last;
+  int32_t *adjncy, *cot, *pos;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* pos[u] is the slot of u inside cot[] while u is unvisited, -1 afterwards */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: pos"));
+
+  /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. 
+     Positions from [0...first) is the current iperm[] vector of the explored vertices; 
+     Positions from [first...last) is the OPEN list (i.e., visited vertices);
+     Positions from [last...nvtxs) is the todo list. */
+  cot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: cot"));
+
+
+  /* put v at the front of the todo list by swapping it with the entry at slot 0 */
+  pos[0] = cot[0] = v;
+  pos[v] = cot[v] = 0;  /* NOTE(review): v is used as an index without a range check */
+
+  /* Run the BFS; whenever OPEN is empty the head of the todo list seeds it */
+  first = last = 0;
+  while (first < nvtxs) {
+    if (first == last) { /* Find another starting vertex */
+      k = cot[last];
+      ASSERT(pos[k] != -1);
+      pos[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = cot[first++];  /* the ++ advances the explored vertices */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      /* if a node has already been visited, its pos[] will be -1 */
+      if (pos[k] != -1) {
+        /* pos[k] is the location within cot of where k resides (it is in the 'todo' part); 
+           The entry cot[last] (head of the todo list) that we are about to overwrite
+           is moved into that location and pos[cot[last]] is updated to reflect that. */
+        cot[pos[k]]    = cot[last]; /* put the head of the todo list to 
+                                       where k was in the todo list */
+        pos[cot[last]] = pos[k];    /* update pos to reflect the move */
+
+        cot[last++] = k;  /* put node at the end of the OPEN list */
+        pos[k]      = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  /* time to decide what to return; by now cot[] holds the complete iperm */
+  if (r_perm != NULL) {
+    /* use the 'pos' array to build the perm array (the inverse of cot) */
+    for (i=0; i<nvtxs; i++)
+      pos[cot[i]] = i;
+
+    *r_perm = pos;
+    pos = NULL;
+  }
+
+  if (r_iperm != NULL) {
+    *r_iperm = cot;
+    cot = NULL;
+  }
+
+
+  /* cleanup memory; arrays handed to the caller were set to NULL above */
+  gk_free((void **)&pos, &cot, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type selects the priority key: 1 (DFS), 2 (closed degree),
+                3 (sum of closed orders), 4 (sum of order differences).
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to
+                the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm (or both) can be NULL, in which case the
+          corresponding array is not returned. Nothing is returned for an
+          empty graph (nvtxs <= 0).
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, jj, *xadj;
+  int i, k, u, nvtxs;
+  int32_t *adjncy, *perm, *degrees;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* degrees[u] counts the closed neighbors of u; it is reused for iperm at the end */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* NOTE: this routine used to allocate two more nvtxs-sized scratch arrays
+     ('minIDs' and 'open') at this point. They were written but never read,
+     so they have been removed. */
+
+  /* perm[] doubles as the 'closed' marker during the traversal:
+       perm[i] >= 0  => vertex i is closed and perm[i] is its order;
+       perm[i] == -1 => vertex i has not been ordered yet.
+     It is the array that is handed back through r_perm.
+  */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue, put everything in it with key 0 and then raise the key of v */
+  queue = gk_i32pqCreate(nvtxs);
+  for (i=0; i<nvtxs; i++)
+    gk_i32pqInsert(queue, i, 0);
+  gk_i32pqUpdate(queue, v, 1);
+
+  /* Start processing the nodes: every iteration extracts the top vertex of
+     the queue, assigns it the next order i and then re-prioritizes its
+     neighbors that have not been ordered yet. */
+  for (i=0; i<nvtxs; i++) {
+    if ((v = gk_i32pqGetTop(queue)) == -1) 
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) {
+        degrees[u]++;
+
+        /* re-key u according to the requested criterion */
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1);
+            break;
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+          case 3: /* Sum of orders in closed list */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += perm[adjncy[jj]];
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          case 4: /* Sum of order-differences (w.r.t. current number) in closed 
+                     list (updated once in a while) */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += (i-perm[adjncy[jj]]);
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          default:
+            ;
+        }
+      }
+    }
+  }
+
+
+  /* time to decide what to return (iperm first: it needs perm, which is set to NULL below) */
+  if (r_iperm != NULL) {
+    /* use the 'degrees' array to build the iperm array (the inverse of perm) */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+
+  /* cleanup memory */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type selects the priority key: 1 (DFS), 2 (closed degree),
+                3 (sum of closed orders), 4 (sum of order differences),
+                5 (BFS with in-degree priority), 6 (hybrid of 1+2).
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to
+                the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm (or both) can be NULL, in which case the
+          corresponding array is not returned.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, *xadj;
+  int i, u, nvtxs, nopen, ntodo;
+  int32_t *adjncy, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* the degree of the vertices in the closed list (reused for iperm at the end) */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* the weighted degree of the vertices in the closed list for type==3 */
+  wdegrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees");
+
+  /* the sum of differences for type==4 */
+  sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod");
+
+  /* the encountering level of a vertex type==5 */
+  level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level");
+
+  /* The open+todo list of vertices. 
+     The vertices from [0..nopen) are the open vertices.
+     The vertices from [nopen..ntodo) are the todo vertices.
+     */
+  ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: ot"));
+
+  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: pos"));
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue and put the starting vertex in it */
+  queue = gk_i32pqCreate(nvtxs);
+  gk_i32pqInsert(queue, v, 1);
+
+  /* put v at the front of the open list (swaps the entries of 0 and v) */
+  pos[0] = ot[0] = v;
+  pos[v] = ot[v] = 0;
+  nopen = 1;
+  ntodo = nvtxs;
+
+  /* start processing the nodes; each iteration closes exactly one vertex */
+  for (i=0; i<nvtxs; i++) {
+    if (nopen == 0) { /* deal with non-connected graphs */
+      gk_i32pqInsert(queue, ot[0], 1);  
+      nopen++;
+    }
+
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+    if (ot[pos[v]] != v)
+      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
+    if (pos[v] >= nopen)
+      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);
+
+    /* remove v from the open list and re-arrange the todo part of the list */
+    ot[pos[v]]       = ot[nopen-1];
+    pos[ot[nopen-1]] = pos[v];
+    if (ntodo > nopen) {
+      ot[nopen-1]      = ot[ntodo-1];
+      pos[ot[ntodo-1]] = nopen-1;
+    }
+    nopen--;
+    ntodo--;
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) {
+        /* update ot list, if u is not in the open list by putting it at the end
+           of the open list. */
+        if (degrees[u] == 0) {
+          ot[pos[u]]     = ot[nopen];
+          pos[ot[nopen]] = pos[u];
+          ot[nopen]      = u;
+          pos[u]         = nopen;
+          nopen++;
+
+          level[u] = level[v]+1;
+          gk_i32pqInsert(queue, u, 0);  
+        }
+
+
+        /* update the in-closed degree */
+        degrees[u]++;
+
+        /* update the queues based on the type */
+        switch (type) {
+          case 1: /* DFS; NOTE(review): 1000*(i+1) can overflow a 32-bit int on very large graphs */
+            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
+            break;
+
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+
+          case 3: /* Sum of orders in closed list */
+            wdegrees[u] += i;
+            gk_i32pqUpdate(queue, u, wdegrees[u]);
+            break;
+
+          case 4: /* Sum of order-differences */
+            /* this is handled at the end of the loop */
+            ;
+            break;
+
+          case 5: /* BFS with in degree priority */
+            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
+            break;
+
+          case 6: /* Hybrid of 1+2 */
+            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
+            break;
+
+          default:
+            ;
+        }
+      }
+    }
+
+    if (type == 4) { /* update all the vertices in the open list */
+      for (j=0; j<nopen; j++) {
+        u = ot[j];
+        if (perm[u] != -1)
+          gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
+        sod[u] += degrees[u];
+        if (i<1000 || i%25==0)  /* after the first 1000 steps the keys are refreshed only every 25th step */
+          gk_i32pqUpdate(queue, u, sod[u]);
+      }
+    }
+
+    /*
+    for (j=0; j<ntodo; j++) {
+      if (pos[ot[j]] != j)
+        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
+    }
+    */
+
+  }
+
+
+  /* time to decide what to return (iperm first: it needs perm, which is set to NULL below) */
+  if (r_iperm != NULL) {
+    /* use the 'degrees' array to build the iperm array (the inverse of perm) */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+
+  /* cleanup memory */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes the single-source shortest path lengths from the
+    root node to all the other nodes in the graph. If the graph is not 
+    connected, then the shortest path to the vertices in the other components 
+    is -1.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the root of the single-source shortest path computations.
+    \param[OUT] r_sps receives the array sps, where sps[i] stores the length
+                of the shortest path from v to vertex i.
+                If no such path exists, then it is -1. Note that the returned
+                array will be either an array of int32_t or an array of floats.
+                The specific type is determined by the existence of non NULL
+                iadjwgt and fadjwgt arrays. If both of these arrays exist, then
+                priority is given to iadjwgt.
+
+    \note The returned array should be freed with gk_free(). A negative entry
+          marks an unreached vertex, so weights are presumably non-negative.
+*/
+/*************************************************************************/
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps)
+{
+  ssize_t i, *xadj;  /* i walks the xadj[] offsets, hence ssize_t rather than int */
+  int u, nvtxs;
+  int32_t *adjncy, *inqueue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue");
+  /* inqueue[u]: 0 = never queued, 1 = inserted in the queue, 2 = extracted (distance is final) */
+  /* determine if you will be computing using int32_t or float and proceed from there */
+  if (graph->iadjwgt != NULL) {
+    gk_i32pq_t *queue;
+    int32_t *adjwgt;
+    int32_t *sps;
+
+    adjwgt = graph->iadjwgt;
+
+    queue = gk_i32pqCreate(nvtxs);
+    gk_i32pqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes; the keys are the negated path lengths */
+    while ((v = gk_i32pqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
+          sps[u] = sps[v]+adjwgt[i];
+
+          if (inqueue[u])
+            gk_i32pqUpdate(queue, u, -sps[u]);
+          else {
+            gk_i32pqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_i32pqDestroy(queue);
+  }
+  else {
+    gk_fpq_t *queue;
+    float *adjwgt;
+    float *sps;
+
+    adjwgt = graph->fadjwgt;  /* NOTE(review): not checked; if it is NULL as well, adjwgt[i] below dereferences NULL */
+
+    queue = gk_fpqCreate(nvtxs);
+    gk_fpqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes; the keys are the negated path lengths */
+    while ((v = gk_fpqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
+          sps[u] = sps[v]+adjwgt[i];
+
+          if (inqueue[u])
+            gk_fpqUpdate(queue, u, -sps[u]);
+          else {
+            gk_fpqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_fpqDestroy(queue);
+  }
+
+  gk_free((void **)&inqueue, LTERM);
+
+}
+
+
+
+#ifdef XXX
+
+/*************************************************************************/
+/*! Sorts each adjacency list by increasing index, permuting the values along.
+    \param graph the graph itself. NOTE(review): compiled only under '#ifdef XXX'.
+*/
+/**************************************************************************/
+void gk_graph_SortAdjacencies(gk_graph_t *graph)
+{
+  int n, nn=0;
+  ssize_t *ptr;
+  int *ind;
+  float *val;
+  /* NOTE(review): 'what' is neither a parameter nor a local of this function. */
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!graph->rowptr)
+        gk_errexit(SIGERR, "Row-based view of the graphrix does not exists.\n");
+
+      n   = graph->nrows;
+      ptr = graph->rowptr;
+      ind = graph->rowind;
+      val = graph->rowval;
+      break;
+
+    case GK_CSR_COL:
+      if (!graph->colptr)
+        gk_errexit(SIGERR, "Column-based view of the graphrix does not exists.\n");
+
+      n   = graph->ncols;
+      ptr = graph->colptr;
+      ind = graph->colind;
+      val = graph->colval;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+  #pragma omp parallel if (n > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;
+    float *tval;
+
+    #pragma omp single
+    for (i=0; i<n; i++) 
+      nn = gk_max(nn, ptr[i+1]-ptr[i]);  /* longest list => size of the scratch arrays */
+  
+    cand = gk_ikvmalloc(nn, "gk_graph_SortIndices: cand");
+    tval = gk_fmalloc(nn, "gk_graph_SortIndices: tval");
+  
+    #pragma omp for schedule(static)
+    for (i=0; i<n; i++) {
+      for (k=0, j=ptr[i]; j<ptr[i+1]; j++) {
+        if (j > ptr[i] && ind[j] < ind[j-1])
+          k = 1; /* an inversion */
+        cand[j-ptr[i]].val = j-ptr[i];  /* original position within the list */
+        cand[j-ptr[i]].key = ind[j];
+        tval[j-ptr[i]]     = val[j];
+      }
+      if (k) {  /* only the lists that contain an inversion are sorted */
+        gk_ikvsorti(ptr[i+1]-ptr[i], cand);
+        for (j=ptr[i]; j<ptr[i+1]; j++) {
+          ind[j] = cand[j-ptr[i]].key;
+          val[j] = tval[cand[j-ptr[i]].val];  /* the value follows its index */
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &tval, LTERM);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph containing a certain set of rows.
+    \param graph is the original graph (NOTE(review): compiled only under '#ifdef XXX').
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractRows(gk_graph_t *graph, int nrows, int *rind)
+{
+  ssize_t i, ii, j, nnz;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = nrows;
+  ngraph->ncols = graph->ncols;
+
+  for (nnz=0, i=0; i<nrows; i++)  /* first pass: total number of entries in the selected rows */
+    nnz += graph->rowptr[rind[i]+1]-graph->rowptr[rind[i]];
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, ii=0; ii<nrows; ii++) {  /* second pass: copy the rows in the order given by rind */
+    i = rind[ii];
+    gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+    gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+    nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    ngraph->rowptr[++j] = nnz;
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph corresponding to a specified partitioning of rows.
+    \param graph is the original graph (NOTE(review): compiled only under '#ifdef XXX').
+    \param part is the partitioning vector of the rows.
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid)
+{
+  ssize_t i, j, nnz;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = 0;
+  ngraph->ncols = graph->ncols;
+
+  for (nnz=0, i=0; i<graph->nrows; i++) {  /* first pass: count the rows and entries of partition pid */
+    if (part[i] == pid) {
+      ngraph->nrows++;
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    }
+  }
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<graph->nrows; i++) {  /* second pass: copy the matching rows in their original order */
+    if (part[i] == pid) {
+      gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+      gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+      ngraph->rowptr[++j] = nnz;
+    }
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Splits the graph into multiple sub-graphs based on the provided
+    color array. NOTE(review): compiled only under '#ifdef XXX'.
+    \param graph is the original graph.
+    \param color is an array of size equal to the number of non-zeros
+           in the graph (row-wise structure). The graph is split into
+           as many parts as the number of colors. For meaningful results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array of graphs, one for each supplied color number.
+*/
+/**************************************************************************/
+gk_graph_t **gk_graph_Split(gk_graph_t *graph, int *color)
+{
+  ssize_t i, j;
+  int nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_graph_t **sgraphs;
+
+  nrows  = graph->nrows;
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color)+1;  /* presumably the largest color value, plus one */
+
+  sgraphs = (gk_graph_t **)gk_malloc(sizeof(gk_graph_t *)*ncolors, "gk_graph_Split: sgraphs");
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i] = gk_graph_Create();
+    sgraphs[i]->nrows  = graph->nrows;
+    sgraphs[i]->ncols  = graph->ncols;
+    sgraphs[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_graph_Split: sgraphs[i]->rowptr"); 
+  }
+
+  for (i=0; i<nrows; i++) {  /* count, per color, the entries of every row */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      sgraphs[color[j]]->rowptr[i]++;
+  }
+  for (i=0; i<ncolors; i++) 
+    MAKECSR(j, nrows, sgraphs[i]->rowptr);  /* presumably turns the counts into offsets */
+
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i]->rowind = gk_imalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowind"); 
+    sgraphs[i]->rowval = gk_fmalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowval"); 
+  }
+
+  for (i=0; i<nrows; i++) {  /* scatter every entry into the sub-graph of its color */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      sgraphs[color[j]]->rowind[sgraphs[color[j]]->rowptr[i]] = rowind[j];
+      sgraphs[color[j]]->rowval[sgraphs[color[j]]->rowptr[i]] = rowval[j];
+      sgraphs[color[j]]->rowptr[i]++;
+    }
+  }
+
+  for (i=0; i<ncolors; i++) 
+    SHIFTCSR(j, nrows, sgraphs[i]->rowptr);  /* presumably undoes the rowptr advance made by the scatter */
+
+  return sgraphs;
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the graph. The pruning takes place 
+    by analyzing the row structure of the graph. The pruning takes place
+    by removing rows/columns but it does not affect the numbering of the
+    remaining rows/columns. NOTE(review): compiled only under '#ifdef XXX'.
+   
+    \param graph the graph to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the graph will be pruned,
+    \param minf is the minimum number of rows (columns) that a column (row) must
+           be present in order to be kept,
+    \param maxf is the maximum number of rows (columns) that a column (row) must
+          be present at in order to be kept.
+    \returns the pruned graph consisting only of its row-based structure. 
+          The input graph is not modified. 
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+  
+  nrows = ngraph->nrows = graph->nrows;
+  ncols = ngraph->ncols = graph->ncols;
+
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+
+  nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr");
+  nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind");
+  nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval");
+
+  /* the output arrays are sized for the unpruned graph, i.e., the worst case */
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen");
+
+      for (i=0; i<nrows; i++) {  /* count in how many rows every column appears */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      for (i=0; i<ncols; i++)  /* turn the counts into keep (1) / drop (0) flags */
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz;  /* a dropped row stays in the numbering as an empty row */
+      }
+      break;
+
+    default:
+      gk_graph_Free(&ngraph);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return ngraph;
+}
+
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the graph to be unit 
+    length. NOTE(review): compiled only under '#ifdef XXX'.
+    \param graph the graph itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+void gk_graph_Normalize(gk_graph_t *graph, int what, int norm)
+{
+  ssize_t i, j;
+  int n;
+  ssize_t *ptr;
+  float *val, sum;
+
+  if (what&GK_CSR_ROW && graph->rowval) {  /* rows are scaled in place */
+    n   = graph->nrows;
+    ptr = graph->rowptr;
+    val = graph->rowval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS) 
+    {
+      #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++){
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; /* assume val[j] > 0 */ 
+        }
+        if (sum > 0) {  /* rows whose sum is not positive are left unchanged */
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+  	
+        }
+      }
+    }
+  }
+
+  if (what&GK_CSR_COL && graph->colval) {  /* columns are scaled in place */
+    n   = graph->ncols;
+    ptr = graph->colptr;
+    val = graph->colval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS)
+    {
+    #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++)
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; 
+        if (sum > 0) {  /* columns whose sum is not positive are left unchanged */
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+        }
+      }
+    }
+  }
+}
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/htable.c b/3rdParty/metis/metis-5.1.0/GKlib/htable.c
new file mode 100644
index 000000000..078e11434
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/htable.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2004, Regents of the University of Minnesota
+ *
+ * This file contains routines for manipulating a direct-access hash table
+ *
+ * Started 3/22/04
+ * George
+ *
+ */
+
+#include <GKlib.h>
+
+/******************************************************************************
+* Allocates a hash-table with nelements slots and returns it with all slots empty
+*******************************************************************************/
+gk_HTable_t *HTable_Create(int nelements)
+{
+  gk_HTable_t *table = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable");
+
+  /* the slot array together with its length */
+  table->harray    = gk_ikvmalloc(nelements, "HTable_Create: harray");
+  table->nelements = nelements;
+
+  /* mark every slot as empty and zero the element count */
+  HTable_Reset(table);
+  return table;
+}
+
+
+/******************************************************************************
+* Empties the hash-table: every slot is marked unused and the count is zeroed
+*******************************************************************************/
+void HTable_Reset(gk_HTable_t *htable)
+{
+  int remaining = htable->nelements;
+
+  htable->htsize = 0;
+  /* walk the slots from the last one down to slot 0 */
+  while (remaining-- > 0)
+    htable->harray[remaining].key = HTABLE_EMPTY;
+}
+
+/******************************************************************************
+* This function resizes the hash-table, re-inserting only the live entries
+*******************************************************************************/
+void HTable_Resize(gk_HTable_t *htable, int nelements)
+{
+  int i, old_nelements;
+  gk_ikv_t *old_harray;
+
+  old_nelements = htable->nelements;
+  old_harray    = htable->harray;
+
+  /* install a new, completely empty slot array of the requested size */
+  htable->nelements = nelements;
+  htable->harray    = gk_ikvmalloc(nelements, "HTable_Resize: harray");
+  HTable_Reset(htable);
+
+  /* Re-insert the live entries. Slots holding HTABLE_DELETED are tombstones,
+     not data; copying them over would create bogus entries and inflate htsize. */
+  for (i=0; i<old_nelements; i++) {
+    if (old_harray[i].key != HTABLE_EMPTY && old_harray[i].key != HTABLE_DELETED)
+      HTable_Insert(htable, old_harray[i].key, old_harray[i].val);
+  }
+
+  /* remove old harray */
+  gk_free((void **)&old_harray, LTERM);
+}
+
+
+/******************************************************************************
+* Stores the key-value pair in the first free slot at or after the key's home slot
+*******************************************************************************/
+void HTable_Insert(gk_HTable_t *htable, int key, int val)
+{
+  int pass, i, lo, hi, home;
+  gk_ikv_t *slot;
+
+  /* double the table once more than half of its slots are in use */
+  if (htable->htsize > htable->nelements/2)
+    HTable_Resize(htable, 2*htable->nelements);
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    lo = (pass == 0 ? home : 0);
+    hi = (pass == 0 ? htable->nelements : home);
+
+    for (i=lo; i<hi; i++) {
+      slot = htable->harray + i;
+      if (slot->key != HTABLE_EMPTY && slot->key != HTABLE_DELETED)
+        continue;
+      /* first free (never used or deleted) slot: claim it */
+      slot->key = key;
+      slot->val = val;
+      htable->htsize++;
+      return;
+    }
+  }
+}
+
+
+/******************************************************************************
+* Removes the first entry found for key (if any), leaving a deleted marker behind
+*******************************************************************************/
+void HTable_Delete(gk_HTable_t *htable, int key)
+{
+  int pass, i, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 scans [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    i    = (pass == 0 ? home : 0);
+    stop = (pass == 0 ? htable->nelements : home);
+
+    for (; i<stop; i++) {
+      if (htable->harray[i].key != key)
+        continue;
+
+      /* leave a tombstone so that later probes keep walking past this slot */
+      htable->harray[i].key = HTABLE_DELETED;
+      htable->htsize--;
+      return;
+    }
+  }
+
+}
+
+
+/******************************************************************************
+* Returns the value stored for key in the hash-table, or -1 if key is not found
+*******************************************************************************/
+int HTable_Search(gk_HTable_t *htable, int key)
+{
+  int pass, i, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    i    = (pass == 0 ? home : 0);
+    stop = (pass == 0 ? htable->nelements : home);
+
+    for (; i<stop; i++) {
+      if (htable->harray[i].key == key)
+        return htable->harray[i].val;
+      if (htable->harray[i].key == HTABLE_EMPTY)
+        return -1;  /* a never-used slot ends the probe sequence */
+    }
+  }
+
+  /* every slot was probed without a match */
+  return -1;
+}
+
+
+/******************************************************************************
+* This function enumerates the values stored under key, one per call: pass
+* HTABLE_FIRST to start, anything else to continue. Returns 1 or -1 (no more).
+*******************************************************************************/
+int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type)
+{
+  int i, slot;
+  /* The scan state lives in statics, so only one enumeration can be active at
+     a time (not reentrant) and the table must not be resized in between.
+     [first, last) is the window of probe positions still to be visited; the
+     positions are unwrapped, i.e., position p refers to slot p-nelements
+     once p >= nelements. */
+  static int first, last;
+
+  if (type == HTABLE_FIRST) {
+    /* last must lie a full table-length past first; with first == last the
+       window is empty and no entry can ever be reported. */
+    first = HTable_HFunction(htable->nelements, key);
+    last  = first + htable->nelements;
+  }
+
+  /* walk the remaining probe positions, wrapping around the end of the table */
+  for (i=first; i<last; i++) {
+    slot = (i < htable->nelements ? i : i - htable->nelements);
+
+    if (htable->harray[slot].key == key) {
+      *r_val = htable->harray[slot].val;
+      first = i+1;  /* resume right after this match on the next call */
+      return 1;
+    }
+    else if (htable->harray[slot].key == HTABLE_EMPTY)
+      return -1;  /* a never-used slot terminates the probe sequence */
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* Removes the first entry found for key and returns the value that it stored
+*******************************************************************************/
+int HTable_SearchAndDelete(gk_HTable_t *htable, int key)
+{
+  int pass, i, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    i    = (pass == 0 ? home : 0);
+    stop = (pass == 0 ? htable->nelements : home);
+
+    for (; i<stop; i++) {
+      if (htable->harray[i].key == key) {
+        /* found it: leave a tombstone behind and hand back the value */
+        htable->harray[i].key = HTABLE_DELETED;
+        htable->htsize--;
+        return htable->harray[i].val;
+      }
+
+      /* reaching a never-used slot means that the key is not in the table */
+      if (htable->harray[i].key == HTABLE_EMPTY)
+        gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
+    }
+  }
+
+  /* every slot was probed without finding the key */
+  return -1;
+
+}
+
+
+
+/******************************************************************************
+* This function destroys the data structures associated with the hash-table
+*******************************************************************************/
+void HTable_Destroy(gk_HTable_t *htable)
+{
+  gk_free((void **)&htable->harray, &htable, LTERM);  /* the slot array first, then the table struct itself */
+}
+
+
+/******************************************************************************
+* This is the hash-function: the remainder of key divided by nelements
+*******************************************************************************/
+int HTable_HFunction(int nelements, int key)
+{
+  return (int)(key%nelements);  /* NOTE(review): undefined for nelements == 0; a negative key gives a result <= 0 */
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/io.c b/3rdParty/metis/metis-5.1.0/GKlib/io.c
new file mode 100644
index 000000000..caaedcb59
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/io.c
@@ -0,0 +1,384 @@
+/*!
+\file  io.c
+\brief Various file I/O functions.
+
+This file contains various functions that perform I/O.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: io.c 12591 2012-09-01 19:03:15Z karypis $ \endverbatim
+*/
+
+#ifdef HAVE_GETLINE
+/* Get getline to be defined. */
+#define _GNU_SOURCE
+#include <stdio.h>
+#undef _GNU_SOURCE
+#endif
+
+#include <GKlib.h>
+
+/*************************************************************************
+* This function opens a file. If fopen() fails, the file name, mode and msg
+* are reported via perror() and errexit() is called; NULL is returned after.
+**************************************************************************/
+FILE *gk_fopen(char *fname, char *mode, const char *msg)
+{
+  FILE *fp;
+  char errmsg[8192];
+
+  if ((fp = fopen(fname, mode)) != NULL)
+    return fp;
+
+  /* bounded formatting: a long fname/msg is truncated, not written past errmsg */
+  snprintf(errmsg, sizeof(errmsg), "file: %s, mode: %s, [%s]", fname, mode, msg);
+  perror(errmsg);
+  errexit("Failed on gk_fopen()\n");
+  return NULL;
+}
+
+
+/*************************************************************************
+* This function closes a file. The return value of fclose() is not checked.
+**************************************************************************/
+void gk_fclose(FILE *fp)
+{
+  fclose(fp);
+}
+
+
+/*************************************************************************/
+/*! This function is the GKlib implementation of glibc's getline()
+    function. *lineptr is allocated here if it is NULL or *n is 0, and is
+    grown (updating *n) as needed; the stored line keeps its '\n'.
+    \returns -1 if nothing could be read (EOF), else the number of bytes read.
+*/
+/*************************************************************************/
+gk_idx_t gk_getline(char **lineptr, size_t *n, FILE *stream)
+{
+#ifdef HAVE_GETLINE
+  return getline(lineptr, n, stream);
+#else
+  size_t i;
+  int ch;
+
+  if (feof(stream))
+    return -1;
+
+  /* Initial memory allocation if *lineptr is NULL */
+  if (*lineptr == NULL || *n == 0) {
+    *n = 1024;
+    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
+  }
+
+  /* get into the main loop */
+  i = 0;
+  while ((ch = getc(stream)) != EOF) {
+    (*lineptr)[i++] = (char)ch;
+    /* Grow the buffer once only the slot for '\0' is left. Using >= (not ==)
+       also covers a caller-supplied buffer with *n == 1, where i+1 starts
+       out already past *n and an equality test would never fire. */
+    if (i+1 >= *n) {
+      *n = 2*(*n);
+      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
+    }
+    if (ch == '\n')
+      break;
+  }
+  (*lineptr)[i] = '\0';
+
+  return (i == 0 ? -1 : i);
+#endif
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of strings.
+    \param fname is the name of the file
+    \param r_nlines if not NULL, receives the number of lines that were read.
+    \returns the array of lines; NULL if gk_getfilestats() reports 0 lines. */
+/*************************************************************************/
+char **gk_readfile(char *fname, gk_idx_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, maxlines;
+  char *line=NULL, **lines=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
+  if (maxlines > 0) {
+    lines = (char **)gk_malloc(maxlines*sizeof(char *), "gk_readfile: lines");
+    fpin = gk_fopen(fname, "r", "gk_readfile");
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      if (nlines == maxlines) { /* more lines than were counted; grow */
+        maxlines *= 2;
+        lines = (char **)gk_realloc(lines, maxlines*sizeof(char *), "gk_readfile: lines");
+      }
+      gk_strtprune(line, "\n\r");
+      lines[nlines++] = gk_strdup(line);
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return lines;
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of int32_t, parsing one value from each line.
+    \param fname is the name of the file
+    \param r_nlines if not NULL, receives the number of lines that were read.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfile(char *fname, gk_idx_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, maxlines;
+  char *line=NULL;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
+  if (maxlines > 0) {
+    array = gk_i32malloc(maxlines, "gk_i32readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i32readfile");
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      if (nlines == maxlines) { /* more lines than were counted; grow */
+        maxlines *= 2;
+        array = (int32_t *)gk_realloc(array, maxlines*sizeof(int32_t), "gk_i32readfile: array");
+      }
+      sscanf(line, "%"SCNd32, &array[nlines++]);
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of int64_t, parsing one value from each line.
+    \param fname is the name of the file
+    \param r_nlines if not NULL, receives the number of lines that were read.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfile(char *fname, gk_idx_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, maxlines;
+  char *line=NULL;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
+  if (maxlines > 0) {
+    array = gk_i64malloc(maxlines, "gk_i64readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i64readfile");
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      if (nlines == maxlines) { /* more lines than were counted; grow */
+        maxlines *= 2;
+        array = (int64_t *)gk_realloc(array, maxlines*sizeof(int64_t), "gk_i64readfile: array");
+      }
+      sscanf(line, "%"SCNd64, &array[nlines++]);
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int32_t.
+    \param fname is the name of the file
+    \param r_nelmnts is set to the number of elements in the returned array,
+           or to -1 if the file could not be read. */
+/*************************************************************************/
+int32_t *gk_i32readfilebin(char *fname, ssize_t *r_nelmnts)
+{
+  ssize_t fsize, nelmnts;
+  size_t nread;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = -1;
+
+  fsize = (ssize_t) gk_getfsize(fname);
+  if (fsize%sizeof(int32_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int32_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int32_t);
+  array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array");
+
+  /* close the stream before checking the count so that no path leaks it */
+  fpin = gk_fopen(fname, "rb", "gk_i32readfilebin");
+  nread = fread(array, sizeof(int32_t), nelmnts, fpin);
+  gk_fclose(fpin);
+  if (nread != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts);
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+
+  *r_nelmnts = nelmnts;
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int64_t.
+    \param fname is the name of the file
+    \param r_nelmnts is set to the number of elements in the returned array,
+           or to -1 if the file could not be read. */
+/*************************************************************************/
+int64_t *gk_i64readfilebin(char *fname, ssize_t *r_nelmnts)
+{
+  ssize_t fsize, nelmnts;
+  size_t nread;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = -1;
+
+  fsize = (ssize_t) gk_getfsize(fname);
+  if (fsize%sizeof(int64_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int64_t);
+  array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array");
+
+  /* close the stream before checking the count so that no path leaks it */
+  fpin = gk_fopen(fname, "rb", "gk_i64readfilebin");
+  nread = fread(array, sizeof(int64_t), nelmnts, fpin);
+  gk_fclose(fpin);
+  if (nread != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts);
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+
+  *r_nelmnts = nelmnts;
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of float.
+    \param fname is the name of the file
+    \param r_nelmnts is set to the number of elements in the returned array,
+           or to -1 if the file could not be read. */
+/*************************************************************************/
+float *gk_freadfilebin(char *fname, ssize_t *r_nelmnts)
+{
+  ssize_t fsize, nelmnts;
+  size_t nread;
+  float *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = -1;
+
+  fsize = (ssize_t) gk_getfsize(fname);
+  if (fsize%sizeof(float) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(float);
+  array = gk_fmalloc(nelmnts, "gk_freadfilebin: array");
+
+  /* close the stream before checking the count so that no path leaks it */
+  fpin = gk_fopen(fname, "rb", "gk_freadfilebin");
+  nread = fread(array, sizeof(float), nelmnts, fpin);
+  gk_fclose(fpin);
+  if (nread != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts);
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+
+  *r_nelmnts = nelmnts;
+  return array;
+}
+
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_fwritefilebin(char *fname, size_t n, float *a)
+{
+  FILE *fpout;
+  size_t nwritten;
+
+  /* the file is opened in "wb" mode and receives the n floats as they are */
+  fpout    = gk_fopen(fname, "wb", "gk_fwritefilebin");
+  nwritten = fwrite(a, sizeof(float), n, fpout);
+  gk_fclose(fpout);
+
+  return nwritten;
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of double.
+    \param fname is the name of the file
+    \param r_nelmnts is set to the number of elements in the returned array,
+           or to -1 if the file could not be read. */
+/*************************************************************************/
+double *gk_dreadfilebin(char *fname, ssize_t *r_nelmnts)
+{
+  ssize_t fsize, nelmnts;
+  size_t nread;
+  double *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = -1;
+
+  fsize = (ssize_t) gk_getfsize(fname);
+  if (fsize%sizeof(double) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(double);
+  array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array");
+
+  /* close the stream before checking the count so that no path leaks it */
+  fpin = gk_fopen(fname, "rb", "gk_dreadfilebin");
+  nread = fread(array, sizeof(double), nelmnts, fpin);
+  gk_fclose(fpin);
+  if (nread != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zd\n", nelmnts);
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+
+  *r_nelmnts = nelmnts;
+  return array;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/itemsets.c b/3rdParty/metis/metis-5.1.0/GKlib/itemsets.c
new file mode 100644
index 000000000..65b5af40d
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/itemsets.c
@@ -0,0 +1,210 @@
+/*!
+ * \file
+ * \brief Frequent/Closed itemset discovery routines 
+ *
+ * This file contains the code for finding frequent/closed itemsets. These routines
+ * are implemented using a call-back mechanism to deal with the discovered itemsets.
+ *
+ * \date 6/13/2008
+ * \author George Karypis
+ * \version\verbatim $Id: itemsets.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+/*-------------------------------------------------------------*/
+/*! Data structures for use within this module */
+/*-------------------------------------------------------------*/
+typedef struct {
+  int minfreq;  /* the minimum frequency of a pattern */
+  int maxfreq;  /* the maximum frequency of a pattern */
+  int minlen;   /* the minimum length of the requested pattern */
+  int maxlen;   /* the maximum length of the requested pattern */
+  int tnitems;  /* the initial range of the item space */
+
+  /* the call-back function that is invoked for every reported itemset */
+  void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids); 
+  void *stateptr;   /* the user-supplied pointer to pass to the callback */
+
+  /* workspace variables (shared by all the projections) */
+  int *rmarker;    /* per-row 0/1 marks of the rows kept by a projection */
+  gk_ikv_t *cand;  /* candidate columns: key is the frequency, val the column */
+} isparams_t;
+
+
+/*-------------------------------------------------------------*/
+/*! Prototypes for the helper routines defined later in this module */
+/*-------------------------------------------------------------*/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix);
+gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
+
+
+
+/*************************************************************************/
+/*! The entry point of the frequent itemset discovery code. The transactions
+    are given in CSR form (tranptr/tranind). maxfreq == -1 stands for the
+    number of transactions and maxlen == -1 for the number of columns.
+    process_itemset is called with stateptr for each reported itemset. */
+/*************************************************************************/
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, 
+        int minfreq, int maxfreq, int minlen, int maxlen, 
+        void (*process_itemset)(void *stateptr, int nitems, int *itemids, 
+                                int ntrans, int *transids), void *stateptr)
+{
+  gk_csr_t *mat, *pmat;
+  isparams_t params;
+  int *pattern;
+
+  /* Create the matrix; with no items at all there is no tranind[] entry to read */
+  mat = gk_csr_Create();
+  mat->nrows  = ntrans;
+  mat->ncols  = (tranptr[ntrans] > 0 ? tranind[gk_iargmax(tranptr[ntrans], tranind)]+1 : 0);
+  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
+  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
+  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
+
+  /* Setup the parameters */
+  params.minfreq  = minfreq;
+  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);
+  params.minlen   = minlen;
+  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);
+  params.tnitems  = mat->ncols;
+  params.callback = process_itemset;
+  params.stateptr = stateptr;
+  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
+  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
+
+  /* Perform the initial projection */
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+  pmat = itemsets_project_matrix(&params, mat, -1);
+  gk_csr_Free(&mat);
+
+  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
+  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern); 
+
+  gk_csr_Free(&pmat);
+  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
+}
+
+
+
+/*************************************************************************/
+/*! The recursive routine for DFS-based frequent pattern discovery. Every
+    column of mat extends the prefix by one item; the result is reported if
+    it has >= minlen items and is expanded further while it has < maxlen. */
+/*************************************************************************/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix)
+{
+  ssize_t col;
+  int newlen = preflen+1;
+  gk_csr_t *projected;
+
+  for (col=0; col<mat->ncols; col++) {
+    prefix[preflen] = mat->colids[col];
+    if (newlen >= params->minlen)
+      (*params->callback)(params->stateptr, newlen, prefix, 
+           mat->colptr[col+1]-mat->colptr[col], mat->colind+mat->colptr[col]);
+
+    if (newlen >= params->maxlen)
+      continue;
+    projected = itemsets_project_matrix(params, mat, col);
+    itemsets_find_frequent_itemsets(params, projected, newlen, prefix);
+    gk_csr_Free(&projected);
+  }
+}
+
+
+/******************************************************************************/
+/*! This function projects a matrix w.r.t. a particular column (cid); a cid
+    of -1 requests the initial projection, which keeps all the rows.
+    It performs the following steps:
+    - Determines the length of each column (after cid) that is remaining
+    - Sorts the columns in increasing length
+    - Creates a column-based version of the matrix with the proper
+      column ordering, keeping columns with minfreq <= length <= maxfreq. */
+/*******************************************************************************/
+gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
+{
+  ssize_t i, j, k, ii, pnnz;
+  int nrows, ncols, pnrows, pncols;
+  ssize_t *colptr, *pcolptr;
+  int *colind, *colids, *pcolind, *pcolids, *rmarker;
+  gk_csr_t *pmat;
+  gk_ikv_t *cand;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colids = mat->colids;
+
+  rmarker = params->rmarker;
+  cand    = params->cand;
+
+  /* The projection has one row per entry of column cid (all rows if cid is -1) */
+  /* Allocate space for the projected matrix based on what you know thus far */
+  pmat = gk_csr_Create();
+  pmat->nrows  = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
+
+  /* rmarker[r] is set to 1 for every row r that takes part in this projection */
+  /* Mark the rows that will be kept and determine the prowids */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 1, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 1;
+  }
+
+  /* k counts the marked rows of column i; pnnz sums k over the kept columns */
+  /* Determine the length of each column that will be left in the projected matrix */
+  for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
+    for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
+      k += rmarker[colind[j]];
+    }
+    if (k >= params->minfreq && k <= params->maxfreq) {
+      cand[pncols].val   = i;
+      cand[pncols++].key = k;
+      pnnz += k;
+    }
+  }
+
+  /* Sort the columns in increasing order */
+  gk_ikvsorti(pncols, cand);
+
+
+  /* Allocate space for the remaining fields of the projected matrix */
+  pmat->ncols  = pncols;
+  pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
+  pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
+  pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
+
+  /* Columns are copied in cand[] order; cand[ii].val is the column's index in mat */
+  /* Populate the projected matrix */
+  pcolptr[0] = 0;
+  for (pnnz=0, ii=0; ii<pncols; ii++) {
+    i = cand[ii].val;
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (rmarker[colind[j]]) 
+        pcolind[pnnz++] = colind[j];
+    }
+
+    pcolids[ii] = colids[i];
+    pcolptr[ii+1] = pnnz;
+  }
+
+  /* Undo exactly the marks that were set above (rmarker is shared workspace) */
+  /* Reset the rmarker array */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 0, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 0;
+  }
+
+
+  return pmat;
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/mcore.c b/3rdParty/metis/metis-5.1.0/GKlib/mcore.c
new file mode 100644
index 000000000..6442e03a9
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/mcore.c
@@ -0,0 +1,393 @@
+/*!
+\file 
+\brief Functions dealing with creating and allocating mcores
+
+\date Started 5/30/11
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version $Id: mcore.c 13953 2013-03-30 16:20:07Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! This function creates an mcore with a core of coresize bytes (no core
+    is allocated if coresize is 0) and room to record 2048 malloc ops. */
+/*************************************************************************/
+gk_mcore_t *gk_mcoreCreate(size_t coresize)
+{
+  gk_mcore_t *newcore = (gk_mcore_t *)gk_malloc(sizeof(gk_mcore_t), "gk_mcoreCreate: mcore");
+  memset(newcore, 0, sizeof(gk_mcore_t));
+
+  newcore->coresize = coresize;
+  newcore->corecpos = 0;
+  if (coresize == 0)
+    newcore->core = NULL;
+  else
+    newcore->core = gk_malloc(newcore->coresize, "gk_mcoreCreate: core");
+
+  /* allocate the memory for keeping track of malloc ops */
+  newcore->nmops = 2048;
+  newcore->cmop  = 0;
+  newcore->mops  = (gk_mop_t *)gk_malloc(newcore->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops");
+
+  return newcore;
+}
+
+
+/*************************************************************************/
+/*! This function creates an mcore. This version is used for gkmcore and
+    relies on plain malloc(); it returns NULL if either allocation fails. */
+/*************************************************************************/
+gk_mcore_t *gk_gkmcoreCreate()
+{
+  gk_mcore_t *tracker = (gk_mcore_t *)malloc(sizeof(gk_mcore_t));
+
+  if (tracker == NULL)
+    return NULL;
+  memset(tracker, 0, sizeof(gk_mcore_t));
+
+  /* allocate the memory for keeping track of malloc ops */
+  tracker->nmops = 2048;
+  tracker->cmop  = 0;
+  tracker->mops  = (gk_mop_t *)malloc(tracker->nmops*sizeof(gk_mop_t));
+  if (tracker->mops != NULL)
+    return tracker;
+  /* could not get the list of mops; undo the first allocation */
+  free(tracker);
+  return NULL;
+}
+
+
+/*************************************************************************/
+/*! This function destroys an mcore, optionally printing its statistics, and
+    sets *r_mcore to NULL. A warning is printed if anything is still recorded. */
+/*************************************************************************/
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+  int unfreed;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats)
+    printf("\n gk_mcore statistics\n" 
+           "           coresize: %12zu         nmops: %12zu  cmop: %6zu\n"
+           "        num_callocs: %12zu   num_hallocs: %12zu\n"
+           "       size_callocs: %12zu  size_hallocs: %12zu\n"
+           "        cur_callocs: %12zu   cur_hallocs: %12zu\n"
+           "        max_callocs: %12zu   max_hallocs: %12zu\n",
+           mc->coresize, mc->nmops, mc->cmop,
+           mc->num_callocs,  mc->num_hallocs,
+           mc->size_callocs, mc->size_hallocs,
+           mc->cur_callocs,  mc->cur_hallocs,
+           mc->max_callocs,  mc->max_hallocs);
+
+  unfreed = (mc->cur_callocs != 0 || mc->cur_hallocs != 0 || mc->cmop != 0);
+  if (unfreed)
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_callocs: %6zu  cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_callocs,  mc->cur_hallocs, mc->cmop);
+
+  gk_free((void **)&mc->core, &mc->mops, &mc, LTERM);
+  *r_mcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function destroys an mcore. This version is for gkmcore: it prints
+    the heap statistics on request and sets *r_mcore to NULL when done. */
+/*************************************************************************/
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats)
+    printf("\n gk_mcore statistics\n" 
+           "         nmops: %12zu  cmop: %6zu\n"
+           "   num_hallocs: %12zu\n"
+           "  size_hallocs: %12zu\n"
+           "   cur_hallocs: %12zu\n"
+           "   max_hallocs: %12zu\n",
+           mc->nmops, mc->cmop,
+           mc->num_hallocs,
+           mc->size_hallocs,
+           mc->cur_hallocs,
+           mc->max_hallocs);
+
+  if (!(mc->cur_hallocs == 0 && mc->cmop == 0))
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_hallocs, mc->cmop);
+
+  /* both pieces were obtained with malloc() in gk_gkmcoreCreate() */
+  free(mc->mops);
+  free(mc);
+
+  *r_mcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function allocates space from the core/heap. The request is padded
+    to a multiple of 8 bytes and is served from the core if it fits below
+    coresize, otherwise from the heap via gk_malloc(). Either way the
+    allocation is recorded with gk_mcoreAdd() so that gk_mcorePop() can
+    release it later. */
+/*************************************************************************/
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes)
+{
+  void *block;
+  size_t rem;
+
+  /* pad to make pointers 8-byte aligned */
+  rem = nbytes%8;
+  if (rem != 0)
+    nbytes += 8 - rem;
+
+  if (mcore->corecpos + nbytes >= mcore->coresize) {
+    /* the core cannot take it; service the request from the heap */
+    block = gk_malloc(nbytes, "gk_mcoremalloc: ptr");
+    gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, block);
+    return block;
+  }
+
+  /* service this request from the core */
+  block = ((char *)mcore->core)+mcore->corecpos;
+  mcore->corecpos += nbytes;
+
+  gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, block);
+
+  return block;
+}
+
+
+/*************************************************************************/
+/*! This function sets a marker in the stack of malloc ops to be used
+    subsequently for freeing purposes (gk_mcorePop() unwinds the stack
+    down to the most recent marker). */
+/*************************************************************************/
+void gk_mcorePush(gk_mcore_t *mcore)
+{
+  gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+  /* printf("MCPPUSH:   %zu\n", mcore->cmop-1); */
+}
+
+
+/*************************************************************************/
+/*! This function sets a marker in the stack of malloc ops to be used
+    subsequently for freeing purposes (gk_gkmcorePop() unwinds the stack
+    down to the most recent marker). This is the gkmcore version. */
+/*************************************************************************/
+void gk_gkmcorePush(gk_mcore_t *mcore)
+{
+  gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+  /* printf("MCPPUSH:   %zu\n", mcore->cmop-1); */
+}
+
+
+/*************************************************************************/
+/*! This function frees all mops since the last push. The mop stack is
+    unwound from the top: core mops give their bytes back to the core, heap
+    mops are released with gk_free(), and the unwinding stops right after
+    the most recent marker has been removed (or when the stack is empty).
+ */
+/*************************************************************************/
+void gk_mcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    /* take the top-most mop off the stack */
+    mop = mcore->mops + (--mcore->cmop);
+
+    if (mop->type == GK_MOPT_MARK)   /* push marker */
+      return;
+
+    if (mop->type == GK_MOPT_CORE) { /* core free */
+      if (mcore->corecpos < mop->nbytes)
+        errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n",
+            mcore->coresize, mcore->corecpos, mop->nbytes);
+
+      /* give the bytes of this mop back by moving the core position down */
+      mcore->corecpos    -= mop->nbytes;
+      mcore->cur_callocs -= mop->nbytes;
+    }
+    else if (mop->type == GK_MOPT_HEAP) { /* heap free */
+      gk_free((void **)&mop->ptr, LTERM);
+      mcore->cur_hallocs -= mop->nbytes;
+    }
+    else {
+      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! This function frees all mops since the last push. This version is
+    for popping the gkmcore and it uses free instead of gk_free. Only
+    marker and heap mops are expected; any other type is reported via
+    gk_errexit().
+ */
+/*************************************************************************/
+void gk_gkmcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    /* take the top-most mop off the stack */
+    mop = mcore->mops + (--mcore->cmop);
+
+    if (mop->type == GK_MOPT_MARK)   /* push marker */
+      return;
+
+    if (mop->type == GK_MOPT_HEAP) { /* heap free */
+      free(mop->ptr);
+      mcore->cur_hallocs -= mop->nbytes;
+    }
+    else {
+      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Adds a memory allocation at the end of the list (growing the list when
+    it is full) and updates the core/heap statistics of the mcore. */
+/*************************************************************************/
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  if (mcore->cmop == mcore->nmops) {
+    /* mops comes from gk_malloc() in gk_mcoreCreate() and is released with
+       gk_free() in gk_mcoreDestroy(), so it is resized with gk_realloc() too */
+    mcore->nmops *= 2;
+    mcore->mops = (gk_mop_t *)gk_realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t), "gk_mcoreAdd: mops");
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  mcore->mops[mcore->cmop].type   = type;
+  mcore->mops[mcore->cmop].nbytes = nbytes;
+  mcore->mops[mcore->cmop].ptr    = ptr;
+  mcore->cmop++;
+
+  switch (type) {
+    case GK_MOPT_MARK:
+      break;
+    case GK_MOPT_CORE:
+      mcore->num_callocs++;
+      mcore->size_callocs += nbytes;
+      mcore->cur_callocs  += nbytes;
+      if (mcore->max_callocs < mcore->cur_callocs)
+        mcore->max_callocs = mcore->cur_callocs;
+      break;
+    case GK_MOPT_HEAP:
+      mcore->num_hallocs++;
+      mcore->size_hallocs += nbytes;
+      mcore->cur_hallocs  += nbytes;
+      if (mcore->max_hallocs < mcore->cur_hallocs)
+        mcore->max_hallocs = mcore->cur_hallocs;
+      break;
+    default:
+      gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+  }
+}
+
+
+/*************************************************************************/
+/*! Adds a memory allocation at the end of the list. This is the gkmcore
+    version: the list is grown with realloc() and only markers and heap
+    allocations are valid types. */
+/*************************************************************************/
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  gk_mop_t *slot;
+
+  /* double the capacity of the list once it is full */
+  if (mcore->cmop == mcore->nmops) {
+    mcore->nmops *= 2;
+    mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t));
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  slot = mcore->mops + mcore->cmop++;
+  slot->type   = type;
+  slot->nbytes = nbytes;
+  slot->ptr    = ptr;
+
+  if (type == GK_MOPT_MARK)
+    return;
+  if (type != GK_MOPT_HEAP) {
+    gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+    return;
+  }
+
+  mcore->num_hallocs++;
+  mcore->size_hallocs += nbytes;
+  mcore->cur_hallocs  += nbytes;
+  if (mcore->max_hallocs < mcore->cur_hallocs)
+    mcore->max_hallocs = mcore->cur_hallocs;
+}
+
+
+/*************************************************************************/
+/*! This function deletes the mop associated with the supplied pointer.
+    The mop has to be a heap allocation, otherwise it fails violently. */
+/*************************************************************************/
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  int pos;
+
+  for (pos=mcore->cmop-1; pos>=0; pos--) {
+    if (mcore->mops[pos].type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+    if (mcore->mops[pos].ptr != ptr)
+      continue;
+
+    if (mcore->mops[pos].type != GK_MOPT_HEAP)
+      gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+
+    /* fill the hole with the top-most mop and shrink the stack by one */
+    mcore->cur_hallocs -= mcore->mops[pos].nbytes;
+    mcore->mops[pos] = mcore->mops[--mcore->cmop];
+    return;
+  }
+
+  gk_errexit(SIGMEM, "mcoreDel should never have been here!\n");
+}
+
+
+/*************************************************************************/
+/*! This function deletes the mop associated with the supplied pointer.
+    The mop has to be a heap allocation, otherwise it fails violently.
+    This is the gkmcore version. */
+/*************************************************************************/
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  int pos;
+
+  for (pos=mcore->cmop-1; pos>=0; pos--) {
+    if (mcore->mops[pos].type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+    if (mcore->mops[pos].ptr != ptr)
+      continue;
+
+    if (mcore->mops[pos].type != GK_MOPT_HEAP)
+      gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+
+    /* fill the hole with the top-most mop and shrink the stack by one */
+    mcore->cur_hallocs -= mcore->mops[pos].nbytes;
+    mcore->mops[pos] = mcore->mops[--mcore->cmop];
+    return;
+  }
+
+  gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n");
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/memory.c b/3rdParty/metis/metis-5.1.0/GKlib/memory.c
new file mode 100644
index 000000000..cdd00fa79
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/memory.c
@@ -0,0 +1,252 @@
+/*!
+\file  memory.c
+\brief This file contains various allocation routines 
+
+The allocation routines included are for 1D and 2D arrays of most of
+the datatypes that GKlib supports. Many of these routines are 
+defined with the help of the macros in gk_memory.h. These macros 
+can be used to define other memory allocation routines.
+
+\date   Started 4/3/2007
+\author George
+\version\verbatim $Id: memory.c 10783 2011-09-21 23:19:56Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* This is for the global mcore that tracks all heap allocations */
+static __thread gk_mcore_t *gkmcore = NULL;
+
+
+/*************************************************************************/
+/*! Define the set of memory allocation routines for each data type */
+/**************************************************************************/
+GK_MKALLOC(gk_c,   char)
+GK_MKALLOC(gk_i,   int)
+GK_MKALLOC(gk_i32, int32_t)
+GK_MKALLOC(gk_i64, int64_t)
+GK_MKALLOC(gk_z,   ssize_t)
+GK_MKALLOC(gk_f,   float)
+GK_MKALLOC(gk_d,   double)
+GK_MKALLOC(gk_idx, gk_idx_t)
+
+GK_MKALLOC(gk_ckv,   gk_ckv_t)
+GK_MKALLOC(gk_ikv,   gk_ikv_t)
+GK_MKALLOC(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC(gk_zkv,   gk_zkv_t)
+GK_MKALLOC(gk_fkv,   gk_fkv_t)
+GK_MKALLOC(gk_dkv,   gk_dkv_t)
+GK_MKALLOC(gk_skv,   gk_skv_t)
+GK_MKALLOC(gk_idxkv, gk_idxkv_t)
+
+
+
+
+
+
+/*************************************************************************/
+/*! This function allocates a two-dimensional matrix of ndim1 rows, each row
+    holding ndim2 elements of elmlen bytes; *r_matrix is NULL on failure. */
+/*************************************************************************/
+void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2)
+{
+  size_t i, j;  /* unsigned like ndim1, so the loop tests compare like types */
+  void **matrix;
+
+  *r_matrix = NULL;
+
+  if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL)
+    return;
+
+  for (i=0; i<ndim1; i++) {
+    if ((matrix[i] = (void *)gk_malloc(ndim2*elmlen, "gk_AllocMatrix: matrix[i]")) == NULL) {
+      for (j=0; j<i; j++)  /* release the rows built so far ... */
+        gk_free((void **)&matrix[j], LTERM);
+      gk_free((void **)&matrix, LTERM);  /* ... and the row-pointer array */
+      return;
+    }
+  }
+  *r_matrix = matrix;
+}
+
+
+/*************************************************************************/
+/*! This function frees a two-dimensional matrix: its ndim1 rows first and
+    then the array of row pointers. ndim2 is not used. */
+/*************************************************************************/
+void gk_FreeMatrix(void ***r_matrix, size_t ndim1, size_t ndim2)
+{
+  void **rows = *r_matrix;
+  gk_idx_t r;
+
+  if (rows != NULL) {
+    /* release the rows one by one */
+    for (r=0; r<ndim1; r++)
+      gk_free((void **)&rows[r], LTERM);
+
+    /* then the row-pointer array, through the caller's variable */
+    gk_free((void **)r_matrix, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Initializes tracking of heap allocations: creates the thread's gkmcore
+    if needed, then calls gk_gkmcorePush(). Returns 1 on success, else 0. */
+/*************************************************************************/
+int gk_malloc_init()
+{
+  if (gkmcore == NULL) {
+    gkmcore = gk_gkmcoreCreate();
+    if (gkmcore == NULL)
+      return 0;
+  }
+
+  gk_gkmcorePush(gkmcore);
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function frees the memory that has been allocated since the
+    last call to gk_malloc_init(), and destroys the gkmcore once it is empty.
+*/
+/*************************************************************************/
+void gk_malloc_cleanup(int showstats)
+{
+  if (gkmcore == NULL)
+    return;
+  gk_gkmcorePop(gkmcore);
+  if (gkmcore->cmop != 0)
+    return;  /* mops remain, so the gkmcore is kept */
+  gk_gkmcoreDestroy(&gkmcore, showstats);
+  gkmcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function is my wrapper around malloc that provides the following
+    enhancements over malloc:
+    * It always allocates at least one byte of memory, even if 0 bytes are
+      requested, so that a zero-size request is not reported as a failure.
+    * It registers the allocation with the gkmcore when tracking is active.
+    * In builds without NDEBUG it zeros-out the memory that is allocated;
+      with NDEBUG defined the contents are left uninitialized.
+*/
+/**************************************************************************/
+void *gk_malloc(size_t nbytes, char *msg)
+{
+  void *ptr;
+
+  /* never ask malloc for 0 bytes, so a NULL result always means failure */
+  if (nbytes == 0)
+    nbytes = 1;
+
+  if ((ptr = (void *)malloc(nbytes)) == NULL) {
+    fprintf(stderr, "   Current memory used:  %10zu bytes\n", gk_GetCurMemoryUsed());
+    fprintf(stderr, "   Maximum memory used:  %10zu bytes\n", gk_GetMaxMemoryUsed());
+    gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", 
+        msg, nbytes);
+    return NULL;
+  }
+
+  /* record the allocation when tracking is active */
+  if (gkmcore != NULL)
+    gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr);
+
+#ifndef NDEBUG
+  /* only builds without NDEBUG zero the new block */
+  memset(ptr, 0, nbytes);
+#endif
+
+  return ptr;
+}
+
+
+/*************************************************************************
+* This function is my wrapper around realloc; it keeps the gkmcore tracking in sync
+**************************************************************************/
+void *gk_realloc(void *oldptr, size_t nbytes, char *msg)
+{
+  void *ptr;
+
+  if (nbytes == 0)
+    nbytes = 1;  /* a zero-size request is bumped to one byte */
+
+  /* the old block stops being tracked before it is handed to realloc */
+  if (gkmcore != NULL && oldptr != NULL)
+    gk_gkmcoreDel(gkmcore, oldptr);
+
+  if ((ptr = (void *)realloc(oldptr, nbytes)) == NULL) {
+    fprintf(stderr, "   Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed());
+    fprintf(stderr, "   Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed());
+    gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", 
+        msg, nbytes);
+    return NULL;
+  }
+
+  /* track the block under the returned address and the new size */
+  if (gkmcore != NULL)
+    gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr);
+
+  return ptr;
+}
+
+
+/*************************************************************************
+* Wrapper around free: frees and NULLs each pointer of an LTERM-terminated list
+**************************************************************************/
+void gk_free(void **ptr1,...)
+{
+  va_list plist;
+  void **ptr;
+
+  if (*ptr1 != NULL) {
+    /* drop the tracking record first: once free() has returned, the value
+       of the pointer is indeterminate and must not be handed on */
+    if (gkmcore != NULL) gk_gkmcoreDel(gkmcore, *ptr1);
+
+    free(*ptr1);
+  }
+  *ptr1 = NULL;
+
+  va_start(plist, ptr1);
+  while ((ptr = va_arg(plist, void **)) != LTERM) {
+    if (*ptr != NULL) {
+      /* same order as above: tracking record first, then the memory */
+      if (gkmcore != NULL) gk_gkmcoreDel(gkmcore, *ptr);
+      free(*ptr);
+    }
+    *ptr = NULL;
+  }
+  va_end(plist);
+}          
+
+
+/*************************************************************************
+* This function returns the current amount of dynamically allocated
+* memory recorded by the gkmcore (its cur_hallocs), or 0 without a gkmcore
+**************************************************************************/
+size_t gk_GetCurMemoryUsed()
+{
+  /* without a gkmcore nothing is being tracked */
+  if (gkmcore != NULL)
+    return gkmcore->cur_hallocs;
+  return 0;
+}
+
+
+/*************************************************************************
+* This function returns the maximum amount of dynamically allocated 
+* memory recorded by the gkmcore (its max_hallocs), or 0 without a gkmcore
+**************************************************************************/
+size_t gk_GetMaxMemoryUsed()
+{
+  /* without a gkmcore nothing is being tracked */
+  if (gkmcore != NULL)
+    return gkmcore->max_hallocs;
+  return 0;
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/ms_inttypes.h b/3rdParty/metis/metis-5.1.0/GKlib/ms_inttypes.h
new file mode 100644
index 000000000..e26204b7f
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/ms_inttypes.h
@@ -0,0 +1,301 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "ms_stdint.h"
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+   intmax_t quot;
+   intmax_t rem;
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "I32d"
+#define PRIi32       "I32i"
+#define PRIdLEAST32  "I32d"
+#define PRIiLEAST32  "I32i"
+#define PRIdFAST32   "I32d"
+#define PRIiFAST32   "I32i"
+
+#define PRId64       "I64d"
+#define PRIi64       "I64i"
+#define PRIdLEAST64  "I64d"
+#define PRIiLEAST64  "I64i"
+#define PRIdFAST64   "I64d"
+#define PRIiFAST64   "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "I32o"
+#define PRIu32       "I32u"
+#define PRIx32       "I32x"
+#define PRIX32       "I32X"
+#define PRIoLEAST32  "I32o"
+#define PRIuLEAST32  "I32u"
+#define PRIxLEAST32  "I32x"
+#define PRIXLEAST32  "I32X"
+#define PRIoFAST32   "I32o"
+#define PRIuFAST32   "I32u"
+#define PRIxFAST32   "I32x"
+#define PRIXFAST32   "I32X"
+
+#define PRIo64       "I64o"
+#define PRIu64       "I64u"
+#define PRIx64       "I64x"
+#define PRIX64       "I64X"
+#define PRIoLEAST64  "I64o"
+#define PRIuLEAST64  "I64u"
+#define PRIxLEAST64  "I64x"
+#define PRIXLEAST64  "I64X"
+#define PRIoFAST64   "I64o"
+#define PRIuFAST64   "I64u"
+#define PRIxFAST64   "I64x"
+#define PRIXFAST64   "I64X"
+
+#define PRIoMAX     "I64o"
+#define PRIuMAX     "I64u"
+#define PRIxMAX     "I64x"
+#define PRIXMAX     "I64X"
+
+#define PRIoPTR     "Io"
+#define PRIuPTR     "Iu"
+#define PRIxPTR     "Ix"
+#define PRIXPTR     "IX"
+
+// The fscanf macros for signed integers are:
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are:
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+// Returns numer / denom and numer % denom together in an imaxdiv_t.
+// This is a modified version of the div() function from Microsoft's div.c
+// found in %MSVC.NET%\crt\src\div.c; denom is not checked against zero.
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t result;
+
+   result.quot = numer / denom;
+   result.rem = numer % denom;
+
+   if (numer < 0 && result.rem > 0) {
+      // negative numerator but positive remainder: did division wrong; must fix up
+      ++result.quot;
+      result.rem -= denom;
+   }
+
+   return result;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/ms_stat.h b/3rdParty/metis/metis-5.1.0/GKlib/ms_stat.h
new file mode 100644
index 000000000..a1ef6faf7
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/ms_stat.h
@@ -0,0 +1,22 @@
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MS_STAT_H_
+#define _MS_STAT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <sys/stat.h>
+/* Test macros for file types.  */
+
+#define __S_ISTYPE(mode, mask)  (((mode) & S_IFMT) == (mask))
+
+#define S_ISDIR(mode)    __S_ISTYPE((mode), S_IFDIR)
+#define S_ISCHR(mode)    __S_ISTYPE((mode), S_IFCHR)
+#define S_ISBLK(mode)    __S_ISTYPE((mode), S_IFBLK)
+#define S_ISREG(mode)    __S_ISTYPE((mode), S_IFREG)
+
+#endif 
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/ms_stdint.h b/3rdParty/metis/metis-5.1.0/GKlib/ms_stdint.h
new file mode 100644
index 000000000..7e200dc6f
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/ms_stdint.h
@@ -0,0 +1,222 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode, wrap the <wchar.h> include with 'extern "C++" {}';
+// otherwise the compiler gives many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   extern "C++" {
+#endif 
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   }
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+typedef __int8            int8_t;
+typedef __int16           int16_t;
+typedef __int32           int32_t;
+typedef __int64           int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef __int64           intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef int               intptr_t;
+   typedef unsigned int      uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/omp.c b/3rdParty/metis/metis-5.1.0/GKlib/omp.c
new file mode 100644
index 000000000..bdd543acf
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/omp.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * omp.c
+ *
+ * This file contains "fake" implementations of OpenMP's runtime libraries
+ *
+ */
+
+#include <GKlib.h>
+
+#ifdef GK_NOOPENMP  /* remove those for now */
+#ifndef _OPENMP     /* serial stand-ins: one thread, one processor */
+void omp_set_num_threads(int num_threads) { (void)num_threads; }
+int  omp_get_num_threads(void) { return 1; }
+int  omp_get_max_threads(void) { return 1; }
+int  omp_get_thread_num(void)  { return 0; }
+int  omp_get_num_procs(void)   { return 1; }
+int  omp_in_parallel(void)     { return 0; }
+void omp_set_dynamic(int num_threads) { (void)num_threads; }
+int  omp_get_dynamic(void) { return 0; }
+void omp_set_nested(int nested) { (void)nested; }
+int  omp_get_nested(void)  { return 0; }
+#endif  /* !_OPENMP */
+#endif  /* GK_NOOPENMP */
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/pdb.c b/3rdParty/metis/metis-5.1.0/GKlib/pdb.c
new file mode 100644
index 000000000..b4d222653
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/pdb.c
@@ -0,0 +1,460 @@
+/************************************************************************/
+/*! \file pdb.c
+
+\brief Functions for parsing pdb files.
+
+Pdb reader (parser).  Loads arrays of pointers for easy backbone access.
+
+\date Started 10/20/06
+\author Kevin
+\version $Id: pdb.c 10711 2011-08-31 22:23:04Z karypis $
+*/
+/************************************************************************/
+#include <GKlib.h>
+
+/************************************************************************/
+/*! \brief Converts three-letter amino acid codes to one-letter codes.
+
+The leading (up to three) characters of \c res are upper-cased in place
+before the lookup, so the caller's buffer is modified.
+\param res is the NUL-terminated three-letter code to be converted.
+\returns The one-letter code, or \c 'X' if the code is not recognized.
+*/
+/************************************************************************/
+char gk_threetoone(char *res) { /* {{{ */
+	int i;
+	/* make sure the matching works; never step past the end of a short string */
+	for(i=0; i<3 && res[i] != '\0'; i++)
+		res[i] = toupper((unsigned char)res[i]);
+	if(strcmp(res,"ALA") == 0) {
+		return 'A';
+	}
+	else if(strcmp(res,"CYS") == 0) {
+		return 'C';
+	}
+	else if(strcmp(res,"ASP") == 0) {
+		return 'D';
+	}
+	else if(strcmp(res,"GLU") == 0) {
+		return 'E';
+	}
+	else if(strcmp(res,"PHE") == 0) {
+		return 'F';
+	}
+	else if(strcmp(res,"GLY") == 0) {
+		return 'G';
+	}
+	else if(strcmp(res,"HIS") == 0) {
+		return 'H';
+	}
+	else if(strcmp(res,"ILE") == 0) {
+		return 'I';
+	}
+	else if(strcmp(res,"LYS") == 0) {
+		return 'K';
+	}
+	else if(strcmp(res,"LEU") == 0) {
+		return 'L';
+	}
+	else if(strcmp(res,"MET") == 0) {
+		return 'M';
+	}
+	else if(strcmp(res,"ASN") == 0) {
+		return 'N';
+	}
+	else if(strcmp(res,"PRO") == 0) {
+		return 'P';
+	}
+	else if(strcmp(res,"GLN") == 0) {
+		return 'Q';
+	}
+	else if(strcmp(res,"ARG") == 0) {
+		return 'R';
+	}
+	else if(strcmp(res,"SER") == 0) {
+		return 'S';
+	}
+	else if(strcmp(res,"THR") == 0) {
+		return 'T';
+	}
+	else if(strcmp(res,"SCY") == 0) {
+		return 'U';
+	}
+	else if(strcmp(res,"VAL") == 0) {
+		return 'V';
+	}
+	else if(strcmp(res,"TRP") == 0) {
+		return 'W';
+	}
+	else if(strcmp(res,"TYR") == 0) {
+		return 'Y';
+	}
+	else  {
+		return 'X';
+	}
+} /* }}} */
+
+/************************************************************************/
+/*! \brief Frees the memory of a pdbf structure.
+
+Releases the residue sequences, the name strings of every atom, the atom,
+backbone, c-alpha and center-of-mass arrays, and finally the structure itself.
+\param p is the pdbf structure to be freed; a NULL pointer is accepted.
+*/
+/************************************************************************/
+void gk_freepdbf(pdbf *p) { /* {{{ */
+	int k;
+
+	if(p != NULL) {
+		gk_free((void **)&p->resSeq, LTERM);
+		for(k=0; k<p->natoms; k++)
+			gk_free((void **)&p->atoms[k].name, &p->atoms[k].resname, LTERM);
+		for(k=0; k<p->nresidues; k++)
+			gk_free((void **)&p->threeresSeq[k], LTERM);
+		/* what is left are plain 1-d arrays; the strings that threeresSeq
+			 pointed to were already freed by the loop above */
+		gk_free((void **)&p->bbs, &p->cas, &p->atoms, &p->cm, &p->threeresSeq, LTERM);
+	}
+
+	gk_free((void **)&p, LTERM);
+} /* }}} */
+
+/************************************************************************/
+/*! \brief Reads a pdb file into a pdbf structure
+ 
+This function allocates a pdbf structure and reads the file fname into 
+that structure.
+
+\param fname is the file name to be read
+\returns A filled pdbf structure.
+*/
+/************************************************************************/
+pdbf *gk_readpdbfile(char *fname) { /* {{{ */
+	int i=0, res=0; 
+	char linetype[6];
+	int  aserial;
+	char aname[5] = "    \0";
+	char altLoc   = ' ';
+	char rname[4] = "   \0";
+	char chainid  = ' ';
+	char oldchainid  = ' ';
+	int  rserial;
+	int  oldRserial = -37;
+	char icode    = ' ';
+	char element  = ' ';
+	double x;
+	double y;
+	double z;
+	double avgx;
+	double avgy;
+	double avgz;
+	double opcy;
+	double tmpt;
+	char line[MAXLINELEN];
+	int corruption=0;
+  int nresatoms;
+
+	int atoms=0, residues=0, cas=0, bbs=0, firstres=1;
+	pdbf *toFill = gk_malloc(sizeof(pdbf),"fillme");
+	FILE *FPIN; 
+
+	FPIN = gk_fopen(fname,"r",fname);	
+	while(fgets(line, 256, FPIN))	{
+		sscanf(line,"%s ",linetype);
+		/* It seems the only reliable parts are through temperature, so we only use these parts */
+		/* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */
+		if(strstr(linetype, "ATOM") != NULL) {
+			sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
+			linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
+			sscanf(linetype, " %s ",linetype);
+			sscanf(aname, " %s ",aname);
+			sscanf(rname, " %s ",rname);
+			if(altLoc != ' ') {
+				corruption = corruption|CRP_ALTLOCS;	
+			}
+
+			if(firstres == 1) {
+				oldRserial = rserial;
+				oldchainid = chainid;
+				residues++;
+				firstres = 0;
+			}
+			if(oldRserial != rserial) {
+				residues++;
+				oldRserial = rserial;
+			}
+			if(oldchainid != chainid) {
+				corruption = corruption|CRP_MULTICHAIN;
+			}
+			oldchainid = chainid;
+			atoms++;
+		  if(strcmp(aname,"CA") == 0) {
+				cas++;
+			}
+			if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || 
+         strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
+				bbs++;
+			}
+		}
+		else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
+			break;
+		}
+	}
+	fclose(FPIN);
+
+	/* printf("File has coordinates for %d atoms in %d residues\n",atoms,residues); */
+	toFill->natoms      = atoms;
+	toFill->ncas        = cas;
+	toFill->nbbs        = bbs;
+	toFill->nresidues   = residues;
+	toFill->resSeq      = (char *) gk_malloc (residues*sizeof(char),"residue seq");
+	toFill->threeresSeq = (char **)gk_malloc (residues*sizeof(char *),"residue seq");
+	toFill->atoms       = (atom *) gk_malloc (atoms*sizeof(atom),  "atoms");
+	toFill->bbs         = (atom **)gk_malloc (  bbs*sizeof(atom *),"bbs");
+	toFill->cas         = (atom **)gk_malloc (  cas*sizeof(atom *),"cas");
+	toFill->cm          = (center_of_mass *)gk_malloc(residues*sizeof(center_of_mass),"center of mass");
+	res=0; firstres=1; cas=0; bbs=0; i=0; 
+  avgx = 0.0; avgy = 0.0; avgz = 0.0;
+  nresatoms = 0;
+
+	FPIN = gk_fopen(fname,"r",fname);	
+	while(fgets(line, 256, FPIN))	{
+		sscanf(line,"%s ",linetype);
+		/* It seems the only reliable parts are through temperature, so we only use these parts */
+		/* if(strstr(linetype, "ATOM") != NULL || strstr(linetype, "HETATM") != NULL) { */
+		if(strstr(linetype, "ATOM") != NULL ) {
+
+			/* to ensure our memory doesn't get corrupted by the biologists, we only read this far */
+			sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
+			linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
+			sscanf(aname, "%s",aname);
+			sscanf(rname, "%s",rname);
+
+			if(firstres == 1) {
+				toFill->resSeq[res] = gk_threetoone(rname);
+			  toFill->threeresSeq[res] = gk_strdup(rname); 
+				oldRserial = rserial;
+				res++;
+				firstres = 0;
+			}
+			if(oldRserial != rserial) {
+        /* we're changing residues. store the center of mass from the last one & reset */
+        toFill->cm[res-1].x = avgx/nresatoms;
+        toFill->cm[res-1].y = avgy/nresatoms;
+        toFill->cm[res-1].z = avgz/nresatoms;
+	      avgx = 0.0; avgy = 0.0; avgz = 0.0;
+        nresatoms = 0;
+        toFill->cm[res-1].name = toFill->resSeq[res-1];
+
+			  toFill->threeresSeq[res] = gk_strdup(rname); 
+				toFill->resSeq[res] = gk_threetoone(rname);
+				res++;
+				oldRserial = rserial;
+			}
+      avgx += x;
+      avgy += y;
+      avgz += z;
+      nresatoms++;
+
+			toFill->atoms[i].x       = x;
+			toFill->atoms[i].y       = y;
+			toFill->atoms[i].z       = z;
+			toFill->atoms[i].opcy    = opcy;
+			toFill->atoms[i].tmpt    = tmpt;
+			toFill->atoms[i].element = element;
+			toFill->atoms[i].serial  = aserial;
+			toFill->atoms[i].chainid = chainid;
+			toFill->atoms[i].altLoc  = altLoc;
+			toFill->atoms[i].rserial = rserial;
+			toFill->atoms[i].icode   = icode;
+			toFill->atoms[i].name    = gk_strdup(aname); 
+			toFill->atoms[i].resname = gk_strdup(rname); 
+			/* Set up pointers for the backbone and c-alpha shortcuts */
+			 if(strcmp(aname,"CA") == 0) {
+				toFill->cas[cas] = &(toFill->atoms[i]);
+				cas++;
+			}
+			if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
+				toFill->bbs[bbs] = &(toFill->atoms[i]);
+				bbs++;
+			}
+			i++;
+		}
+		else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
+			break;
+		}
+	}
+  /* get that last average */
+  toFill->cm[res-1].x = avgx/nresatoms;
+  toFill->cm[res-1].y = avgy/nresatoms;
+  toFill->cm[res-1].z = avgz/nresatoms;
+	/* Begin test code */
+	if(cas != residues) {
+		printf("Number of residues and CA coordinates differs by %d (!)\n",residues-cas);
+		if(cas < residues) {
+			corruption = corruption|CRP_MISSINGCA;	
+		}
+		else if(cas > residues) {
+			corruption = corruption|CRP_MULTICA;	
+		}
+	}
+	if(bbs < residues*4) {
+		corruption = corruption|CRP_MISSINGBB;
+	}
+	else if(bbs > residues*4) {
+		corruption = corruption|CRP_MULTIBB;
+	}
+	fclose(FPIN);
+	toFill->corruption = corruption;
+	/* if(corruption == 0) 
+		printf("File was clean!\n"); */
+	return(toFill);
+} /* }}} */
+
+/************************************************************************/
+/*! \brief Writes the residue sequence of a pdb structure as a fasta file.
+
+The header line is "> " followed by fname itself; the second line holds
+the nresidues one-letter codes stored in resSeq, followed by a newline.
+
+\param pb is the pdbf structure with the sequence of interest
+\param fname is the name of the file to be written; it is also used as
+       the text of the fasta header
+*/
+/************************************************************************/
+void gk_writefastafrompdb(pdbf *pb, char *fname) {
+  FILE *out = gk_fopen(fname,"w",fname);
+  const char *code = pb->resSeq;
+  int left = pb->nresidues;
+
+  fprintf(out,"> %s\n",fname);
+
+  /* one character per residue, no separators */
+  while (left-- > 0)
+    fprintf(out,"%c",*code++);
+
+  fprintf(out,"\n");
+  fclose(out);
+}
+
+/************************************************************************/
+/*! \brief Writes the per-residue centers of mass as pdb ATOM records.
+
+One "CA" record is written per residue, using the residue index as both
+the atom serial and the residue number, followed by the constants 1.0
+and -37.0 in the last two columns.
+
+\param p is the pdbf structure whose cm and threeresSeq arrays are written
+\param fname is the file name to be written
+*/
+/************************************************************************/
+void gk_writecentersofmass(pdbf *p, char *fname) {
+	int r;
+	FILE *out = gk_fopen(fname,"w",fname);
+	for(r=0; r<p->nresidues; r++) {
+		fprintf(out,"%-6s%5d %4s%1c%3s %1c%4d%1c   %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
+			"ATOM  ",r,"CA",' ',p->threeresSeq[r],' ',r,' ',p->cm[r].x,p->cm[r].y,p->cm[r].z,1.0,-37.0);
+	}
+	fclose(out);
+}
+
+/************************************************************************/
+/*! \brief Writes every atom of p as a pdb ATOM record to file fname.
+
+Atoms are written in the order in which they are stored in p->atoms.
+
+\param p is the pdbf structure to write out
+\param fname is the file name to be written
+*/
+/************************************************************************/
+void gk_writefullatom(pdbf *p, char *fname) {
+	int k;
+	atom *a;
+	FILE *out = gk_fopen(fname,"w",fname);
+	for(k=0; k<p->natoms; k++) {
+		a = &p->atoms[k];
+		fprintf(out,"%-6s%5d %4s%1c%3s %1c%4d%1c   %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
+			"ATOM  ",a->serial,a->name,a->altLoc,a->resname,a->chainid,a->rserial,a->icode,a->x,a->y,a->z,a->opcy,a->tmpt);
+	}
+	fclose(out);
+}
+
+/************************************************************************/
+/*! \brief Writes out all the backbone atoms of a structure in pdb format
+
+Only the nbbs atoms referenced by p->bbs are written to file fname.
+
+\param p is the pdb structure to write out.
+\param fname is the file name to be written.
+*/
+/************************************************************************/
+void gk_writebackbone(pdbf *p, char *fname) {
+	int k;
+	atom *a;
+	FILE *out = gk_fopen(fname,"w",fname);
+	for(k=0; k<p->nbbs; k++) {
+		a = p->bbs[k];
+		fprintf(out,"%-6s%5d %4s%1c%3s %1c%4d%1c   %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
+			"ATOM  ",a->serial,a->name,a->altLoc,a->resname,a->chainid,a->rserial,a->icode,a->x,a->y,a->z,a->opcy,a->tmpt);
+	}
+	fclose(out);
+}
+
+/************************************************************************/
+/*! \brief Writes out all the alpha carbon atoms of a structure
+
+Only the ncas atoms referenced by p->cas are written to file fname.
+
+\param p is the pdb structure to write out.
+\param fname is the file name to be written.
+*/
+/************************************************************************/
+void gk_writealphacarbons(pdbf *p, char *fname) {
+	int k;
+	atom *a;
+	FILE *out = gk_fopen(fname,"w",fname);
+	for(k=0; k<p->ncas; k++) {
+		a = p->cas[k];
+		fprintf(out,"%-6s%5d %4s%1c%3s %1c%4d%1c   %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
+			"ATOM  ",a->serial,a->name,a->altLoc,a->resname,a->chainid,a->rserial,a->icode,a->x,a->y,a->z,a->opcy,a->tmpt);
+	}
+	fclose(out);
+}
+
+/************************************************************************/
+/*! \brief Decodes the corruption bitswitch and prints any problems
+ 
+Due to the totally unreliable nature of the pdb format, reading a pdb
+file stores a corruption bitswitch, and this function decodes that switch
+and prints one line on stdout for every CRP_* flag that is set.
+
+\param p is the pdb structure whose corruption field is decoded.
+*/
+/************************************************************************/
+void gk_showcorruption(pdbf *p) {
+	int corruption = p->corruption;
+	if(corruption&CRP_ALTLOCS)
+		printf("Multiple coordinate sets for at least one atom\n");
+	if(corruption&CRP_MISSINGCA) 
+		printf("Missing coordiantes for at least one CA atom\n");
+	if(corruption&CRP_MISSINGBB) 
+		printf("Missing coordiantes for at least one backbone atom (N,CA,C,O)\n");
+	if(corruption&CRP_MULTICHAIN) 
+		printf("File contains coordinates for multiple chains\n");
+	if(corruption&CRP_MULTICA) 
+		printf("Multiple CA atoms found for the same residue (could be alternate locators)\n");
+	/* duplicated backbone atoms are recorded as CRP_MULTIBB, not CRP_MULTICA */
+	if(corruption&CRP_MULTIBB) 
+		printf("Multiple copies of backbone atoms found for the same residue (could be alternate locators)\n");
+}
+			/* sscanf(line, "%6s%5d%*1c%4s%1c%3s%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf%*6c%4s%2s%2s\n",
+			linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,segId,element,charge);
+			printf(".%s.%s.%s.\n",segId,element,charge);
+			printf("%-6s%5d%-1s%-4s%1c%3s%1s%1c%4d%1c%3s%8.3lf%8.3lf%8.3lf%6.2f%6.2f%6s%4s%2s%2s\n",
+			linetype,aserial," ",aname,altLoc,rname," ",chainid,rserial,icode," ",x,y,z,opcy,tmpt," ",segId,element,charge); */
+
+			/* and we could probably get away with this using astral files, */
+			/* sscanf(line, "%6s%5d%*1c%4s%1c%3s%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf%*6c%6s\n",
+			linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,element);
+			printf("%-6s%5d%-1s%-4s%1c%3s%1s%1c%4d%1c%3s%8.3lf%8.3lf%8.3lf%6.2f%6.2f%6s%6s\n",
+			linetype,aserial," ",aname,altLoc,rname," ",chainid,rserial,icode," ",x,y,z,opcy,tmpt," ",element); */
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/pqueue.c b/3rdParty/metis/metis-5.1.0/GKlib/pqueue.c
new file mode 100644
index 000000000..2fb8515d2
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/pqueue.c
@@ -0,0 +1,25 @@
+/*!
+\file  pqueue.c
+\brief This file implements various max-priority queues.
+
+The priority queues are generated using the GK_MKPQUEUE macro.
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Instantiate GK_MKPQUEUE for every queue variant; they all share key_gt */
+/*************************************************************************/
+#define key_gt(a, b) ((a) > (b))
+GK_MKPQUEUE(gk_ipq,   gk_ipq_t,   gk_ikv_t,   int,      gk_idx_t, gk_ikvmalloc,   INT_MAX,    key_gt)
+GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t,  gk_idx_t, gk_i32kvmalloc, INT32_MAX,  key_gt)
+GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t,  gk_idx_t, gk_i64kvmalloc, INT64_MAX,  key_gt)
+GK_MKPQUEUE(gk_fpq,   gk_fpq_t,   gk_fkv_t,   float,    gk_idx_t, gk_fkvmalloc,   FLT_MAX,    key_gt)
+GK_MKPQUEUE(gk_dpq,   gk_dpq_t,   gk_dkv_t,   double,   gk_idx_t, gk_dkvmalloc,   DBL_MAX,    key_gt)
+GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt)
+#undef key_gt
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/random.c b/3rdParty/metis/metis-5.1.0/GKlib/random.c
new file mode 100644
index 000000000..d18e7188b
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/random.c
@@ -0,0 +1,134 @@
+/*!
+\file  
+\brief Various routines for providing portable 32 and 64 bit random number
+       generators.
+
+\date   Started 5/17/2007
+\author George
+\version\verbatim $Id: random.c 11793 2012-04-04 21:03:02Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Instantiate GK_MKRANDOM for each of the gk_c/i/f/d/idx/z prefixes */
+/*************************************************************************/
+GK_MKRANDOM(gk_c,   size_t, char)
+GK_MKRANDOM(gk_i,   size_t, int)
+GK_MKRANDOM(gk_f,   size_t, float)
+GK_MKRANDOM(gk_d,   size_t, double)
+GK_MKRANDOM(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM(gk_z,   size_t, ssize_t)
+
+
+
+/*************************************************************************/
+/*! GKlib's built in random number generator for portability across 
+    different architectures */
+/*************************************************************************/
+#ifdef USE_GKRAND
+/* 
+   A C-program for MT19937-64 (2004/9/29 version).
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   This is a 64-bit version of Mersenne Twister pseudorandom number
+   generator.
+
+   Before using, initialize the state by using init_genrand64(seed)  
+   or init_by_array64(init_key, key_length).
+
+   Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define NN 312
+#define MM 156
+#define MATRIX_A 0xB5026F5AA96619E9ULL
+#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
+#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
+
+
+/* The array for the state vector */
+static uint64_t mt[NN]; 
+/* mti==NN+1 means mt[] has not been seeded yet; gk_randint64() then seeds it */
+static int mti=NN+1; 
+#endif /* USE_GKRAND */
+
+/* seeds the generator: fills mt[] under USE_GKRAND, otherwise calls srand() */
+void gk_randinit(uint64_t seed)
+{
+#ifdef USE_GKRAND
+  for (mt[0]=seed, mti=1; mti<NN; mti++)
+    mt[mti] = (mt[mti-1] ^ (mt[mti-1] >> 62)) * 6364136223846793005ULL + mti;
+  /* the loop leaves mti == NN */
+#else
+  srand((unsigned int) seed);
+#endif
+}
+
+
+/* generates a random number; with USE_GKRAND the top bit is masked off: [0, 2^63-1] */
+uint64_t gk_randint64(void)
+{
+#ifdef USE_GKRAND
+  int i;
+  unsigned long long x;
+  static uint64_t mag01[2]={0ULL, MATRIX_A};
+
+  if (mti >= NN) { /* generate NN words at one time */
+    /* if gk_randinit() has not been called, */
+    /* a default initial seed is used     */
+    if (mti == NN+1) 
+      gk_randinit(5489ULL); 
+
+    for (i=0; i<NN-MM; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    for (; i<NN-1; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    x = (mt[NN-1]&UM)|(mt[0]&LM);
+    mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+
+    mti = 0;
+  }
+
+  x = mt[mti++];
+
+  x ^= (x >> 29) & 0x5555555555555555ULL;
+  x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
+  x ^= (x << 37) & 0xFFF7EEE000000000ULL;
+  x ^= (x >> 43);
+
+  return x & 0x7FFFFFFFFFFFFFFF;
+#else
+  return (uint64_t)(((uint64_t) rand()) << 32 | ((uint64_t) rand()));
+#endif
+}
+
+/* generates a random number; with USE_GKRAND only the low 31 bits are kept: [0, 2^31-1] */
+uint32_t gk_randint32(void)
+{
+#ifdef USE_GKRAND
+  return (uint32_t)(gk_randint64() & 0x7FFFFFFF);
+#else
+  return (uint32_t)rand();
+#endif
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/rw.c b/3rdParty/metis/metis-5.1.0/GKlib/rw.c
new file mode 100644
index 000000000..7cd4391a0
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/rw.c
@@ -0,0 +1,103 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines that perform random-walk based operations
+          on graphs stored as gk_csr_t matrices.
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Computes the (personalized) page-rank of the vertices in a graph.
+
+  \param mat is the matrix storing the graph.
+  \param lamda is the weight of the scores propagated along the edges (and
+         collected from the sinks); the restart term is weighted by 1-lamda.
+  \param eps is the error tolerance for convergence.
+  \param max_niter is the maximum number of allowed iterations.
+  \param pr on entry stores the restart distribution of the vertices. 
+         This allows for the computation of personalized page-rank scores 
+         by appropriately setting that parameter. 
+         On return, pr stores the computed page ranks.
+ 
+  \returns the number of iterations that were performed.
+*/
+/**************************************************************************/
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
+{
+  ssize_t i, j, iter, nrows;
+  double *rscale, *prold, *prnew, *prtmp;
+  double fromsinks, error;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
+  prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
+  rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
+
+  /* compute the scaling factors to get adjacency weights into transition 
+     probabilities */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      rscale[i] += rowval[j];
+    if (rscale[i] > 0)
+      rscale[i] = 1.0/rscale[i];
+  }
+
+  /* the restart distribution is the initial pr scores */
+  for (i=0; i<nrows; i++)
+    prnew[i] = pr[i];
+
+  /* get into the PR iteration */
+  for (iter=0; iter<max_niter; iter++) {
+    gk_SWAP(prnew, prold, prtmp);
+    gk_dset(nrows, 0.0, prnew);
+
+    /* determine the total current PR score of the sinks so that you 
+       can distribute them to all nodes according to the restart 
+       distribution. */
+    for (fromsinks=0.0, i=0; i<nrows; i++) {
+      if (rscale[i] == 0) 
+        fromsinks += prold[i];
+    }
+
+    /* push random-walk scores to the outlinks */
+    for (i=0; i<nrows; i++) {
+      for (j=rowptr[i]; j<rowptr[i+1]; j++)
+        prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
+    }
+
+    /* apply the restart conditions */
+    for (i=0; i<nrows; i++) {
+      prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
+    }
+
+    /* compute the error */
+    for (error=0.0, i=0; i<nrows; i++) 
+      error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
+
+    //printf("nrm1: %le  maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error);
+
+    if (error < eps)
+      break;
+  }
+
+  /* store the computed pr scores into pr for output */
+  for (i=0; i<nrows; i++)
+    pr[i] = prnew[i];
+
+  gk_free((void **)&prnew, &prold, &rscale, LTERM);
+  /* a break during pass 'iter' means iter+1 passes ran; otherwise iter passes did */
+  return (int)(iter < max_niter ? iter+1 : iter);
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/seq.c b/3rdParty/metis/metis-5.1.0/GKlib/seq.c
new file mode 100644
index 000000000..f267a3ea0
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/seq.c
@@ -0,0 +1,174 @@
+/*
+ *
+ * Sequence handler library by Huzefa Rangwala
+ * Date : 03.01.2007
+ *
+ *
+ *
+ */
+
+
+#include <GKlib.h>
+
+
+
+
+/***********************************************************************/
+/*! \brief Resets a <tt>gk_seq_t</tt> to the empty state
+
+The length is set to zero and the sequence, pssm, psfm and name pointers
+are set to NULL. Nothing is allocated or freed, and the nsymbols field
+is left untouched.
+
+\param seq is the gk_seq_t to initialize
+\returns nothing
+*/
+/***********************************************************************/
+
+void gk_seq_init(gk_seq_t *seq)
+{
+    /* no positions yet */
+    seq->len = 0;
+
+    /* none of the per-sequence buffers exist yet */
+    seq->name     = NULL;
+    seq->sequence = NULL;
+    seq->psfm     = NULL;
+    seq->pssm     = NULL;
+}
+
+/***********************************************************************/
+/*! \brief Builds the index<->character translation tables of an alphabet
+
+Both tables have 256 entries and are pre-filled with -1; i2c[i] is then
+set to the i-th symbol and c2i[symbol] to i, for every i < strlen(alphabet).
+
+\param    alphabet is the string of symbols (amino acids, nucleotides, ...)
+\returns  the newly allocated gk_i2cc2i_t variable
+*/
+/*********************************************************************/
+
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
+{
+    int nsymbols;
+    gk_idx_t i;
+    gk_i2cc2i_t *t;
+
+    nsymbols = strlen(alphabet);
+    t        = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
+    t->n     = nsymbols;
+    t->i2c   = gk_cmalloc(256, "gk_i2c_create_common");
+    t->c2i   = gk_imalloc(256, "gk_i2c_create_common");
+
+    gk_cset(256, -1, t->i2c);
+    gk_iset(256, -1, t->c2i);
+
+    for(i=0;i<nsymbols;i++){
+	t->i2c[i] = alphabet[i];
+	/* plain char may be signed: go through unsigned char to avoid a negative subscript */
+	t->c2i[(unsigned char)alphabet[i]] = i;
+    }
+
+    return t;
+}
+
+
+/*********************************************************************/
+/*! \brief This function reads a pssm in the format of gkmod pssm
+
+The first line is a header whose first 20 tokens name the columns. On
+every following line the second token is the residue, the next 20 tokens
+are the pssm values and the 20 after those are the psfm values.
+
+\param filename is the name of the pssm file
+\returns a newly allocated gk_seq_t
+*/
+/********************************************************************/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
+{
+    gk_seq_t *seq;
+    gk_idx_t i, j, ii;
+    size_t ntokens, nbytes, len;
+    FILE *fpin;
+    gk_Tokens_t tokens;
+    static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
+    static int PSSMWIDTH = 20;
+    char *header, line[MAXLINELEN];
+    gk_i2cc2i_t *converter;
+
+    header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
+    converter = gk_i2cc2i_create_common(AAORDER);
+
+    gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
+    if (len == 0) /* len is unsigned, decrementing it would wrap around */
+      errexit("Unexpected end of file: %s\n", filename);
+    len --; /* the header line is not a sequence position */
+
+    seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
+    gk_seq_init(seq);
+    seq->len = len;
+    seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
+    seq->pssm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+    seq->psfm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+    seq->nsymbols = PSSMWIDTH;
+    seq->name     = gk_getbasename(filename);
+    
+    fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
+
+    /* Read the header line */
+    if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+      errexit("Unexpected end of file: %s\n", filename);
+    gk_strtoupper(line);
+    gk_strtokenize(line, " \t\n", &tokens);
+
+    for (i=0; i<PSSMWIDTH; i++)
+	header[i] = tokens.list[i][0];
+    
+    gk_freetokenslist(&tokens);
+
+    /* Read the rest of the lines */
+    for (i=0, ii=0; ii<len; ii++) {
+	if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+          errexit("Unexpected end of file: %s\n", filename);
+	gk_strtoupper(line);
+	gk_strtokenize(line, " \t\n", &tokens);
+	
+	/* subscript through unsigned char so the index is never negative */
+	seq->sequence[i] = converter->c2i[(unsigned char)tokens.list[1][0]];
+	
+	for (j=0; j<PSSMWIDTH; j++) {
+	    seq->pssm[i][converter->c2i[(unsigned char)header[j]]] = atoi(tokens.list[2+j]);
+	    seq->psfm[i][converter->c2i[(unsigned char)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
+	}
+	
+	gk_freetokenslist(&tokens);
+	i++;
+    }
+    
+    seq->len = i; /* Reset the length if certain characters were skipped */
+    
+    /* the translation tables are only needed while parsing */
+    gk_free((void **)&converter->i2c, &converter->c2i, &converter, LTERM);
+    gk_free((void **)&header, LTERM);
+    gk_fclose(fpin);
+
+    return seq;
+}
+
+
+/**************************************************************************/
+/*! \brief Releases a gk_seq_t together with everything it owns.
+
+\param   seq is the structure to free; the two matrices, the name, the
+         sequence array and finally the structure itself are released
+\returns nothing
+*/
+/**************************************************************************/
+void gk_seq_free(gk_seq_t *seq)
+{
+    /* both matrices are released with the len and nsymbols stored in seq */
+    gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
+    gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
+    gk_free((void **)&seq->sequence, &seq->name, LTERM);
+    gk_free((void **)&seq, LTERM);
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/sort.c b/3rdParty/metis/metis-5.1.0/GKlib/sort.c
new file mode 100644
index 000000000..bde30f5a5
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/sort.c
@@ -0,0 +1,327 @@
+/*!
+\file  sort.c
+\brief This file contains GKlib's various sorting routines
+
+These routines are implemented using the GKSORT macro that is defined
+in gk_qsort.h and is based on GNU's GLIBC qsort() implementation.
+
+Additional sorting routines can be created using the same way that
+these routines where defined.
+
+\date   Started 4/4/07
+\author George
+\version\verbatim $Id: sort.c 10796 2011-09-23 21:33:09Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! Sorts the n chars stored at base into increasing order */
+/*************************************************************************/
+void gk_csorti(size_t n, char *base)
+{
+#define char_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(char, base, n, char_asc);
+#undef char_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n chars stored at base into decreasing order */
+/*************************************************************************/
+void gk_csortd(size_t n, char *base)
+{
+#define char_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(char, base, n, char_desc);
+#undef char_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n ints stored at base into increasing order */
+/*************************************************************************/
+void gk_isorti(size_t n, int *base)
+{
+#define int_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(int, base, n, int_asc);
+#undef int_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n ints stored at base into decreasing order */
+/*************************************************************************/
+void gk_isortd(size_t n, int *base)
+{
+#define int_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(int, base, n, int_desc);
+#undef int_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n floats stored at base into increasing order */
+/*************************************************************************/
+void gk_fsorti(size_t n, float *base)
+{
+#define float_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(float, base, n, float_asc);
+#undef float_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n floats stored at base into decreasing order */
+/*************************************************************************/
+void gk_fsortd(size_t n, float *base)
+{
+#define float_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(float, base, n, float_desc);
+#undef float_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n doubles stored at base into increasing order */
+/*************************************************************************/
+void gk_dsorti(size_t n, double *base)
+{
+#define double_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(double, base, n, double_asc);
+#undef double_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n doubles stored at base into decreasing order */
+/*************************************************************************/
+void gk_dsortd(size_t n, double *base)
+{
+#define double_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(double, base, n, double_desc);
+#undef double_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idx_t values stored at base into increasing order */
+/*************************************************************************/
+void gk_idxsorti(size_t n, gk_idx_t *base)
+{
+#define idx_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(gk_idx_t, base, n, idx_asc);
+#undef idx_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idx_t values stored at base into decreasing order */
+/*************************************************************************/
+void gk_idxsortd(size_t n, gk_idx_t *base)
+{
+#define idx_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(gk_idx_t, base, n, idx_desc);
+#undef idx_desc
+}
+
+
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ckv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_ckvsorti(size_t n, gk_ckv_t *base)
+{
+#define ckey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, ckey_asc);
+#undef ckey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ckv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_ckvsortd(size_t n, gk_ckv_t *base)
+{
+#define ckey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, ckey_desc);
+#undef ckey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ikv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_ikvsorti(size_t n, gk_ikv_t *base)
+{
+#define ikey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, ikey_asc);
+#undef ikey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ikv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_ikvsortd(size_t n, gk_ikv_t *base)
+{
+#define ikey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, ikey_desc);
+#undef ikey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i32kv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_i32kvsorti(size_t n, gk_i32kv_t *base)
+{
+#define i32key_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, i32key_asc);
+#undef i32key_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i32kv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_i32kvsortd(size_t n, gk_i32kv_t *base)
+{
+#define i32key_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, i32key_desc);
+#undef i32key_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i64kv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_i64kvsorti(size_t n, gk_i64kv_t *base)
+{
+#define i64key_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, i64key_asc);
+#undef i64key_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i64kv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_i64kvsortd(size_t n, gk_i64kv_t *base)
+{
+#define i64key_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, i64key_desc);
+#undef i64key_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zkv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_zkvsorti(size_t n, gk_zkv_t *base)
+{
+#define zkey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, zkey_asc);
+#undef zkey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zkv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_zkvsortd(size_t n, gk_zkv_t *base)
+{
+#define zkey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, zkey_desc);
+#undef zkey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_fkv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_fkvsorti(size_t n, gk_fkv_t *base)
+{
+#define fkey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, fkey_asc);
+#undef fkey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_fkv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_fkvsortd(size_t n, gk_fkv_t *base)
+{
+#define fkey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, fkey_desc);
+#undef fkey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_dkv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_dkvsorti(size_t n, gk_dkv_t *base)
+{
+#define dkey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, dkey_asc);
+#undef dkey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_dkv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_dkvsortd(size_t n, gk_dkv_t *base)
+{
+#define dkey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, dkey_desc);
+#undef dkey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_skv_t entries at base by increasing key (strcmp order) */
+/*************************************************************************/
+void gk_skvsorti(size_t n, gk_skv_t *base)
+{
+#define skey_asc(x, y) (strcmp((x)->key, (y)->key) < 0)
+  GK_MKQSORT(gk_skv_t, base, n, skey_asc);
+#undef skey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_skv_t entries at base by decreasing key (strcmp order) */
+/*************************************************************************/
+void gk_skvsortd(size_t n, gk_skv_t *base)
+{
+#define skey_desc(x, y) (strcmp((x)->key, (y)->key) > 0)
+  GK_MKQSORT(gk_skv_t, base, n, skey_desc);
+#undef skey_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idxkv_t entries stored at base by increasing key */
+/*************************************************************************/
+void gk_idxkvsorti(size_t n, gk_idxkv_t *base)
+{
+#define idxkey_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, idxkey_asc);
+#undef idxkey_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idxkv_t entries stored at base by decreasing key */
+/*************************************************************************/
+void gk_idxkvsortd(size_t n, gk_idxkv_t *base)
+{
+#define idxkey_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, idxkey_desc);
+#undef idxkey_desc
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/string.c b/3rdParty/metis/metis-5.1.0/GKlib/string.c
new file mode 100644
index 000000000..5d28452ba
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/string.c
@@ -0,0 +1,529 @@
+/************************************************************************/
+/*! \file 
+
+\brief Functions for manipulating strings.
+
+Various functions for manipulating strings. Some of these functions 
+provide new functionality, whereas others are drop-in replacements
+of standard functions (but with enhanced functionality).
+
+\date Started 11/1/99
+\author George
+\version $Id: string.c 10711 2011-08-31 22:23:04Z karypis $
+*/
+/************************************************************************/
+
+#include <GKlib.h>
+
+
+
+/************************************************************************/
+/*! \brief Replaces certain characters in a string.
+ 
+This function takes a string and replaces all the characters in the
+\c fromlist with the corresponding characters from the \c tolist. 
+That is, each occurrence of <tt>fromlist[i]</tt> is replaced by
+<tt>tolist[i]</tt>. 
+If the \c tolist is shorter than \c fromlist, then the corresponding 
+characters are deleted. The modifications on \c str are done in place. 
+It tries to provide a functionality similar to Perl's \b tr// function.
+
+\param str is the string whose characters will be replaced.
+\param fromlist is the set of characters to be replaced.
+\param tolist is the set of replacement characters.
+\returns A pointer to \c str itself.
+*/
+/************************************************************************/
+char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
+{
+  char *src, *dst, *hit;
+  size_t pos, tolen;
+
+  tolen = strlen(tolist);
+
+  /* src reads every character of str while dst, which never gets ahead of
+     src, writes the surviving ones back into the same buffer */
+  for (src=dst=str; *src != '\0'; src++) {
+    /* *src is never '\0' here, so strchr() cannot hit fromlist's terminator */
+    hit = strchr(fromlist, *src);
+    if (hit == NULL) { /* not in fromlist: carried over unchanged */
+      *dst++ = *src;
+      continue;
+    }
+    pos = (size_t)(hit-fromlist);
+    if (pos < tolen) /* no counterpart in tolist means the character is dropped */
+      *dst++ = tolist[pos];
+  }
+  *dst = '\0';
+
+  return str;
+}
+
+
+
+/************************************************************************/
+/*! \brief Regex-based search-and-replace function
+ 
+This function is a C implementation of Perl's <tt> s//</tt> regular-expression
+based substitution function.
+
+\param str 
+  is the input string on which the operation will be performed.
+\param pattern
+  is the regular expression for the pattern to be matched for substitution.
+\param replacement
+  is the replacement string, in which the possible captured pattern substrings
+  are referred to as $1, $2, ..., $9. The entire matched pattern is referred
+  to as $0.
+\param options
+  is a string specifying options for the substitution operation. Currently the
+  <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are 
+  supported.
+\param new_str 
+  is a reference to a pointer that will store a pointer to the newly created 
+  string that results from the substitutions. This string is allocated via 
+  gk_malloc() and needs to be freed using gk_free(). The string is returned 
+  even if no substitutions were performed.
+\returns
+  If successful, it returns 1 + the number of substitutions that were performed.
+  Thus, if no substitutions were performed, the returned value will be 1.
+  Otherwise it returns 0. In case of error, a meaningful error message is 
+  returned in <tt>new_str</tt>, which also needs to be freed afterwards.
+*/
+/************************************************************************/
+/* Makes sure that `need` more characters fit into *buf behind the `used` ones
+   already stored; *cap is the capacity of *buf without the terminating '\0'. */
+static void gk_strstr_replace_reserve(char **buf, size_t *cap, size_t used, size_t need)
+{
+  if (*cap-used < need) {
+    *cap += *cap + need;
+    *buf = (char *)gk_realloc(*buf, (*cap+1)*sizeof(char), "gk_strstr_replace: new_str");
+  }
+}
+
+int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
+      char **new_str)
+{
+  gk_idx_t i;
+  int j, rc, flags, global, nmatches;
+  size_t len, rlen, nlen, offset, noffset, mlen;
+  regex_t re;
+  regmatch_t matches[10];
+
+  /* Parse the options: 'i' = ignore case, 'g' = replace every match */
+  flags = REG_EXTENDED;
+  if (strchr(options, 'i') != NULL)
+    flags = flags | REG_ICASE;
+  global = (strchr(options, 'g') != NULL ? 1 : 0);
+
+  /* Compile the regex; on failure the regerror() text is returned in new_str */
+  if ((rc = regcomp(&re, pattern, flags)) != 0) {
+    len = regerror(rc, &re, NULL, 0);
+    *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
+    regerror(rc, &re, *new_str, len);
+    return 0;
+  }
+
+  /* Prepare the output string; nlen is its capacity without the final '\0' */
+  len = strlen(str);
+  nlen = 2*len;
+  noffset = 0;
+  *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
+
+  /* Get into the matching-replacing loop */
+  rlen = strlen(replacement);
+  offset = 0;
+  nmatches = 0;
+  do {
+    /* Once past the start of str, '^' must no longer match at str+offset */
+    rc = regexec(&re, str+offset, 10, matches, (offset > 0 ? REG_NOTBOL : 0));
+
+    if (rc == REG_ESPACE) {
+      gk_free((void **)new_str, LTERM);
+      *new_str = gk_strdup("regexec ran out of memory.");
+      regfree(&re);
+      return 0;
+    }
+    else if (rc == REG_NOMATCH) {
+      /* Nothing (more) to replace: the rest of str is copied verbatim */
+      gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
+      strcpy(*new_str+noffset, str+offset);
+      noffset += (len-offset);
+      break;
+    }
+    else { /* A match was found! */
+      nmatches++;
+
+      /* Copy the left unmatched portion of the string */
+      if (matches[0].rm_so > 0) {
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, (size_t)matches[0].rm_so);
+        memcpy(*new_str+noffset, str+offset, (size_t)matches[0].rm_so);
+        noffset += matches[0].rm_so;
+      }
+
+      /* Go and append the replacement string */
+      for (i=0; i<rlen; i++) {
+        switch (replacement[i]) {
+          case '\\': /* the character after the backslash is taken literally */
+            if (i+1 < rlen) {
+              gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+              (*new_str)[noffset++] = replacement[++i];
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing character following '\\'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          case '$': /* $0 is the whole match, $1..$9 the captured groups */
+            if (i+1 < rlen) {
+              j = (int)(replacement[++i] - '0');
+              if (j < 0 || j > 9) {
+                gk_free((void **)new_str, LTERM);
+                *new_str = gk_strdup("Error in captured subexpression specification.");
+                regfree(&re);
+                return 0;
+              }
+              /* A group that took no part in the match has rm_so == -1: it expands to "" */
+              mlen = (matches[j].rm_so < 0 ? 0 : (size_t)(matches[j].rm_eo-matches[j].rm_so));
+              if (mlen > 0) {
+                gk_strstr_replace_reserve(new_str, &nlen, noffset, mlen);
+                memcpy(*new_str+noffset, str+offset+matches[j].rm_so, mlen);
+                noffset += mlen;
+              }
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing subexpression number following '$'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          default:
+            gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+            (*new_str)[noffset++] = replacement[i];
+        }
+      }
+
+      /* Update the offset of str for the next match */
+      offset += matches[0].rm_eo;
+
+      if (!global) {
+        /* Copy the right portion of the string if no 'g' option */
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
+        strcpy(*new_str+noffset, str+offset);
+        noffset += (len-offset);
+      }
+      else if (matches[0].rm_so == matches[0].rm_eo) {
+        /* An empty match consumed nothing: stop at the end of str, otherwise
+           carry one character over so that the 'g' loop always advances */
+        if (offset >= len)
+          break;
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+        (*new_str)[noffset++] = str[offset++];
+      }
+    }
+  } while (global);
+
+  (*new_str)[noffset] = '\0';
+
+  regfree(&re);
+  return nmatches + 1;
+}
+
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the end of the string.
+
+This function removes any trailing characters that are included in the
+\c rmlist. The trimming stops at the last character (i.e., first character 
+from the end) that is not in \c rmlist.  
+This function can be used to remove trailing spaces, newlines, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strhprune()
+*/
+/*************************************************************************/
+char *gk_strtprune(char *str, char *rmlist)
+{
+  size_t nkeep;   /* number of leading characters of str that are kept */
+
+  nkeep = strlen(str);
+
+  /* Give up trailing characters for as long as the last one left belongs to
+     rmlist. The characters examined are never '\0', so strchr() cannot
+     report a hit on the terminator of rmlist. */
+  while (nkeep > 0) {
+    if (strchr(rmlist, str[nkeep-1]) == NULL)
+      break;
+    nkeep--;
+  }
+
+  /* terminate right behind the last character that is not in rmlist */
+  str[nkeep] = '\0';
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the beginning of the string.
+
+This function removes any starting characters that are included in the
+\c rmlist. The trimming stops at the first character that is not in 
+\c rmlist.
+This function can be used to remove leading spaces, tabs, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strtprune()
+*/
+/*************************************************************************/
+char *gk_strhprune(char *str, char *rmlist)
+{
+  char *src, *dst;
+
+  /* strspn() returns the length of the longest prefix of str made up only
+     of characters from rmlist, so src lands on the first character that
+     stays (or on the terminator when the whole string gets pruned) */
+  src = str + strspn(str, rmlist);
+
+  if (src == str) /* nothing to prune at the head */
+    return str;
+
+  /* Slide what is left down to the start of the buffer; dst stays behind
+     src, so no character is overwritten before it has been read */
+  dst = str;
+  while (*src != '\0') {
+    *dst = *src;
+    dst++;
+    src++;
+  }
+  *dst = '\0';
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to upper case.
+
+This function converts a string to upper case. This operation modifies the 
+string itself.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtolower()
+*/
+/*************************************************************************/
+char *gk_strtoupper(char *str)
+{
+  char *p; /* toupper() is undefined for negative chars, hence the cast below */
+  for (p=str; *p!='\0'; p++)
+    *p = (char)toupper((unsigned char)*p);
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to lower case.
+
+This function converts a string to lower case. This operation modifies the 
+string itself.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtoupper()
+*/
+/*************************************************************************/
+char *gk_strtolower(char *str)
+{
+  char *p; /* tolower() is undefined for negative chars, hence the cast below */
+  for (p=str; *p!='\0'; p++)
+    *p = (char)tolower((unsigned char)*p);
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Duplicates a string
+
+This function is a replacement for C's standard <em>strdup()</em> function.
+The key differences between the two are that gk_strdup():
+  - uses the dynamic memory allocation routines of \e GKlib. 
+  - it correctly handles NULL input strings.
+
+The string that is returned must be freed by gk_free().
+
+\param orgstr is the string that will be duplicated.
+\returns A pointer to the newly created string.
+\sa gk_free()
+*/
+/*************************************************************************/
+char *gk_strdup(char *orgstr)
+{
+  size_t nbytes; /* an int here would truncate the size of very long strings */
+  char *str=NULL;
+
+  if (orgstr != NULL) { /* a NULL input simply yields a NULL copy */
+    nbytes = strlen(orgstr)+1;
+    str = gk_malloc(nbytes*sizeof(char), "gk_strdup: str");
+    memcpy(str, orgstr, nbytes);
+  }
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Case insensitive string comparison.
+
+This function compares two strings for equality by ignoring the case of the
+strings. 
+
+\warning This function is \b not equivalent to a case-insensitive 
+         <em>strcmp()</em> function, as it does not return ordering 
+         information.
+
+\todo Remove the above warning.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\retval 1 if the strings are identical,
+\retval 0 otherwise.
+*/
+/*************************************************************************/
+int gk_strcasecmp(char *s1, char *s2)
+{
+  size_t i;
+
+  if (strlen(s1) != strlen(s2)) /* strings of different length are never equal */
+    return 0;
+
+  for (i=0; s1[i] != '\0'; i++) {
+    /* tolower() is undefined for negative values, hence the unsigned char casts */
+    if (tolower((unsigned char)s1[i]) != tolower((unsigned char)s2[i]))
+      return 0;
+  }
+
+  return 1;
+}
+
+
+/************************************************************************/
+/*! \brief Compare two strings in reverse order
+
+This function is similar to strcmp but it performs the comparison as
+if the two strings were reversed.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\retval <0, 0, >0 if the reversed s1 is less than, equal to, or greater than the reversed s2.
+*/
+/*************************************************************************/
+int gk_strrcmp(char *s1, char *s2)
+{
+  size_t n1 = strlen(s1);   /* characters of s1 not compared yet (no int truncation) */
+  size_t n2 = strlen(s2);   /* characters of s2 not compared yet */
+
+  /* Walk both strings from their last character towards the first */
+  while ((n1 > 0) && (n2 > 0)) {
+    if (s1[n1-1] != s2[n2-1])
+      return (s1[n1-1] - s2[n2-1]);
+    n1--;
+    n2--;
+  }
+
+  /* One string is a suffix of the other: the one that ran out first is smaller */
+  if (n1 < n2)
+    return -1;
+  if (n1 > n2)
+    return 1;
+  return 0;
+}
+
+
+
+/************************************************************************/
+/*! \brief Converts a time_t time into a string 
+
+This function takes a time_t-specified time and returns a string-formatted
+representation of the corresponding time. The format of the string is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param time is the time to be converted.
+\return It returns a pointer to a statically allocated string that is 
+        over-written in successive calls of this function. If the 
+        conversion failed, it returns NULL.
+
+*/
+/*************************************************************************/
+char *gk_time2str(time_t time)
+{
+  static char datestr[128];
+  struct tm *tm;
+
+  /* localtime() may fail and return NULL, which must not reach strftime() */
+  tm = localtime(&time);
+  if (tm == NULL || strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
+    return NULL;
+
+  return datestr;
+}
+
+
+
+#if !defined(WIN32) && !defined(__MINGW32__)
+/************************************************************************/
+/*! \brief Converts a date/time string into its equivalent time_t value
+
+This function takes date and/or time specification and converts it in
+the equivalent time_t representation. The conversion is done using the
+strptime() function. The format that gk_str2time() understands is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param str is the date/time string to be converted.
+\return If the conversion was successful it returns the time, otherwise 
+        it returns -1.
+*/
+/*************************************************************************/
+time_t gk_str2time(char *str)
+{
+  struct tm fields;
+  time_t secs;
+
+  /* start from all-zero fields, then let strptime() fill in what it parses */
+  memset(&fields, 0, sizeof(fields));
+  if (strptime(str, "%m/%d/%Y %H:%M:%S", &fields) == NULL)
+    return -1;
+
+  secs = mktime(&fields);
+  return (secs >= 0 ? secs : 0); /* a negative mktime() result is reported as 0 */
+}
+#endif
+
+
+/*************************************************************************
+* This function returns the ID of a particular string based on the 
+* supplied StringMap array
+**************************************************************************/
+int gk_GetStringID(gk_StringMap_t *strmap, char *key)
+{
+  gk_StringMap_t *entry;  /* walks the table up to the entry whose name is NULL */
+
+  for (entry=strmap; entry->name; entry++) {
+    if (gk_strcasecmp(key, entry->name)) /* non-zero means a case-insensitive match */
+      return entry->id;
+  }
+
+  return -1;
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/CMakeLists.txt b/3rdParty/metis/metis-5.1.0/GKlib/test/CMakeLists.txt
new file mode 100644
index 000000000..372b0e2f4
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Where the header files reside (the directive below is commented out)
+#include_directories(../)
+
+# Build one executable per test program.
+add_executable(strings strings.c)
+add_executable(gksort gksort.c)
+add_executable(fis fis.c)
+add_executable(rw rw.c)
+add_executable(gkgraph gkgraph.c)
+foreach(prog strings gksort fis rw gkgraph)  # link each of them against GKlib
+  target_link_libraries(${prog} GKlib)
+endforeach(prog)
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.in.old b/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.in.old
new file mode 100644
index 000000000..cac4f523a
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.in.old
@@ -0,0 +1,258 @@
+#*************************************************************************
+# Global flags
+#*************************************************************************
+gdb         = yes
+debug       = no
+memdbg      = no
+openmp      = no
+x86compiler = gcc
+
+VERNUM = 0.1.0
+
+
+
+#*************************************************************************
+# System-specific compilation flags
+#*************************************************************************
+# Get some basic information about the system that you are working on
+cputype = $(shell uname -m | sed "s/\\ /_/g")
+systype = $(shell uname)
+ifeq ($(findstring CYGWIN, $(systype)),CYGWIN)
+#  systype = CYGWIN
+  systype = MSWIN
+  cputype = x86
+endif
+
+
+GKLIBINCDIR   = $(HOME)/work/algorithms/GKlib/trunk/
+GKLIBBUILDDIR = $(HOME)/work/algorithms/GKlib/builds/$(systype)-$(cputype)
+
+
+ifeq ($(systype),MSWIN)
+  #-------------------------------------------------------------------
+  # These definitions are specific to Microsoft Visual Studio
+  #-------------------------------------------------------------------
+  #Compiler information
+  CC = cl
+  OPTFLAGS = /Ox
+  COPTIONS = -DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE 
+
+  #Compile input/output file specification
+  SOURCEFILE = /c $<
+  OUTPUTFILE = /Fo$@
+
+  #Output specification for executables
+  EXEOUTPUTFILE = /Fe$@   # This option is when cl is used for linking
+  #EXEOUTPUTFILE = /OUT:$@  # This option is used when link is used for linking
+
+  #Linker information
+  LDOPTIONS = /MT 
+  #LD = /cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ 8/VC/BIN/link
+  LD = cl 
+  MERGEMANIFEST = 
+
+  #Library creation information
+  AR = lib /OUT:$@ 
+  RANLIB =
+
+  ifeq ($(openmp),yes)
+    COPTIONS  += -D__OPENMP__ /openmp 
+    LDOPTIONS += /openmp
+    MERGEMANIFEST = vc_mt -manifest $@.manifest -outputresource:$@\;1
+  endif
+
+  #Library information
+  ifeq ($(cputype),i386)
+    LIBPLOTDIR = ../Libplot/Win32
+  else
+    LIBPLOTDIR = ../Libplot/Win64
+  endif
+  LIBS = $(LIBPLOTDIR)/libplot.lib  $(BUILDDIR)/libcluto.lib $(GKLIBBUILDDIR)/libGKlib.lib
+
+  # Standard file extensions 
+  OBJEXT = .obj
+  LIBEXT = .lib
+  EXEEXT = .exe
+else
+  ifeq ($(systype),Linux)
+    ifeq ($(x86compiler),gcc) 
+      #Compiler information
+      CC = gcc
+      OPTFLAGS = -O6 
+      COPTIONS = -DLINUX -D_FILE_OFFSET_BITS=64 -pedantic -std=c99  -pthread
+
+      #Linker information
+      LDOPTIONS = 
+      LD = gcc 
+      
+      MERGEMANIFEST = 
+
+      #Library creation information
+      AR = ar rv
+      RANLIB = ar -ts
+    else
+      #Compiler information
+      CC = icc
+      OPTFLAGS = -O3 
+      COPTIONS = -DLINUX -D_FILE_OFFSET_BITS=64 -std=c99 
+
+      #Linker information
+      LDOPTIONS = 
+      LD = icc 
+
+      #Library creation information
+      AR = ar rv
+      RANLIB = ar -ts
+
+      ifeq ($(openmp),yes)
+        COPTIONS  += -D__OPENMP__ -openmp -openmp-report2
+        LDOPTIONS += -openmp
+      endif
+    endif
+
+    #Library information
+    ifeq ($(cputype),x86_64) 
+      LIBPLOTDIR = ../Libplot/Linux64
+    else
+      LIBPLOTDIR = ../Libplot/Linux32
+    endif
+  endif
+
+
+  ifeq ($(systype),SunOS)
+    #Compiler information
+    CC = /opt/SUNWspro/bin/cc
+    OPTFLAGS = -xO4 
+    COPTIONS =-DSUNOS  
+
+    #Linker information
+    LDOPTIONS = 
+    LD = /opt/SUNWspro/bin/cc
+
+
+    #Library creation information
+    AR = ar rv
+    RANLIB = ar -ts
+
+    #Library information
+    LIBPLOTDIR = ../Libplot/SunOS
+  endif
+
+
+  ifeq ($(systype),Darwin)
+    #Compiler information
+    CC = gcc
+    OPTFLAGS = -O6 
+    COPTIONS = -DDARWIN -D_FILE_OFFSET_BITS=64 -pedantic -std=c99 
+
+    #Linker information
+    LDOPTIONS = -fvisibility=default
+    LD = gcc 
+
+    #Library creation information
+    AR = ar rv
+    RANLIB = ar -ts
+
+    #Library information
+    ifeq ($(cputype),i386)
+      LIBPLOTDIR = ../Libplot/Darwini386
+    else
+      LIBPLOTDIR = ../Libplot/DarwinPPC
+    endif
+  endif
+
+  ifeq ($(systype),CYGWIN)
+    #Compiler information
+    CC = gcc
+    OPTFLAGS = -O6
+    COPTIONS = -DCYGWIN -DWIN32 -D_FILE_OFFSET_BITS=64 -Wall -std=c99 -pedantic -mno-cygwin
+
+    #Linker information
+    LDOPTIONS = -mno-cygwin
+    LD = gcc
+
+    #Library creation information
+    AR = ar crv
+    RANLIB = ar -ts
+
+    #Library information
+    LIBPLOTDIR = ../Libplot/CYGWIN
+  endif
+
+
+  #-------------------------------------------------------------------
+  # These defs are common among the GNU/GCC based systems
+  #-------------------------------------------------------------------
+  #Compile input/output file specification
+  SOURCEFILE = -c $<
+  OUTPUTFILE = -o $@
+
+  #Output specification for executables
+  EXEOUTPUTFILE = -o $@
+
+  #Library creation information
+  AR = ar crv $@ 
+  RANLIB = ar -ts $@
+
+  #Libraries needed for linking
+  LIBSDIR  = -L$(BUILDDIR) -L$(GKLIBBUILDDIR) -L$(HOME)/local/lib
+  LIBS     = -lGKlib -lpcreposix -lpcre -lz -lm
+
+  # Standard file extensions 
+  OBJEXT = .o
+  LIBEXT = .a
+  EXEEXT = 
+endif
+
+
+#**************************************************************************
+DMALLOCINC =
+DMALLOCFLAGS =
+DEBUGFLAGS =
+
+ifeq ($(dmalloc),yes)
+  DMALLOCINC = -I$(HOME)/local/include
+  DMALLOCFLAGS = -DDMALLOC
+  OPTFLAGS = -g
+endif
+
+ifeq ($(debug),yes)
+  DEBUGFLAGS = -DDEBUG
+  OPTFLAGS = -g
+endif
+
+ifeq ($(gdb),yes)
+  OPTFLAGS += -g
+endif
+#**************************************************************************
+
+
+#**************************************************************************
+# Create the build directory if it does not exist
+#**************************************************************************
+ifeq ($(systype),Darwin)
+  BINDIR    = $(HOME)
+else
+  BINDIR    = $(HOME)/work/bin/$(systype)-$(cputype)
+  $(shell mkdir -p $(BINDIR))
+endif
+
+ifeq ($(openmp),no)
+  BUILDDIR    = ./builds/$(systype)-$(cputype)
+else
+  BUILDDIR    = ./builds/$(systype)-$(cputype)-openmp
+endif
+
+LIBBUILDDIR = $(BUILDDIR)/lib
+PRGBUILDDIR = $(BUILDDIR)/prg
+$(shell mkdir -p $(BUILDDIR))
+$(shell mkdir -p $(LIBBUILDDIR))
+$(shell mkdir -p $(PRGBUILDDIR))
+
+
+
+
+INCLUDES = -I./ -I$(GKLIBINCDIR) -I$(LIBPLOTDIR) -I$(HOME)/local/include 
+CFLAGS   = $(COPTIONS) $(OPTFLAGS) $(DEBUGFLAGS) $(INCLUDES)
+
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.old b/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.old
new file mode 100644
index 000000000..1ca357eef
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/Makefile.old
@@ -0,0 +1,39 @@
+include Makefile.in
+
+STRINGSOBJS  = $(PRGBUILDDIR)/strings$(OBJEXT)
+GKSORTOBJS   = $(PRGBUILDDIR)/gksort$(OBJEXT)
+FISOBJS      = $(PRGBUILDDIR)/fis$(OBJEXT)
+                
+HEADERS = $(wildcard $(GKLIBINCDIR)/*.h)
+
+
+default: $(BUILDDIR)/strings$(EXEEXT) $(BUILDDIR)/gksort$(EXEEXT) $(BUILDDIR)/fis$(EXEEXT)
+
+
+$(BUILDDIR)/strings$(EXEEXT): $(STRINGSOBJS) $(GKLIBBUILDDIR)/libGKlib.a
+	$(LD)  $(LDOPTIONS) $(EXEOUTPUTFILE) $(STRINGSOBJS) $(LIBSDIR) $(LIBS) ; $(MERGEMANIFEST)
+	chmod 744 $@
+
+$(BUILDDIR)/gksort$(EXEEXT): $(GKSORTOBJS) $(GKLIBBUILDDIR)/libGKlib.a
+	$(LD)  $(LDOPTIONS) $(EXEOUTPUTFILE) $(GKSORTOBJS) $(LIBSDIR) $(LIBS) ; $(MERGEMANIFEST)
+	chmod 744 $@
+
+$(BUILDDIR)/fis$(EXEEXT): $(FISOBJS) $(GKLIBBUILDDIR)/libGKlib.a
+	$(LD)  $(LDOPTIONS) $(EXEOUTPUTFILE) $(FISOBJS) $(LIBSDIR) $(LIBS) ; $(MERGEMANIFEST)
+	chmod 744 $@
+
+
+clean:
+	rm -rf $(PRGBUILDDIR) 
+
+realclean:
+	rm -rf $(PRGBUILDDIR) ;\
+        rm -rf $(BUILDDIR) ;
+
+
+$(STRINGSOBJS) : $(HEADERS) Makefile.in Makefile $(GKLIBBUILDDIR)/libGKlib.a
+
+
+$(PRGBUILDDIR)/%$(OBJEXT) : %.c
+	$(CC) $(CFLAGS) $(SOURCEFILE) $(OUTPUTFILE) 
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/fis.c b/3rdParty/metis/metis-5.1.0/GKlib/test/fis.c
new file mode 100644
index 000000000..084a4b6a1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/fis.c
@@ -0,0 +1,286 @@
+/*!
+\file  
+\brief A simple frequent itemset discovery program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  ssize_t minlen, maxlen;     /* bounds on the itemset length (-minlen / -maxlen) */
+  ssize_t minfreq, maxfreq;   /* bounds on the itemset frequency (-minfreq / -maxfreq) */
+  char *filename;             /* input transaction file (the <mat-file> argument) */
+  int silent;                 /* when non-zero, discovered itemsets are not printed */
+  ssize_t nitemsets;          /* running count of the itemsets reported so far */
+  char *clabelfile;           /* file with the column labels (-clabels), or NULL */
+  char **clabels;             /* one label string per matrix column */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_MINLEN      1   /* the CMD_* codes identify the options listed in long_options[] below */
+#define CMD_MAXLEN      2
+#define CMD_MINFREQ     3
+#define CMD_MAXFREQ     4
+#define CMD_SILENT      5
+#define CMD_CLABELFILE  6
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Local variables: the option table handed to gk_getopt_long_only() in parse_cmdline() */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"minlen",        1,      0,      CMD_MINLEN},   /* NOTE(review): fields presumably {name, has_arg, flag, val} as in getopt_long() - confirm in gk_getopt.h */
+  {"maxlen",        1,      0,      CMD_MAXLEN},
+  {"minfreq",       1,      0,      CMD_MINFREQ},
+  {"maxfreq",       1,      0,      CMD_MAXFREQ},
+  {"silent",        0,      0,      CMD_SILENT},
+  {"clabels",       1,      0,      CMD_CLABELFILE},
+  {"help",          0,      0,      CMD_HELP},
+  {0,               0,      0,      0}             /* presumably the all-zero entry that ends the table - confirm */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {   /* full help, printed one entry per line; the empty string ends the list */
+" ",
+"Usage: fis [options] <mat-file>",
+" ",
+" Required parameters",
+"  mat-file",
+"     The name of the file storing the transactions. The file is in ",
+"     Cluto's .mat format.",
+" ",
+" Optional parameters",
+"  -minlen=int",
+"     Specifies the minimum length of the patterns. [default: 1]",
+" ",
+"  -maxlen=int",
+"     Specifies the maximum length of the patterns. [default: none]",
+" ",
+"  -minfreq=int",
+"     Specifies the minimum frequency of the patterns. [default: 10]",
+" ",
+"  -maxfreq=int",
+"     Specifies the maximum frequency of the patterns. [default: none]",
+" ",
+"  -silent",
+"     Does not print the discovered itemsets.",
+" ",
+"  -clabels=filename",
+"     Specifies the name of the file that stores the column labels.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {   /* short usage text, same layout as helpstr */
+" ",
+"   Usage: fis [options] <mat-file>",
+"          use 'fis -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+void print_an_itemset(void *stateptr, int nitems, int *itemids,
+                      int ntrans, int *transids);  /* callback given to gk_find_frequent_itemsets() */
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *params;
+  gk_csr_t *mat;
+  FILE *labelfp;
+  char label[8192];
+  ssize_t col;
+
+  params = parse_cmdline(argc, argv);
+  params->nitemsets = 0;
+
+  /* load the transactions (Cluto format) and build the column index */
+  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+
+  /* set up one label per column */
+  params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
+  if (params->clabelfile != NULL) { /* one label per line of the label file */
+    labelfp = gk_fopen(params->clabelfile, "r", "main: fpin");
+    for (col=0; col<mat->ncols; col++) {
+      if (fgets(label, 8192, labelfp) == NULL)
+        errexit("Failed on fgets.\n");
+      params->clabels[col] = gk_strdup(gk_strtprune(label, " \n\t"));
+    }
+    gk_fclose(labelfp);
+  }
+  else {
+    /* no label file: each column is labeled with its own index */
+    for (col=0; col<mat->ncols; col++) {
+      sprintf(label, "%zd", col);
+      params->clabels[col] = gk_strdup(label);
+    }
+  }
+
+  print_init_info(params, mat);
+
+  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
+      params->minfreq, params->maxfreq, params->minlen, params->maxlen,
+      &print_an_itemset, (void *)params);
+
+  printf("Total itemsets found: %zd\n", params->nitemsets);
+
+  print_final_info(params);
+}
+
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat)
+{ /* prints the banner, the input file with the matrix sizes, and the active limits */
+  fputs("*******************************************************************************\n", stdout);
+  fputs(" fis\n\n", stdout);
+  fputs("Matrix Information ---------------------------------------------------------\n", stdout);
+  printf(" input file=%s, [%d, %d, %zd]\n", 
+      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  fputs("\n", stdout);
+  fputs("Options --------------------------------------------------------------------\n", stdout);
+  printf(" minlen=%zd, maxlen=%zd, minfeq=%zd, maxfreq=%zd\n",
+      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
+
+  fputs("\n", stdout);
+  fputs("Finding patterns... -----------------------------------------------------\n", stdout);
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{ /* params is not consulted; only the two GKlib memory counters are reported */
+  fputs("\n", stdout);
+  fputs("Memory Usage Information -----------------------------------------------------\n", stdout);
+  printf("   Maximum memory used:              %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
+  printf("   Current memory used:              %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
+  fputs("********************************************************************************\n", stdout);
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* set the defaults (nitemsets and clabels are left for the caller to set) */
+  params->minlen     = 1;
+  params->maxlen     = -1;   /* -1 goes with the "[default: none]" of the help text */
+  params->minfreq    = 10;
+  params->maxfreq    = -1;
+  params->silent     = 0;
+  params->filename   = NULL;
+  params->clabelfile = NULL;
+
+
+  /* Parse the command line arguments; c is the CMD_* code of the option found */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_MINLEN:
+        if (gk_optarg) params->minlen = atoi(gk_optarg);
+        break;
+      case CMD_MAXLEN:
+        if (gk_optarg) params->maxlen = atoi(gk_optarg);
+        break;
+      case CMD_MINFREQ:
+        if (gk_optarg) params->minfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXFREQ:
+        if (gk_optarg) params->maxfreq = atoi(gk_optarg);
+        break;
+
+      case CMD_SILENT:
+        params->silent = 1;
+        break;
+
+      case CMD_CLABELFILE:
+        if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
+        break;
+
+      case CMD_HELP: /* asking for help is not an error, so the status stays 0 */
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE); /* a bad option must not look like success to the caller */
+    }
+  }
+
+  if (argc-gk_optind != 1) { /* exactly one positional argument is expected */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->filename = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(params->filename))
+    errexit("input file %s does not exist.\n", params->filename);
+
+  return params;
+}
+
+
+
+/*************************************************************************/
+/*! This is the callback function for the itemset discovery routine */
+/*************************************************************************/
+void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans, 
+         int *transids)
+{
+  params_t *params = (params_t *)stateptr;
+  int k;
+
+  params->nitemsets++; /* every itemset is counted, even when nothing is printed */
+  if (params->silent)
+    return;
+
+  /* header line: running number, #items, #transactions, then the item labels */
+  printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
+  for (k=0; k<nitems; k++)
+    printf(" %s", params->clabels[itemids[k]]);
+  printf("\n");
+  for (k=0; k<ntrans; k++) /* followed by one transaction id per line */
+    printf(" %d\n", transids[k]);
+  printf("\n");
+}
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/gkgraph.c b/3rdParty/metis/metis-5.1.0/GKlib/test/gkgraph.c
new file mode 100644
index 000000000..233620d9b
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/gkgraph.c
@@ -0,0 +1,351 @@
+/*!
+\file  
+\brief A simple graph-reordering program to test GKlib's graph routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: gkgraph.c 11408 2012-01-25 15:05:58Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int type;        /* -type; forwarded to gk_graph_ComputeBestFOrdering() */
+  int niter;       /* -niter; only read by the disabled (XXX) centroid loop and the info printout */
+  float eps;       /* -eps; only printed by print_init_info() in this file */
+  float lamda;     /* -lamda; scales the displacement in reorder_centroid() */
+
+  char *infile;    /* name of the input graph file */
+  char *outfile;   /* name of the output file; set by parse_cmdline(), not read by main() */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NITER       1    /* -niter */
+#define CMD_EPS         2    /* -eps   */
+#define CMD_LAMDA       3    /* -lamda */
+#define CMD_TYPE        4    /* -type  */
+#define CMD_HELP        10   /* -help  */
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* option name -> CMD_* code switched on in parse_cmdline() */
+  {"type",       1,      0,      CMD_TYPE},
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}          /* all-zero last row; presumably the list terminator -- confirm in gk_getopt.h */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {  /* printed by -help; the empty string ends the list */
+" ",
+"Usage: gkgraph [options] <graph-file> [<out-file>]",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph. The file is in ",
+"     Metis' graph format.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 1]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.20]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};  /* NOTE(review): -type is accepted by parse_cmdline() but is not described above */
+
+static char shorthelpstr[][100] = {  /* printed when the positional arguments are wrong; "" ends the list */
+" ",
+"   Usage: gkgraph [options] <graph-file> [<out-file>]",
+"          use 'gkgraph -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+double compute_compactness(params_t *params, gk_graph_t *graph, int32_t *perm);
+void reorder_centroid(params_t *params, gk_graph_t *graph, int32_t *perm);
+void print_init_info(params_t *params, gk_graph_t *graph);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t i, j, v;
+  params_t *params;
+  gk_graph_t *graph, *pgraph;
+  int32_t *perm;
+ 
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+
+  /* determine the initial compactness of the graph */
+  printf("Initial compactness: %le\n", compute_compactness(params, graph, NULL));
+
+  /* compute the BFS and BestF orderings from one random start vertex, report
+     the compactness of each, and write the re-ordered graphs to fixed files */
+  for (i=0; i<1; i++) {
+    v = RandomInRange(graph->nvtxs);
+    gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
+    printf("BFS from %8d. Compactness: %le\n", 
+        (int) v, compute_compactness(params, graph, perm));
+
+    pgraph = gk_graph_Reorder(graph, perm, NULL);
+    gk_graph_Write(pgraph, "bfs.metis", GK_GRAPH_FMT_METIS);
+    gk_graph_Free(&pgraph);
+    gk_free((void **)&perm, LTERM);  /* drop the BFS perm; the next call stores a new one in perm */
+    gk_graph_ComputeBestFOrdering(graph, v, params->type, &perm, NULL);
+    printf("BestF from %8d. Compactness: %le\n", 
+        (int) v, compute_compactness(params, graph, perm));
+
+    pgraph = gk_graph_Reorder(graph, perm, NULL);
+    gk_graph_Write(pgraph, "bestf.metis", GK_GRAPH_FMT_METIS);
+    gk_graph_Free(&pgraph);
+
+#ifdef XXX
+    for (j=0; j<params->niter; j++) {
+      reorder_centroid(params, graph, perm);
+      printf("\tAfter centroid; Compactness: %le\n", 
+          compute_compactness(params, graph, perm));
+    }
+
+    pgraph = gk_graph_Reorder(graph, perm, NULL);
+    gk_graph_Write(pgraph, "centroid.metis", GK_GRAPH_FMT_METIS);
+    gk_graph_Free(&pgraph);
+#endif
+    gk_free((void **)&perm, LTERM);
+  }
+
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+  return 0;
+}
+
+
+
+
+/*************************************************************************/
+/*! This function computes the compactness of the graph's adjacency list */
+/*************************************************************************/
+double compute_compactness(params_t *params, gk_graph_t *graph, int32_t *perm)
+{
+  /* Returns the mean |v-u| over all adjacency entries, where v and u are the
+     positions of the two endpoints (perm[] when given, the vertex ids when
+     perm is NULL); 0.0 for a graph without edges. Prints "nsmall". params is unused. */
+  int i, v, u, nvtxs, nsmall=0, *freq;
+  ssize_t j, *xadj; 
+  int32_t *adjncy;
+  double compactness=0.0;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* freq[d] counts the adjacency entries whose endpoints are d positions apart */
+  freq = gk_ismalloc(nvtxs, 0, "compute_compactness: freq");
+  for (i=0; i<nvtxs; i++) {
+    v = (perm == NULL ? i : perm[i]);
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      u = (perm == NULL ? adjncy[j] : perm[adjncy[j]]);
+      compactness += fabs(v-u);
+      freq[gk_abs(v-u)]++;
+    }
+  }
+
+  /* bound the index so graphs with fewer than 4 vertices stay inside freq[] */
+  for (i=1; i<=3 && i<nvtxs; i++)
+    nsmall += freq[i];
+  printf("\tnsmall: %d\n", nsmall);
+  gk_free((void **)&freq, LTERM);
+
+  return (xadj[nvtxs] > 0 ? compactness/xadj[nvtxs] : 0.0);
+}
+
+
+/*************************************************************************/
+/*! This function uses a centroid-based approach to refine the ordering */
+/*************************************************************************/
+void reorder_centroid(params_t *params, gk_graph_t *graph, int32_t *perm)
+{
+  int i, v, u, nvtxs;
+  ssize_t j, *xadj; 
+  int32_t *adjncy;
+  gk_fkv_t *cand;
+  double displacement;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  cand = gk_fkvmalloc(nvtxs, "reorder_centroid: cand");
+
+  /* target position of i = perm[i] + lamda * (mean signed offset to its neighbors) */
+  for (i=0; i<nvtxs; i++) {
+    v = perm[i];
+    displacement = 0.0;
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      u = perm[adjncy[j]];
+      displacement += u-v;
+    }
+    /* a vertex without neighbors keeps its position; dividing by its zero
+       degree would produce a NaN key and an ill-defined sort */
+    cand[i].val = i;
+    cand[i].key = (xadj[i+1] > xadj[i] ? v + displacement*params->lamda/(xadj[i+1]-xadj[i]) : v);
+  }
+
+  /* sort them based on the target position in increasing order */
+  gk_fkvsorti(nvtxs, cand);
+
+  /* derive the permutation from the ordered list: the vertex that sorted into
+     slot i is assigned position i; perm is overwritten in place */
+  gk_i32set(nvtxs, -1, perm);
+  for (i=0; i<nvtxs; i++) {
+    if (perm[cand[i].val] != -1)
+      errexit("Resetting perm[%d] = %d\n", (int)cand[i].val, perm[cand[i].val]);
+    perm[cand[i].val] = i;
+  }
+
+  gk_free((void **)&cand, LTERM);
+}
+
+
+
+
+
+
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_graph_t *graph)
+{
+  ssize_t nadj = graph->xadj[graph->nvtxs];  /* last xadj entry, printed next to nvtxs */
+
+  printf("*******************************************************************************\n");
+  printf(" gkgraph\n\n");
+  printf("Graph Information ----------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %zd]\n", params->infile, graph->nvtxs, nadj);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" type=%d, niter=%d, lamda=%f, eps=%e\n", params->type, params->niter, params->lamda, params->eps);
+
+  printf("\n");
+  printf("Working... -----------------------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{  /* params is unused; the whole report is emitted by one printf() call */
+  printf("\n" "Memory Usage Information -----------------------------------------------------\n"
+         "   Maximum memory used:              %10zd bytes\n"
+         "   Current memory used:              %10zd bytes\n"
+         "********************************************************************************\n",
+         (ssize_t) gk_GetMaxMemoryUsed(), (ssize_t) gk_GetCurMemoryUsed());
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->type      = 1;
+  params->niter     = 1;
+  params->eps       = 1e-10;
+  params->lamda     = 0.20;
+  params->infile    = NULL;
+
+  /* Parse the command line arguments; an option given without a value keeps its default */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_TYPE:
+        if (gk_optarg) params->type = atoi(gk_optarg);
+        break;
+      case CMD_NITER:
+        if (gk_optarg) params->niter = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg) params->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg) params->lamda = atof(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* <graph-file> is required and <out-file> is optional, so 1 or 2 arguments may remain */
+  if (argc-gk_optind < 1 || argc-gk_optind > 2) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+
+  if (argc-gk_optind > 0) 
+    params->outfile = gk_strdup(argv[gk_optind++]);
+  else
+    params->outfile   = gk_strdup("gkgraph.out");
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/gksort.c b/3rdParty/metis/metis-5.1.0/GKlib/test/gksort.c
new file mode 100644
index 000000000..65438368f
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/gksort.c
@@ -0,0 +1,346 @@
+/*!
+\file  gksort.c
+\brief Testing module for the various sorting routines in GKlib
+
+\date   Started 4/4/2007
+\author George
+\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+#define N       10000   /* number of elements sorted by each test below */
+
+/*************************************************************************/
+/*! Testing module for gk_?isort() routine */
+/*************************************************************************/
+void test_isort()
+{
+  int vals[N];
+  gk_idx_t k;
+
+  /* ascending: fill with random ints, sort, then report every inversion */
+  printf("Testing iisort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_isorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing disort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_isortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?fsort() routine */
+/*************************************************************************/
+void test_fsort()
+{
+  float vals[N];
+  gk_idx_t k;
+
+  /* ascending: fill with random ratios, sort, then report every inversion */
+  printf("Testing ifsort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+
+  gk_fsorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing dfsort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+
+  gk_fsortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?idxsort() routine */
+/*************************************************************************/
+void test_idxsort()
+{
+  gk_idx_t vals[N];
+  gk_idx_t k;
+
+  /* ascending: fill with random indices, sort, then report every inversion */
+  printf("Testing idxsorti...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_idxsorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)vals[k-1], (ssize_t)vals[k]);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing idxsortd...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_idxsortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)vals[k-1], (ssize_t)vals[k]);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Testing module for gk_?ikvsort() routine */
+/*************************************************************************/
+void test_ikvsort()
+{
+  gk_ikv_t items[N];
+  gk_idx_t k;
+
+  /* ascending: random int keys, val records the original slot */
+  printf("Testing ikvsorti...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432);
+    items[k].val = k;
+  }
+
+  gk_ikvsorti(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key > items[k].key)
+      printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing ikvsortd...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432);
+    items[k].val = k;
+  }
+
+  gk_ikvsortd(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key < items[k].key)
+      printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Testing module for gk_?fkvsort() routine */
+/*************************************************************************/
+void test_fkvsort()
+{
+  gk_fkv_t items[N];
+  gk_idx_t k;
+
+  /* ascending: random ratio keys, val records the original slot */
+  printf("Testing fkvsorti...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    items[k].val = k;
+  }
+
+  gk_fkvsorti(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key > items[k].key)
+      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing fkvsortd...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    items[k].val = k;
+  }
+
+  gk_fkvsortd(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key < items[k].key)
+      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?dkvsort() routine */
+/*************************************************************************/
+void test_dkvsort()
+{
+  gk_dkv_t items[N];
+  gk_idx_t k;
+
+  /* ascending: random ratio keys, val records the original slot */
+  printf("Testing dkvsorti...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    items[k].val = k;
+  }
+
+  gk_dkvsorti(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key > items[k].key)
+      printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing dkvsortd...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    items[k].val = k;
+  }
+
+  gk_dkvsortd(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key < items[k].key)
+      printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(k-1), items[k-1].key, items[k].key, (intmax_t)items[k-1].val, (intmax_t)items[k].val);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?skvsort() routine */
+/*************************************************************************/
+void test_skvsort()
+{
+  gk_idx_t i;
+  gk_skv_t array[N];
+  char line[256];
+
+  /* test the increasing sort: keys are decimal strings compared with strcmp() */
+  printf("Testing skvsorti...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsorti(N, array);
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) > 0)
+      printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+  for (i=0; i<N; i++)  /* the keys are heap copies; release them before they are replaced */
+    gk_free((void **)&array[i].key, LTERM);
+
+  /* test the decreasing sort */
+  printf("Testing skvsortd...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsortd(N, array);
+
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) < 0)
+      printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+  for (i=0; i<N; i++)  /* release the second batch of keys as well */
+    gk_free((void **)&array[i].key, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?idxkvsort() routine */
+/*************************************************************************/
+void test_idxkvsort()
+{
+  gk_idxkv_t items[N];
+  gk_idx_t k;
+
+  /* ascending: random keys, val records the original slot */
+  printf("Testing idxkvsorti...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432);
+    items[k].val = k;
+  }
+
+  gk_idxkvsorti(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key > items[k].key)
+      printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n", 
+          (ssize_t)(k-1), (ssize_t)items[k-1].key, (ssize_t)items[k].key, 
+          (ssize_t)items[k-1].val, (ssize_t)items[k].val);
+  }
+
+
+  /* descending: same procedure with the order check reversed */
+  printf("Testing idxkvsortd...\n");
+  for (k=0; k<N; k++) {
+    items[k].key = RandomInRange(123432);
+    items[k].val = k;
+  }
+
+  gk_idxkvsortd(N, items);
+
+  for (k=1; k<N; k++) {
+    if (items[k-1].key < items[k].key)
+      printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n", 
+          (ssize_t)(k-1), (ssize_t)items[k-1].key, (ssize_t)items[k].key, 
+          (ssize_t)items[k-1].val, (ssize_t)items[k].val);
+  }
+
+}
+
+
+
+
+int main()
+{
+  /* the sort tests, executed in the order listed */
+  void (*tests[])() = {
+    test_isort, test_fsort, test_idxsort,
+    test_ikvsort, test_fkvsort, test_dkvsort, test_skvsort, test_idxkvsort
+  };
+  size_t t;
+
+  for (t=0; t<sizeof(tests)/sizeof(tests[0]); t++)
+    tests[t]();
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/rw.c b/3rdParty/metis/metis-5.1.0/GKlib/test/rw.c
new file mode 100644
index 000000000..e338f89dd
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/rw.c
@@ -0,0 +1,307 @@
+/*!
+\file  
+\brief A simple random-walk (PageRank) program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: rw.c 11387 2012-01-21 23:36:23Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int niter;      /* -niter; passed to gk_rw_PageRank() */
+  int ntvs;       /* -ntvs; number of random restart vectors, -1 when not requested */
+  int ppr;        /* -ppr; 1-based source vertex of the personalized run, -1 when not requested */
+  float eps;      /* -eps; passed to gk_rw_PageRank() */
+  float lamda;    /* -lamda; passed to gk_rw_PageRank() */
+  char *infile;   /* input graph file, read with GK_CSR_FMT_METIS */
+  char *outfile;  /* file that receives the computed scores */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NITER       1    /* -niter */
+#define CMD_EPS         2    /* -eps   */
+#define CMD_LAMDA       3    /* -lamda */
+#define CMD_PPR         4    /* -ppr   */
+#define CMD_NTVS        5    /* -ntvs  */
+#define CMD_HELP        10   /* -help  */
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* option name -> CMD_* code switched on in parse_cmdline() */
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"ppr",        1,      0,      CMD_PPR},
+  {"ntvs",       1,      0,      CMD_NTVS},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}          /* all-zero last row; presumably the list terminator -- confirm in gk_getopt.h */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {  /* printed by -help; the empty string ends the list */
+" ",
+"Usage: rw [options] <graph-file> <out-file>",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph. The file is in ",
+"     Metis' graph format.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 100]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.80]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -ppr=int",
+"     Specifies the source of the personalized PR. [default: -1]",
+" ",
+"  -ntvs=int",
+"     Specifies the number of test-vectors to compute. [default: -1]",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {  /* printed when the positional arguments are wrong; "" ends the list */
+" ",
+"   Usage: rw [options] <graph-file> <out-file>",
+"          use 'rw -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t i, j, niter;
+  params_t *params;
+  gk_csr_t *mat;
+  FILE *fpout;
+ 
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);
+
+  /* display some basic stats */
+  print_init_info(params, mat);
+
+
+  /* exactly one of three modes runs: -ntvs, -ppr, or the standard pr */
+  if (params->ntvs != -1) {
+    /* compute the pr for different randomly generated restart-distribution vectors */
+    float **prs;
+
+    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");
+
+    /* generate the random restart vectors */
+    for (j=0; j<params->ntvs; j++) {
+      for (i=0; i<mat->nrows; i++)
+        prs[j][i] = RandomInRange(931);
+      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);
+
+      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
+      printf("tvs#: %zd; niters: %zd\n", j, niter);
+    }
+
+    /* output the computed pr scores */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (j=0; j<params->ntvs; j++) 
+        fprintf(fpout, "%.4e ", prs[j][i]);
+      fprintf(fpout, "\n");
+    }
+    gk_fclose(fpout);
+
+    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
+  }
+  else if (params->ppr != -1) {
+    /* compute the personalized pr from the specified vertex (1-based) */
+    float *pr;
+
+    if (params->ppr < 1 || params->ppr > mat->nrows)
+      errexit("The -ppr vertex %d is outside the valid range [1, %d].\n", params->ppr, mat->nrows);
+    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");
+    pr[params->ppr-1] = 1.0;
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("ppr: %d; niters: %zd\n", params->ppr, niter);
+
+    /* output the computed pr scores */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) 
+      fprintf(fpout, "%.4e\n", pr[i]);
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+  else {
+    /* compute the standard pr */
+    int jmax;
+    float diff, maxdiff;
+    float *pr;
+
+    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("pr; niters: %zd\n", niter);
+
+    /* output per row: score, row length, largest score gap to a neighbor, and that neighbor (1-based) */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
+          maxdiff = diff;
+          jmax = mat->rowind[j];
+        }
+      }
+      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], 
+          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
+    }
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+
+  gk_csr_Free(&mat);
+  /* display some final stats */
+  print_final_info(params);
+  return 0;
+}
+
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  /* prints the program banner, the matrix dimensions and the option values */
+  printf("*******************************************************************************\n");
+  printf(" rw\n\n");
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n", params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n",
+      params->niter, params->ntvs, params->ppr, params->lamda, params->eps);
+
+  printf("\n");
+  printf("Performing random walks... ----------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{  /* params is unused; the whole report is emitted by one printf() call */
+  printf("\n" "Memory Usage Information -----------------------------------------------------\n"
+         "   Maximum memory used:              %10zd bytes\n"
+         "   Current memory used:              %10zd bytes\n"
+         "********************************************************************************\n",
+         (ssize_t) gk_GetMaxMemoryUsed(), (ssize_t) gk_GetCurMemoryUsed());
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  params_t *opts;
+  int opt, longidx;
+  int k;
+
+  opts = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* defaults; -1 marks -ppr and -ntvs as "not requested" */
+  opts->infile    = NULL;
+  opts->outfile   = NULL;
+  opts->niter     = 100;
+  opts->eps       = 1e-10;
+  opts->lamda     = 0.80;
+  opts->ppr       = -1;
+  opts->ntvs      = -1;
+
+  /* consume the options; an option given without a value keeps its default */
+  while ((opt = gk_getopt_long_only(argc, argv, "", long_options, &longidx)) != -1) {
+    switch (opt) {
+      case CMD_NITER:
+        if (gk_optarg != NULL) opts->niter = atoi(gk_optarg);
+        break;
+      case CMD_NTVS:
+        if (gk_optarg != NULL) opts->ntvs = atoi(gk_optarg);
+        break;
+      case CMD_PPR:
+        if (gk_optarg != NULL) opts->ppr = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg != NULL) opts->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg != NULL) opts->lamda = atof(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (k=0; helpstr[k][0] != '\0'; k++)
+          printf("%s\n", helpstr[k]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* exactly two positional arguments must remain: <graph-file> <out-file> */
+  if (argc-gk_optind != 2) {
+    printf("Unrecognized parameters.");
+    for (k=0; shorthelpstr[k][0] != '\0'; k++)
+      printf("%s\n", shorthelpstr[k]);
+    exit(0);
+  }
+
+  opts->infile  = gk_strdup(argv[gk_optind++]);
+  opts->outfile = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(opts->infile))
+    errexit("input file %s does not exist.\n", opts->infile);
+
+  if (opts->ppr != -1 && opts->ntvs != -1)
+    errexit("Only one of the -ppr and -ntvs options can be specified.\n");
+
+  return opts;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/test/strings.c b/3rdParty/metis/metis-5.1.0/GKlib/test/strings.c
new file mode 100644
index 000000000..b241d3ff0
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/test/strings.c
@@ -0,0 +1,82 @@
+/*!
+\file strings.c
+\brief Testing module for the string functions in GKlib
+
+\date Started 3/5/2007
+\author George
+\version\verbatim $Id: strings.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Testing module for gk_strstr_replace()  */
+/*************************************************************************/
+void test_strstr_replace()
+{
+  /* One row per gk_strstr_replace() call, in execution order:
+     {input string, pattern, replacement, options}. */
+  static char *cases[][4] = {
+    /* the pattern "s" under four different option strings */
+    {"This is a simple string",
+     "s", "S",
+     ""},
+    {"This is a simple string",
+     "s", "S",
+     "g"},
+    {"This is a simple SS & ss string",
+     "s", "T",
+     "g"},
+    {"This is a simple SS & ss string",
+     "s", "T",
+     "ig"},
+
+    /* patterns built from \b and \w; the first one refers to a group as $1 */
+    {"This is a simple SS & ss string",
+     "\\b\\w(\\w+)\\w\\b", "$1",
+     "ig"},
+    {"This is a simple SS & ss string",
+     "\\b\\w+\\b", "word",
+     "ig"},
+
+    /* URL input with one to three parenthesized groups in the pattern */
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(http://www\\.cs\\.umn\\.edu/)(.*)-T(\\d+)", "$1$2-P$3",
+     "g"},
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(\\d+)", "number:$1",
+     "ig"},
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(http://www\\.cs\\.umn\\.edu/)", "[$1]",
+     "g"}
+  };
+  size_t ncases = sizeof(cases)/sizeof(cases[0]);
+  size_t c;
+  char *result;
+  int status;
+
+  /* every call prints its return code and result string, then frees the
+     string before the next case runs */
+  for (c=0; c<ncases; c++) {
+    status = gk_strstr_replace(cases[c][0], cases[c][1], cases[c][2], cases[c][3], &result);
+    printf("%d, %s.\n", status, result);
+    gk_free((void **)&result, LTERM);
+  }
+}
+
+
+
+int main()
+{
+  test_strstr_replace();
+  /* the block below is disabled; it printed 1000 samples of RandomInRange(3) */
+/*
+  {
+  int i;
+  for (i=0; i<1000; i++)
+    printf("%d\n", RandomInRange(3));
+  }
+*/
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/timers.c b/3rdParty/metis/metis-5.1.0/GKlib/timers.c
new file mode 100644
index 000000000..bb8f29620
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/timers.c
@@ -0,0 +1,52 @@
+/*!
+\file  timers.c
+\brief Various timing functions 
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+
+/*************************************************************************
+* This function returns the current wall-clock time in seconds (not CPU time)
+**************************************************************************/
+double gk_WClockSeconds(void)
+{
+#ifdef __GNUC__
+  struct timeval ctime;
+
+  gettimeofday(&ctime, NULL);
+
+  return (double)ctime.tv_sec + (double).000001*ctime.tv_usec;  /* sec + usec*1e-6 */
+#else
+  return (double)time(NULL);  /* fallback: whole seconds only */
+#endif
+}
+
+
+/*************************************************************************
+* This function returns the CPU time used by the process, in seconds
+**************************************************************************/
+double gk_CPUSeconds(void)
+{
+/* was "#ifdef __OPENMP__"; the macro tested below looks deliberately unmatchable - TODO confirm */
+#ifdef __OPENMPXXXX__
+  return omp_get_wtime();
+#else
+  #if defined(WIN32) || defined(__MINGW32__)
+    return((double) clock()/CLOCKS_PER_SEC);
+  #else
+    struct rusage r;
+
+    getrusage(RUSAGE_SELF, &r);  /* user (ru_utime) and system (ru_stime) time are summed below */
+    return ((r.ru_utime.tv_sec + r.ru_stime.tv_sec) + 1.0e-6*(r.ru_utime.tv_usec + r.ru_stime.tv_usec));
+  #endif
+#endif
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/tokenizer.c b/3rdParty/metis/metis-5.1.0/GKlib/tokenizer.c
new file mode 100644
index 000000000..5efd262db
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/tokenizer.c
@@ -0,0 +1,77 @@
+/*!
+\file  tokenizer.c
+\brief String tokenization routines
+
+This file contains various routines for splitting an input string into
+tokens and returning them in form of a list. The goal is to mimic perl's 
+split function.
+
+\date   Started 11/23/04
+\author George
+\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+/************************************************************************
+* Splits str into the tokens delimited by any run of characters from
+* delim and stores them in *tokens (strbuf, ntoks and list are all set).
+*************************************************************************/
+void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens)
+{
+  int pos, len, count;
+  char *buf;
+
+  /* All scanning and marking is done on a duplicate, never on str itself */
+  buf = tokens->strbuf = gk_strdup(str);
+  len = strlen(str);
+
+  /* Pass 1: count the tokens so that the list can be sized */
+  count = 0;
+  pos   = 0;
+  while (pos < len) {
+    /* step over a run of delimiter characters */
+    for (; pos<len && strchr(delim, buf[pos]) != NULL; pos++);
+
+    if (pos == len)
+      break;
+
+    count++;
+
+    /* step over the characters of the token */
+    for (; pos<len && strchr(delim, buf[pos]) == NULL; pos++);
+  }
+
+  tokens->ntoks = count;
+  tokens->list  = (char **)gk_malloc(count*sizeof(char *), "strtokenize: tokens->list");
+
+  /* Pass 2: terminate each token in place and record where it starts */
+  count = 0;
+  pos   = 0;
+  while (pos < len) {
+    /* every delimiter character becomes a string terminator */
+    for (; pos<len && strchr(delim, buf[pos]) != NULL; pos++)
+      buf[pos] = '\0';
+
+    if (pos == len)
+      break;
+
+    tokens->list[count] = buf+pos;
+    count++;
+
+    /* step over the characters of the token */
+    for (; pos<len && strchr(delim, buf[pos]) == NULL; pos++);
+  }
+}
+
+
+/************************************************************************
+* Frees the list and strbuf members of a gk_Tokens_t by way of gk_free()
+*************************************************************************/
+void gk_freetokenslist(gk_Tokens_t *tokens)
+{  /* each pointer is cast to void **, the form the other gk_free() calls use */
+  gk_free((void **)&tokens->list, (void **)&tokens->strbuf, LTERM);
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/GKlib/util.c b/3rdParty/metis/metis-5.1.0/GKlib/util.c
new file mode 100644
index 000000000..e75d68b51
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/GKlib/util.c
@@ -0,0 +1,108 @@
+/*!
+\file  util.c
+\brief Various utility routines
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: util.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************
+* Shuffles the n entries of p[] by swapping n/2 pairs of positions drawn
+* with RandomInRange(n). When flag == 1, p[] is first set to p[i] = i;
+* for any other flag value the contents passed in are shuffled as they are.
+**************************************************************************/
+void gk_RandomPermute(size_t n, int *p, int flag)
+{
+  gk_idx_t k, first, second;
+  int held;
+
+  if (flag == 1) {
+    k = 0;
+    while (k < n) { p[k] = k; k++; }
+  }
+
+  for (k=0; k<n/2; k++) {
+    first  = RandomInRange(n);
+    second = RandomInRange(n);
+    gk_SWAP(p[first], p[second], held);
+  }
+}
+
+
+/************************************************************************/
+/*!
+\brief Builds a CSR-format, per-set list of members from an element-based
+       set-membership array.
+
+Given an array such as part[], which records the set that each element
+belongs to, the routine fills a pair of arrays (ptr[], ind[]) that list,
+in CSR format, the elements belonging to each set.
+
+\param n      
+  the number of elements in array (e.g., # of vertices)
+\param range  
+  the cardinality of the set (e.g., # of partitions)
+\param array
+  the per-element set membership; each entry is used as an index into ptr
+\param ptr
+  filled by the routine with the starting index in ind of the elements
+  of each set; range+1 entries are written, so it must be at least that
+  large.
+\param ind
+  filled by the routine with the element numbers, grouped by set. The
+  size of this array should be n.
+*/
+/************************************************************************/
+void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind)
+{
+  gk_idx_t e;
+
+  gk_iset(range+1, 0, ptr);
+  for (e=0; e<n; e++)   /* count the members of each set */
+    ptr[array[e]] += 1;
+
+  /* Counts -> offsets, scatter the elements, then restore ptr (macros defined elsewhere) */
+  MAKECSR(e, range, ptr);
+  for (e=0; e<n; e++) {
+    int slot = ptr[array[e]]++;
+    ind[slot] = e;
+  }
+  SHIFTCSR(e, range, ptr);
+}
+
+
+
+/*************************************************************************
+* Returns floor(log2(a)) for a >= 1, and 0 for any a < 1
+**************************************************************************/
+int gk_log2(int a)
+{
+  gk_idx_t nhalvings = 0;
+
+  while (a > 1) { a >>= 1; nhalvings++; }
+  return nhalvings;
+}
+
+
+/*************************************************************************
+* Returns 1 when a equals 1<<gk_log2(a), i.e. a is a power of 2, else 0
+**************************************************************************/
+int gk_ispow2(int a)
+{
+  return ((1<<gk_log2(a)) == a) ? 1 : 0;
+}
+
+
+/*************************************************************************
+* Returns the base-2 logarithm of a, computed as log(a)/log(2.0)
+**************************************************************************/
+float gk_flog2(float a)
+{
+  return (float)(log((double)a) / log(2.0));
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/LICENSE.txt b/3rdParty/metis/metis-5.1.0/LICENSE.txt
new file mode 100644
index 000000000..f952fe297
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/LICENSE.txt
@@ -0,0 +1,18 @@
+
+Copyright & License Notice
+---------------------------
+
+Copyright 1995-2013, Regents of the University of Minnesota
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
+implied. See the License for the specific language governing 
+permissions and limitations under the License.
+
diff --git a/3rdParty/metis/metis-5.1.0/include/CMakeLists.txt b/3rdParty/metis/metis-5.1.0/include/CMakeLists.txt
new file mode 100644
index 000000000..9515a51b6
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/include/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(METIS_INSTALL)
+  install(FILES metis.h DESTINATION include)
+endif()
diff --git a/3rdParty/metis/metis-5.1.0/include/metis.h b/3rdParty/metis/metis-5.1.0/include/metis.h
new file mode 100644
index 000000000..000a418e1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/include/metis.h
@@ -0,0 +1,354 @@
+/*!
+\file metis.h 
+\brief This file contains function prototypes and constant definitions for METIS
+ *
+\author George
+\date   Started 8/9/02
+\version\verbatim $Id$\endverbatim
+*/
+
+#ifndef _METIS_H_
+#define _METIS_H_
+
+#ifdef __clang__
+#pragma clang system_header
+#endif
+
+/****************************************************************************
+* A set of defines that can be modified by the user
+*****************************************************************************/
+
+/*--------------------------------------------------------------------------
+ Specifies the width of the elementary data type that will hold information
+ about vertices and their adjacency lists.
+
+ Possible values:
+   32 : Use 32 bit signed integers
+   64 : Use 64 bit signed integers
+
+ A width of 64 should be specified if the number of vertices or the total
+ number of edges in the graph exceed the limits of a 32 bit signed integer
+ i.e., 2^31-1.
+ Proper use of 64 bit integers requires that the c99 standard datatypes
+ int32_t and int64_t are supported by the compiler.
+ GCC provides these definitions in stdint.h, but it may require some
+ modifications on other architectures.
+--------------------------------------------------------------------------*/
+#define IDXTYPEWIDTH 32
+
+
+/*--------------------------------------------------------------------------
+ Specifies the data type that will hold floating-point style information.
+
+ Possible values:
+   32 : single precision floating point (float)
+   64 : double precision floating point (double)
+--------------------------------------------------------------------------*/
+#define REALTYPEWIDTH 32
+
+
+
+/****************************************************************************
+* In principle, nothing needs to be changed beyond this point, unless the
+* int32_t and int64_t cannot be found in the normal places.
+*****************************************************************************/
+
+/* Uniform definitions for various compilers */
+#if defined(_MSC_VER)
+  #define COMPILER_MSC
+#endif
+#if defined(__ICC)
+  #define COMPILER_ICC
+#endif
+#if defined(__GNUC__)
+  #define COMPILER_GCC
+#endif
+
+/* Include c99 int definitions and the needed constants. When building the
+ * library, these are already defined by GKlib; hence the test for _GKLIB_H_ */
+#ifndef _GKLIB_H_
+#ifdef COMPILER_MSC
+#include <limits.h>
+
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+#define PRId32       "I32d"
+#define PRId64       "I64d"
+#define SCNd32       "ld"
+#define SCNd64       "I64d"
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#else
+#include <inttypes.h>
+#endif
+#endif
+
+
+/*------------------------------------------------------------------------
+* Setup the basic datatypes: idx_t plus its limits, I/O formats and helpers
+*-------------------------------------------------------------------------*/
+#if IDXTYPEWIDTH == 32
+  typedef int32_t idx_t;
+
+  #define IDX_MAX   INT32_MAX
+  #define IDX_MIN   INT32_MIN
+
+  #define SCIDX  SCNd32   /* scanf conversion for idx_t, without the leading '%' */
+  #define PRIDX  PRId32   /* printf conversion for idx_t, without the leading '%' */
+
+  #define strtoidx      strtol
+  #define iabs          abs
+#elif IDXTYPEWIDTH == 64
+  typedef int64_t idx_t;
+
+  #define IDX_MAX   INT64_MAX
+  #define IDX_MIN   INT64_MIN
+
+  #define SCIDX  SCNd64
+  #define PRIDX  PRId64
+
+#ifdef COMPILER_MSC
+  #define strtoidx      _strtoi64
+#else
+  #define strtoidx      strtoll
+#endif
+  #define iabs          labs   /* NOTE(review): labs takes long; confirm long is 64-bit on every target */
+#else
+  #error "Incorrect user-supplied value for IDXTYPEWIDTH"
+#endif
+
+
+#if REALTYPEWIDTH == 32
+  typedef float real_t;
+
+  #define SCREAL         "f"
+  #define PRREAL         "f"
+  #define REAL_MAX       FLT_MAX
+  #define REAL_MIN       FLT_MIN
+  #define REAL_EPSILON   FLT_EPSILON
+
+  #define rabs          fabsf
+  #define REALEQ(x,y) ((rabs((x)-(y)) <= FLT_EPSILON))
+
+#ifdef COMPILER_MSC
+  #define strtoreal     (float)strtod
+#else
+  #define strtoreal     strtof
+#endif
+#elif REALTYPEWIDTH == 64
+  typedef double real_t;
+
+  #define SCREAL         "lf"
+  #define PRREAL         "lf"
+  #define REAL_MAX       DBL_MAX
+  #define REAL_MIN       DBL_MIN
+  #define REAL_EPSILON   DBL_EPSILON
+
+  #define rabs          fabs
+  #define REALEQ(x,y) ((rabs((x)-(y)) <= DBL_EPSILON))
+
+  #define strtoreal     strtod
+#else
+  #error "Incorrect user-supplied value for REALTYPEWIDTH"
+#endif
+
+
+/*------------------------------------------------------------------------
+* Constant definitions 
+*-------------------------------------------------------------------------*/
+/* Metis's version number */
+#define METIS_VER_MAJOR         5
+#define METIS_VER_MINOR         1
+#define METIS_VER_SUBMINOR      0
+
+/* The maximum length of the options[] array */
+#define METIS_NOPTIONS          40
+
+
+
+/*------------------------------------------------------------------------
+* Function prototypes 
+*-------------------------------------------------------------------------*/
+
+#ifdef _WINDLL
+#define METIS_API(type) __declspec(dllexport) type __cdecl
+#elif defined(__cdecl)
+#define METIS_API(type) type __cdecl
+#else
+#define METIS_API(type) type
+#endif
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+METIS_API(int) METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, 
+                  idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, 
+                  idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, 
+                  idx_t *edgecut, idx_t *part);
+
+METIS_API(int) METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, 
+                  idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, 
+                  idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, 
+                  idx_t *edgecut, idx_t *part);
+
+METIS_API(int) METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+                  idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy);
+
+METIS_API(int) METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+                  idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy);
+
+METIS_API(int) METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+                  idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, 
+                  idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart);
+
+METIS_API(int) METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind,
+                  idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, 
+                  real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, 
+                  idx_t *npart);
+
+METIS_API(int) METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
+                  idx_t *options, idx_t *perm, idx_t *iperm);
+
+METIS_API(int) METIS_Free(void *ptr);
+
+METIS_API(int) METIS_SetDefaultOptions(idx_t *options);
+
+
+/* These functions are used by ParMETIS */
+
+METIS_API(int) METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
+                   idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, 
+                   idx_t *sizes);
+
+METIS_API(int) METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, 
+                   idx_t *vwgt, idx_t *options, idx_t *sepsize, idx_t *part);
+
+METIS_API(int) METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy,
+                   idx_t *where, idx_t *hmarker, real_t ubfactor);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+/*------------------------------------------------------------------------
+* Enum type definitions 
+*-------------------------------------------------------------------------*/
+/*! Return codes */
+typedef enum {
+  METIS_OK              = 1,    /*!< Returned normally */
+  METIS_ERROR_INPUT     = -2,   /*!< Returned due to erroneous inputs and/or options */
+  METIS_ERROR_MEMORY    = -3,   /*!< Returned due to insufficient memory */
+  METIS_ERROR           = -4    /*!< Some other errors */
+} rstatus_et; 
+
+
+/*! Operation type codes */
+typedef enum {
+  METIS_OP_PMETIS,       
+  METIS_OP_KMETIS,
+  METIS_OP_OMETIS
+} moptype_et;
+
+
+/*! Options codes (i.e., options[]) */
+typedef enum {
+  METIS_OPTION_PTYPE,
+  METIS_OPTION_OBJTYPE,
+  METIS_OPTION_CTYPE,
+  METIS_OPTION_IPTYPE,
+  METIS_OPTION_RTYPE,
+  METIS_OPTION_DBGLVL,
+  METIS_OPTION_NITER,
+  METIS_OPTION_NCUTS,
+  METIS_OPTION_SEED,
+  METIS_OPTION_NO2HOP,
+  METIS_OPTION_MINCONN,
+  METIS_OPTION_CONTIG,
+  METIS_OPTION_COMPRESS,
+  METIS_OPTION_CCORDER,
+  METIS_OPTION_PFACTOR,
+  METIS_OPTION_NSEPS,
+  METIS_OPTION_UFACTOR,
+  METIS_OPTION_NUMBERING,
+
+  /* Used for command-line parameter purposes */
+  METIS_OPTION_HELP,
+  METIS_OPTION_TPWGTS,
+  METIS_OPTION_NCOMMON,
+  METIS_OPTION_NOOUTPUT,
+  METIS_OPTION_BALANCE,
+  METIS_OPTION_GTYPE,
+  METIS_OPTION_UBVEC
+} moptions_et;
+
+
+/*! Partitioning Schemes */
+typedef enum {
+  METIS_PTYPE_RB, 
+  METIS_PTYPE_KWAY                
+} mptype_et;
+
+/*! Graph types for meshes */
+typedef enum {
+  METIS_GTYPE_DUAL,
+  METIS_GTYPE_NODAL               
+} mgtype_et;
+
+/*! Coarsening Schemes */
+typedef enum {
+  METIS_CTYPE_RM,
+  METIS_CTYPE_SHEM
+} mctype_et;
+
+/*! Initial partitioning schemes */
+typedef enum {
+  METIS_IPTYPE_GROW,
+  METIS_IPTYPE_RANDOM,
+  METIS_IPTYPE_EDGE,
+  METIS_IPTYPE_NODE,
+  METIS_IPTYPE_METISRB
+} miptype_et;
+
+
+/*! Refinement schemes */
+typedef enum {
+  METIS_RTYPE_FM,
+  METIS_RTYPE_GREEDY,
+  METIS_RTYPE_SEP2SIDED,
+  METIS_RTYPE_SEP1SIDED
+} mrtype_et;
+
+
+/*! Debug Levels: each value is a distinct bit, tested with & against dbglvl */
+typedef enum {
+  METIS_DBG_INFO       = 1 << 0,    /*!< Shows various diagnostic messages */
+  METIS_DBG_TIME       = 1 << 1,    /*!< Perform timing analysis */
+  METIS_DBG_COARSEN    = 1 << 2,    /*!< Show the coarsening progress */
+  METIS_DBG_REFINE     = 1 << 3,    /*!< Show the refinement progress */
+  METIS_DBG_IPART      = 1 << 4,    /*!< Show info on initial partitioning */
+  METIS_DBG_MOVEINFO   = 1 << 5,    /*!< Show info on vertex moves during refinement */
+  METIS_DBG_SEPINFO    = 1 << 6,    /*!< Show info on vertex moves during sep refinement */
+  METIS_DBG_CONNINFO   = 1 << 7,    /*!< Show info on minimization of subdomain connectivity */
+  METIS_DBG_CONTIGINFO = 1 << 8,    /*!< Show info on elimination of connected components */
+  METIS_DBG_MEMORY     = 1 << 11    /*!< Show info related to wspace allocation (bits 9-10 are not assigned here) */
+} mdbglvl_et;
+
+
+/* Types of objectives */
+typedef enum {
+  METIS_OBJTYPE_CUT,
+  METIS_OBJTYPE_VOL,
+  METIS_OBJTYPE_NODE
+} mobjtype_et;
+
+
+
+#endif  /* _METIS_H_ */
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/CMakeLists.txt b/3rdParty/metis/metis-5.1.0/libmetis/CMakeLists.txt
new file mode 100644
index 000000000..6bef2b577
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Add this directory for internal users.
+#include_directories(.)
+# Find sources.
+file(GLOB metis_sources *.c)
+# Build libmetis.
+add_library(metis ${METIS_LIBRARY_TYPE} ${GKlib_sources} ${metis_sources})
+if(UNIX)
+  target_link_libraries(metis m)
+
+  target_compile_options(metis PRIVATE "-Wno-format")
+endif()
+
+if(MSVC)
+  target_compile_options(metis PRIVATE "/W0")
+endif()
+
+if(METIS_INSTALL)
+  install(TARGETS metis
+    LIBRARY DESTINATION lib
+    RUNTIME DESTINATION lib
+    ARCHIVE DESTINATION lib)
+endif()
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/auxapi.c b/3rdParty/metis/metis-5.1.0/libmetis/auxapi.c
new file mode 100644
index 000000000..8976b4ba4
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/auxapi.c
@@ -0,0 +1,43 @@
+/**
+\file
+\brief This file contains various helper API routines for using METIS.
+
+\date   Started 5/12/2011
+\author George  
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version\verbatim $Id: auxapi.c 10409 2011-06-25 16:58:34Z karypis $ \endverbatim
+*/
+
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Releases, with free(), a block of memory that METIS allocated and
+    returned to the application.
+
+    \param ptr the pointer to release; a NULL pointer is accepted.
+    \return METIS_OK, unconditionally.
+*/
+/*************************************************************************/
+int METIS_Free(void *ptr)
+{
+  free(ptr);  /* free(NULL) does nothing, so no guard is required */
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! Resets an options array by handing it to iset() with the value -1;
+    presumably -1 marks an option as "use the default" - TODO confirm.
+    \param options points to an array of size at least METIS_NOPTIONS.
+*/
+/*************************************************************************/
+int METIS_SetDefaultOptions(idx_t *options)
+{
+  const idx_t unset = -1;
+  iset(METIS_NOPTIONS, unset, options);
+  return METIS_OK;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/balance.c b/3rdParty/metis/metis-5.1.0/libmetis/balance.c
new file mode 100644
index 000000000..326f3948c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/balance.c
@@ -0,0 +1,498 @@
+/*!
+\file
+\brief Functions for the edge-based balancing 
+
+\date Started 7/23/97
+\author George  
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version\verbatim $Id: balance.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim
+*/
+
+#include "metislib.h"
+
+/*************************************************************************
+* This function is the entry point of the bisection balancing algorithms.
+**************************************************************************/
+void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts)
+{
+  if (ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors) <= 0) 
+    return;
+  /* ncon == 1 uses the boundary or general balancer; any other ncon uses the Mc variant */
+  if (graph->ncon == 1) {
+    /* return right away if the balance is OK; rabs keeps the real_t difference out of int */
+    if (rabs(ntpwgts[0]*graph->tvwgt[0]-graph->pwgts[0]) < 3*graph->tvwgt[0]/graph->nvtxs)
+      return;
+    /* boundary vertices present: move only those; otherwise consider every vertex */
+    if (graph->nbnd > 0)
+      Bnd2WayBalance(ctrl, graph, ntpwgts);
+    else
+      General2WayBalance(ctrl, graph, ntpwgts);
+  }
+  else {
+    McGeneral2WayBalance(ctrl, graph, ntpwgts);
+  }
+}
+
+
+/*************************************************************************
+* This function balances two partitions by moving boundary vertices from the
+* side that is over its target weight to the other (targets come from ntpwgts[0]).
+**************************************************************************/
+void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts)
+{
+  idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp;  /* pass, me: unused here */
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
+  idx_t *moved, *perm;
+  rpq_t *queue;
+  idx_t higain, mincut, mindiff;
+  idx_t tpwgts[2];
+
+  WCOREPUSH;  /* paired with the WCOREPOP at the end of the function */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  where  = graph->where;
+  id     = graph->id;
+  ed     = graph->ed;
+  pwgts  = graph->pwgts;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  moved = iwspacemalloc(ctrl, nvtxs);
+  perm  = iwspacemalloc(ctrl, nvtxs);
+
+  /* Determine from which domain you will be moving data */
+  tpwgts[0] = graph->tvwgt[0]*ntpwgts[0];
+  tpwgts[1] = graph->tvwgt[0] - tpwgts[0];
+  mindiff   = iabs(tpwgts[0]-pwgts[0]);  /* vertices heavier than this are never queued */
+  from      = (pwgts[0] < tpwgts[0] ? 1 : 0);  /* side 0 under target => take from side 1 */
+  to        = (from+1)%2;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+     printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. ICut: %6"PRIDX" [B]\n",
+             pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, 
+             graph->mincut));
+
+  queue = rpqCreate(nvtxs);
+
+  iset(nvtxs, -1, moved);  /* -1 == not moved yet; otherwise the swap number */
+
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+  ASSERT(CheckBnd(graph));
+
+  /* Insert the boundary nodes of the proper partition whose size is OK in the priority queue */
+  nbnd = graph->nbnd;
+  irandArrayPermute(nbnd, perm, nbnd/5, 1);
+  for (ii=0; ii<nbnd; ii++) {
+    i = perm[ii];
+    ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);
+    ASSERT(bndptr[bndind[i]] != -1);
+    if (where[bndind[i]] == from && vwgt[bndind[i]] <= mindiff)
+      rpqInsert(queue, bndind[i], ed[bndind[i]]-id[bndind[i]]);  /* key: ed - id */
+  }
+
+  mincut = graph->mincut;
+  for (nswaps=0; nswaps<nvtxs; nswaps++) {
+    if ((higain = rpqGetTop(queue)) == -1)  /* -1: nothing left in the queue */
+      break;
+    ASSERT(bndptr[higain] != -1);
+
+    if (pwgts[to]+vwgt[higain] > tpwgts[to])  /* the move would take 'to' past its target */
+      break;
+
+    mincut -= (ed[higain]-id[higain]);
+    INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);
+
+    where[higain] = to;
+    moved[higain] = nswaps;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+      printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1]));
+
+    /**************************************************************
+    * Update the id[i]/ed[i] values of the affected nodes
+    ***************************************************************/
+    SWAP(id[higain], ed[higain], tmp);  /* the moved vertex's id and ed trade places */
+    if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
+      BNDDelete(nbnd, bndind,  bndptr, higain);
+
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);  /* signed edge weight: + if k is on side 'to' */
+      INC_DEC(id[k], ed[k], kwgt);
+
+      /* Update its boundary information and queue position */
+      if (bndptr[k] != -1) { /* If k was a boundary vertex */
+        if (ed[k] == 0) { /* Not a boundary vertex any more */
+          BNDDelete(nbnd, bndind, bndptr, k);
+          if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff)  /* Remove it if in the queues */
+            rpqDelete(queue, k);
+        }
+        else { /* If it has not been moved, update its position in the queue */
+          if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff)
+            rpqUpdate(queue, k, ed[k]-id[k]);
+        }
+      }
+      else {
+        if (ed[k] > 0) {  /* It will now become a boundary vertex */
+          BNDInsert(nbnd, bndind, bndptr, k);
+          if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff) 
+            rpqInsert(queue, k, ed[k]-id[k]);
+        }
+      }
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+    printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd));
+
+  graph->mincut = mincut;  /* write the updated cut and boundary size back to the graph */
+  graph->nbnd   = nbnd;
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************
+* This function balances two partitions by moving the highest gain 
+* (including negative gain) vertices to the other domain.
+* It is used only when the imbalance is due to non-contiguous
+* subdomains. That is, there are no boundary vertices.
+* It moves vertices from the domain that is overweight to the one that 
+* is underweight.
+**************************************************************************/
+void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts)
+{
+  idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, tmp;  /* pass, me: unused here */
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
+  idx_t *moved, *perm;
+  rpq_t *queue;
+  idx_t higain, mincut, mindiff;
+  idx_t tpwgts[2];
+
+  WCOREPUSH;  /* paired with the WCOREPOP at the end of the function */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  where  = graph->where;
+  id     = graph->id;
+  ed     = graph->ed;
+  pwgts  = graph->pwgts;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  moved = iwspacemalloc(ctrl, nvtxs);
+  perm  = iwspacemalloc(ctrl, nvtxs);
+
+  /* Determine from which domain you will be moving data */
+  tpwgts[0] = graph->tvwgt[0]*ntpwgts[0];
+  tpwgts[1] = graph->tvwgt[0] - tpwgts[0];
+  mindiff   = iabs(tpwgts[0]-pwgts[0]);  /* vertices heavier than this are never queued */
+  from      = (pwgts[0] < tpwgts[0] ? 1 : 0);  /* side 0 under target => take from side 1 */
+  to        = (from+1)%2;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+     printf("Partitions: [%6"PRIDX" %6"PRIDX"] T[%6"PRIDX" %6"PRIDX"], Nv-Nb[%6"PRIDX" %6"PRIDX"]. ICut: %6"PRIDX" [B]\n",
+             pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+
+  queue = rpqCreate(nvtxs);
+
+  iset(nvtxs, -1, moved);  /* -1 == not moved yet; otherwise the swap number */
+
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+  ASSERT(CheckBnd(graph));
+
+  /* Insert the nodes of the proper partition whose size is OK in the priority queue */
+  irandArrayPermute(nvtxs, perm, nvtxs/5, 1);
+  for (ii=0; ii<nvtxs; ii++) {
+    i = perm[ii];
+    if (where[i] == from && vwgt[i] <= mindiff)
+      rpqInsert(queue, i, ed[i]-id[i]);  /* key: ed - id */
+  }
+
+  mincut = graph->mincut;
+  nbnd = graph->nbnd;
+  for (nswaps=0; nswaps<nvtxs; nswaps++) {
+    if ((higain = rpqGetTop(queue)) == -1)  /* -1: nothing left in the queue */
+      break;
+
+    if (pwgts[to]+vwgt[higain] > tpwgts[to])  /* the move would take 'to' past its target */
+      break;
+
+    mincut -= (ed[higain]-id[higain]);
+    INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);
+
+    where[higain] = to;
+    moved[higain] = nswaps;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+      printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1]));
+
+    /**************************************************************
+    * Update the id[i]/ed[i] values of the affected nodes
+    ***************************************************************/
+    SWAP(id[higain], ed[higain], tmp);  /* the moved vertex's id and ed trade places */
+    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
+      BNDDelete(nbnd, bndind,  bndptr, higain);
+    if (ed[higain] > 0 && bndptr[higain] == -1)
+      BNDInsert(nbnd, bndind,  bndptr, higain);
+
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+
+      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);  /* signed edge weight: + if k is on side 'to' */
+      INC_DEC(id[k], ed[k], kwgt);
+
+      /* Update the queue position */
+      if (moved[k] == -1 && where[k] == from && vwgt[k] <= mindiff)
+        rpqUpdate(queue, k, ed[k]-id[k]);
+
+      /* Update its boundary information */
+      if (ed[k] == 0 && bndptr[k] != -1) 
+        BNDDelete(nbnd, bndind, bndptr, k);
+      else if (ed[k] > 0 && bndptr[k] == -1)  
+        BNDInsert(nbnd, bndind, bndptr, k);
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+    printf("\tMinimum cut: %6"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, pwgts[0], pwgts[1], nbnd));
+
+  graph->mincut = mincut;  /* write the updated cut and boundary size back to the graph */
+  graph->nbnd   = nbnd;
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************
+* This function performs an edge-based FM pass that balances a multi-constraint 2-way partition
+**************************************************************************/
+void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts)
+{
+  idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, 
+        me, limit, tmp, cnum;
+  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts, *id, *ed, *bndptr, *bndind;
+  idx_t *moved, *swaps, *perm, *qnum, *qsizes;
+  idx_t higain, mincut, newcut, mincutorder;
+  real_t *invtvwgt, *minbalv, *newbalv, minbal, newbal;
+  rpq_t **queues;
+  /* ntpwgts is only read by the METIS_DBG_REFINE printout below */
+  WCOREPUSH;
+
+  nvtxs    = graph->nvtxs;
+  ncon     = graph->ncon;
+  xadj     = graph->xadj;
+  vwgt     = graph->vwgt;
+  adjncy   = graph->adjncy;
+  adjwgt   = graph->adjwgt;
+  invtvwgt = graph->invtvwgt;
+  where    = graph->where;
+  id       = graph->id;
+  ed       = graph->ed;
+  pwgts    = graph->pwgts;
+  bndptr   = graph->bndptr;
+  bndind   = graph->bndind;
+
+  moved   = iwspacemalloc(ctrl, nvtxs);   /* move index of each vertex, -1 if not moved */
+  swaps   = iwspacemalloc(ctrl, nvtxs);   /* vertices in the order they were moved */
+  perm    = iwspacemalloc(ctrl, nvtxs);
+  qnum    = iwspacemalloc(ctrl, nvtxs);   /* constraint whose queue holds each vertex */
+  newbalv = rwspacemalloc(ctrl, ncon);
+  minbalv = rwspacemalloc(ctrl, ncon);
+  qsizes  = iwspacemalloc(ctrl, 2*ncon);  /* number of vertices per queue */
+
+  limit = gk_min(gk_max(0.01*nvtxs, 15), 100);  /* non-improving moves tolerated before giving up */
+
+  /* Create one priority queue per (constraint, side) pair: queues[2*c+side] */
+  queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *));
+  for (i=0; i<2*ncon; i++) {
+    queues[i] = rpqCreate(nvtxs);
+    qsizes[i] = 0;
+  }
+  /* put each vertex in the queue of the constraint picked by iargmax_nrm() */
+  for (i=0; i<nvtxs; i++) {
+    qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt);
+    qsizes[2*qnum[i]+where[i]]++;
+  }
+  /* For each empty queue (j, from), move into it vertices of side 'from' whose
+     iargmax2_nrm() constraint is j, provided their current queue is larger and their
+     scaled weight in the current constraint is below 1.3x the scaled weight in j */
+  for (from=0; from<2; from++) {
+    for (j=0; j<ncon; j++) {
+      if (qsizes[2*j+from] == 0) {
+        for (i=0; i<nvtxs; i++) {
+          if (where[i] != from)
+            continue;
+
+          k = iargmax2_nrm(ncon, vwgt+i*ncon, invtvwgt);
+          if (k == j && 
+              qsizes[2*qnum[i]+from] > qsizes[2*j+from] && 
+              vwgt[i*ncon+qnum[i]]*invtvwgt[qnum[i]] < 1.3*vwgt[i*ncon+j]*invtvwgt[j]) {
+            qsizes[2*qnum[i]+from]--;
+            qsizes[2*j+from]++;
+            qnum[i] = j;
+          }
+        }
+      }
+    }
+  }
+
+  /* initial imbalance; the move loop below stops as soon as it drops to <= 0 */
+  minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, minbalv);
+  ASSERT(minbal > 0.0);
+
+  newcut = mincut = graph->mincut;
+  mincutorder = -1;  /* index of the last move of the best state; -1 = initial partition */
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+    printf("Parts: [");
+    for (l=0; l<ncon; l++)
+      printf("(%6"PRIDX" %6"PRIDX" %.3"PRREAL" %.3"PRREAL") ", 
+          pwgts[l], pwgts[ncon+l], ntpwgts[l], ntpwgts[ncon+l]);
+    printf("] Nv-Nb[%5"PRIDX", %5"PRIDX"]. ICut: %6"PRIDX", LB: %+.3"PRREAL" [B]\n", 
+           graph->nvtxs, graph->nbnd, graph->mincut, minbal);
+  }
+
+  iset(nvtxs, -1, moved);  /* -1 marks a vertex that has not been moved */
+
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+  ASSERT(CheckBnd(graph));
+
+  /* Insert all nodes in the priority queues, keyed by the gain ed-id, in perm order */
+  nbnd = graph->nbnd;
+  irandArrayPermute(nvtxs, perm, nvtxs/10, 1);
+  for (ii=0; ii<nvtxs; ii++) {
+    i = perm[ii];
+    rpqInsert(queues[2*qnum[i]+where[i]], i, ed[i]-id[i]);
+  }
+  /* Main loop: one vertex move per iteration */
+  for (nswaps=0; nswaps<nvtxs; nswaps++) {
+    if (minbal <= 0.0)
+      break;
+    /* SelectQueue() chooses the side (from) and the constraint queue (cnum) to draw from */
+    SelectQueue(graph, ctrl->pijbm, ctrl->ubfactors, queues, &from, &cnum);
+    to = (from+1)%2;
+    /* stop if no queue was selected or rpqGetTop() returned -1 for the selected queue */
+    if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1)
+      break;
+    /* tentatively apply the move to the cut and to the partition weights */
+    newcut -= (ed[higain]-id[higain]);
+
+    iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
+    iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
+    newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, newbalv);
+    /* new best state: better balance, or equal balance with lower cut / BetterBalance2Way() */
+    if (newbal < minbal || (newbal == minbal && 
+        (newcut < mincut || 
+         (newcut == mincut && BetterBalance2Way(ncon, minbalv, newbalv))))) {
+      mincut      = newcut;
+      minbal      = newbal;
+      mincutorder = nswaps;
+      rcopy(ncon, newbalv, minbalv);
+    }
+    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
+      newcut += (ed[higain]-id[higain]);
+      iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
+      iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
+      break;
+    }
+    /* commit the move and log it in swaps[] so that it can be rolled back */
+    where[higain] = to;
+    moved[higain] = nswaps;
+    swaps[nswaps] = higain;
+
+    if (ctrl->dbglvl&METIS_DBG_MOVEINFO) {
+      printf("Moved %6"PRIDX" from %"PRIDX"(%"PRIDX"). Gain: %5"PRIDX", "
+             "Cut: %5"PRIDX", NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
+      for (l=0; l<ncon; l++) 
+        printf("(%6"PRIDX", %6"PRIDX") ", pwgts[l], pwgts[ncon+l]);
+      printf(", %+.3"PRREAL" LB: %+.3"PRREAL"\n", minbal, newbal);
+    }
+
+
+    /**************************************************************
+    * Update the id[i]/ed[i] values of the affected nodes
+    ***************************************************************/
+    SWAP(id[higain], ed[higain], tmp);
+    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
+      BNDDelete(nbnd, bndind,  bndptr, higain);
+    if (ed[higain] > 0 && bndptr[higain] == -1)
+      BNDInsert(nbnd, bndind,  bndptr, higain);
+
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+
+      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+      INC_DEC(id[k], ed[k], kwgt);
+
+      /* Update the queue position */
+      if (moved[k] == -1)
+        rpqUpdate(queues[2*qnum[k]+where[k]], k, ed[k]-id[k]);
+
+      /* Update its boundary information */
+      if (ed[k] == 0 && bndptr[k] != -1) 
+        BNDDelete(nbnd, bndind, bndptr, k);
+      else if (ed[k] > 0 && bndptr[k] == -1)  
+        BNDInsert(nbnd, bndind, bndptr, k);
+    }
+  }
+
+
+
+  /****************************************************************
+  * Roll back the moves made after the best state (mincutorder)
+  *****************************************************************/
+  for (nswaps--; nswaps>mincutorder; nswaps--) {
+    higain = swaps[nswaps];
+
+    to = where[higain] = (where[higain]+1)%2;
+    SWAP(id[higain], ed[higain], tmp);
+    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
+      BNDDelete(nbnd, bndind,  bndptr, higain);
+    else if (ed[higain] > 0 && bndptr[higain] == -1)
+      BNDInsert(nbnd, bndind,  bndptr, higain);
+
+    iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,         1);
+    iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1);
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+
+      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+      INC_DEC(id[k], ed[k], kwgt);
+
+      if (bndptr[k] != -1 && ed[k] == 0)
+        BNDDelete(nbnd, bndind, bndptr, k);
+      if (bndptr[k] == -1 && ed[k] > 0)
+        BNDInsert(nbnd, bndind, bndptr, k);
+    }
+  }
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+    printf("\tMincut: %6"PRIDX" at %5"PRIDX", NBND: %6"PRIDX", NPwgts: [", 
+        mincut, mincutorder, nbnd);
+    for (l=0; l<ncon; l++)
+      printf("(%6"PRIDX", %6"PRIDX") ", pwgts[l], pwgts[ncon+l]);
+    printf("], LB: %.3"PRREAL"\n", ComputeLoadImbalance(graph, 2, ctrl->pijbm));
+  }
+
+  graph->mincut = mincut;
+  graph->nbnd   = nbnd;
+
+
+  for (i=0; i<2*ncon; i++) 
+    rpqDestroy(queues[i]);
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/bucketsort.c b/3rdParty/metis/metis-5.1.0/libmetis/bucketsort.c
new file mode 100644
index 000000000..e126d02a6
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/bucketsort.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * bucketsort.c
+ *
+ * This file contains code that implements a variety of counting-sort
+ * algorithms
+ *
+ * Started 7/25/97
+ * George
+ *
+ */
+
+#include "metislib.h"
+
+
+
+/*************************************************************************
+* This function uses simple counting sort to return in perm the n items
+* sorted by increasing key. The keys are assumed to be integers in the 
+* range [0, max]; equal keys keep their tperm order. Used during matching.
+**************************************************************************/
+void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys, 
+         idx_t *tperm, idx_t *perm)
+{
+  idx_t i, ii;
+  idx_t *counts;
+
+  WCOREPUSH;
+
+  counts = iset(max+2, 0, iwspacemalloc(ctrl, max+2));
+  /* histogram of the keys; MAKECSR presumably turns it into bucket start offsets */
+  for (i=0; i<n; i++)
+    counts[keys[i]]++;
+  MAKECSR(i, max+1, counts);
+  /* walk the items in tperm order and append each one to the bucket of its key */
+  for (ii=0; ii<n; ii++) {
+    i = tperm[ii];
+    perm[counts[keys[i]]++] = i;
+  }
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/checkgraph.c b/3rdParty/metis/metis-5.1.0/libmetis/checkgraph.c
new file mode 100644
index 000000000..852634614
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/checkgraph.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * checkgraph.c
+ *
+ * This file contains routines that check and fix the structure of a graph
+ *
+ * Started 8/28/94
+ * George
+ *
+ */
+
+#include "metislib.h"
+
+
+
+/*************************************************************************/
+/*! This function checks if a graph is valid. A valid graph must satisfy 
+    the following constraints:
+    - It should contain no self-edges.
+    - It should be undirected; i.e., (u,v) and (v,u) should both be
+      present and carry the same weight.
+    - No adjacency list should contain multiple edges to the same vertex.
+
+    \param graph is the graph to be checked, whose numbering starts from 0.
+    \param numflag is 0 if errors are reported using 0-based vertex
+           numbers; any other value selects 1-based numbers.
+    \param verbose is non-zero to print the identified errors, 0 to run silently.
+    \returns 1 if no errors were found, 0 otherwise.
+*/
+/*************************************************************************/
+int CheckGraph(graph_t *graph, int numflag, int verbose)
+{
+  idx_t i, j, k, l;
+  idx_t nvtxs, err=0;
+  idx_t minedge, maxedge, minewgt, maxewgt;
+  idx_t *xadj, *adjncy, *adjwgt, *htable;
+
+  numflag = (numflag == 0 ? 0 : 1);  /* make sure that numflag is 0 or 1 */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  ASSERT(adjwgt != NULL);
+
+  htable = ismalloc(nvtxs, 0, "htable");  /* marks the neighbors already seen for the current vertex */
+  /* a graph without edges has no adjncy[0]/adjwgt[0]; do not read them in that case */
+  minedge = maxedge = (xadj[nvtxs] > 0 ? adjncy[0] : 0);
+  minewgt = maxewgt = (xadj[nvtxs] > 0 ? adjwgt[0] : 0);
+
+  for (i=0; i<nvtxs; i++) {
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      /* min/max statistics; they are tracked but not reported by this function */
+      minedge = (k < minedge) ? k : minedge;
+      maxedge = (k > maxedge) ? k : maxedge;
+      minewgt = (adjwgt[j] < minewgt) ? adjwgt[j] : minewgt;
+      maxewgt = (adjwgt[j] > maxewgt) ? adjwgt[j] : maxewgt;
+
+      if (i == k) {
+        if (verbose)
+          printf("Vertex %"PRIDX" contains a self-loop "
+                 "(i.e., diagonal entry in the matrix)!\n", i+numflag);
+        err++;
+      }
+      else {  /* look for the reverse edge (k,i) and compare the weights */
+        for (l=xadj[k]; l<xadj[k+1]; l++) {
+          if (adjncy[l] == i) {
+            if (adjwgt[l] != adjwgt[j]) {
+              if (verbose) 
+                printf("Edges (u:%"PRIDX" v:%"PRIDX" wgt:%"PRIDX") and "
+                       "(v:%"PRIDX" u:%"PRIDX" wgt:%"PRIDX") "
+                       "do not have the same weight!\n", 
+                       i+numflag, k+numflag, adjwgt[j],
+                       k+numflag, i+numflag, adjwgt[l]);
+              err++;
+            }
+            break;
+          }
+        }
+        if (l == xadj[k+1]) {
+          if (verbose)
+            printf("Missing edge: (%"PRIDX" %"PRIDX")!\n", k+numflag, i+numflag);
+          err++;
+        }
+      }
+      /* a neighbor that is already marked appears more than once in i's list */
+      if (htable[k] == 0) {
+        htable[k]++;
+      }
+      else {
+        if (verbose)
+          printf("Edge %"PRIDX" from vertex %"PRIDX" is repeated %"PRIDX" times\n", 
+              k+numflag, i+numflag, htable[k]++);
+        err++;
+      }
+    }
+    /* clear the marks set for vertex i before moving to the next vertex */
+    for (j=xadj[i]; j<xadj[i+1]; j++) 
+      htable[adjncy[j]] = 0;
+  }
+
+ 
+  if (err > 0 && verbose) { 
+    printf("A total of %"PRIDX" errors exist in the input file. "
+           "Correct them, and run again!\n", err);
+  }
+
+  gk_free((void **)&htable, LTERM);
+
+  return (err == 0 ? 1 : 0);
+}
+
+
+/*************************************************************************/
+/*! Quick check of the graph weights: returns 1 if ncon >= 1 and none of the supplied weights is negative, 0 otherwise */
+/*************************************************************************/
+int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, 
+        idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) 
+{
+  idx_t i;
+
+  if (ncon <= 0) {
+    printf("Input Error: ncon must be >= 1.\n");
+    return 0;
+  }
+  /* NULL arrays are skipped; valid indices are [0, ncon*nvtxs) for vwgt and [0, nvtxs) for vsize */
+  if (vwgt) {
+    for (i=ncon*nvtxs-1; i>=0; i--) {  /* start at the last valid entry, not one past it */
+      if (vwgt[i] < 0) {
+        printf("Input Error: negative vertex weight(s).\n");
+        return 0;
+      }
+    }
+  }
+  if (vsize) {
+    for (i=nvtxs-1; i>=0; i--) {  /* start at the last valid entry, not one past it */
+      if (vsize[i] < 0) {
+        printf("Input Error: negative vertex sizes(s).\n");
+        return 0;
+      }
+    }
+  }
+  if (adjwgt) {
+    for (i=xadj[nvtxs]-1; i>=0; i--) {
+      if (adjwgt[i] < 0) {  /* NOTE: only negative weights are rejected, despite the message wording */
+        printf("Input Error: non-positive edge weight(s).\n");
+        return 0;
+      }
+    }
+  }
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function creates a graph whose topology is consistent with 
+    Metis' requirements that:
+    - There are no self-edges.
+    - It is undirected; i.e., (u,v) and (v,u) should be present and of the
+      same weight.
+    - The adjacency list should not contain multiple edges to the same
+      other vertex.
+
+    Any of the above errors are fixed by performing the following operations:
+    - Self-edges are removed.
+    - The undirected graph is formed by the union of edges.
+    - One of the duplicate edges is selected.
+
+    The routine does not change the provided vertex weights; it returns a new graph_t and only reads the input graph.
+*/
+/*************************************************************************/
+graph_t *FixGraph(graph_t *graph)
+{
+  idx_t i, j, k, l, nvtxs, nedges;
+  idx_t *xadj, *adjncy, *adjwgt;
+  idx_t *nxadj, *nadjncy, *nadjwgt;
+  graph_t *ngraph;
+  uvw_t *edges;
+
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  ASSERT(adjwgt != NULL);
+
+  ngraph = CreateGraph();
+
+  ngraph->nvtxs = nvtxs;
+
+  /* deal with vertex weights/sizes */
+  ngraph->ncon  = graph->ncon;
+  ngraph->vwgt  = icopy(nvtxs*graph->ncon, graph->vwgt, 
+                        imalloc(nvtxs*graph->ncon, "FixGraph: vwgt"));
+
+  ngraph->vsize = ismalloc(nvtxs, 1, "FixGraph: vsize");  /* stays 1 when the input has no vsize */
+  if (graph->vsize)
+    icopy(nvtxs, graph->vsize, ngraph->vsize);
+
+  /* fix graph by sorting the "superset" of edges */
+  edges = (uvw_t *)gk_malloc(sizeof(uvw_t)*2*xadj[nvtxs], "FixGraph: edges");
+
+  for (nedges=0, i=0; i<nvtxs; i++) {
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      /* store each edge in upper-triangular form (u < v); self-edges are dropped */
+      if (i < adjncy[j]) {
+        edges[nedges].u = i;
+        edges[nedges].v = adjncy[j];
+        edges[nedges].w = adjwgt[j];
+        nedges++;
+      }
+      else if (i > adjncy[j]) {
+        edges[nedges].u = adjncy[j];
+        edges[nedges].v = i;
+        edges[nedges].w = adjwgt[j];
+        nedges++;
+      }
+    }
+  }
+
+  uvwsorti(nedges, edges);
+
+
+  /* keep the unique subset (assumes uvwsorti() places identical (u,v) pairs next to each other) */
+  for (k=0, i=1; i<nedges; i++) {
+    if (edges[k].v != edges[i].v || edges[k].u != edges[i].u) {
+      edges[++k] = edges[i];
+    }
+  }
+  nedges = (nedges > 0 ? k+1 : 0);  /* no edges collected: do not count the unset edges[0] */
+
+  /* allocate memory for the fixed graph */
+  nxadj   = ngraph->xadj   = ismalloc(nvtxs+1, 0, "FixGraph: nxadj");
+  nadjncy = ngraph->adjncy = imalloc(2*nedges, "FixGraph: nadjncy");
+  nadjwgt = ngraph->adjwgt = imalloc(2*nedges, "FixGraph: nadjwgt");
+
+  /* create the adjacency list of the fixed graph from the upper-triangular
+     part of the adjacency matrix */
+  for (k=0; k<nedges; k++) {
+    nxadj[edges[k].u]++;
+    nxadj[edges[k].v]++;
+  }
+  MAKECSR(i, nvtxs, nxadj);
+  /* write both directions of every edge, using nxadj[] as the running insert position */
+  for (k=0; k<nedges; k++) {
+    nadjncy[nxadj[edges[k].u]] = edges[k].v;
+    nadjncy[nxadj[edges[k].v]] = edges[k].u;
+    nadjwgt[nxadj[edges[k].u]] = edges[k].w;
+    nadjwgt[nxadj[edges[k].v]] = edges[k].w;
+    nxadj[edges[k].u]++;
+    nxadj[edges[k].v]++;
+  }
+  SHIFTCSR(i, nvtxs, nxadj);
+
+  gk_free((void **)&edges, LTERM);
+
+  return ngraph;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/coarsen.c b/3rdParty/metis/metis-5.1.0/libmetis/coarsen.c
new file mode 100644
index 000000000..165344e6e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/coarsen.c
@@ -0,0 +1,1132 @@
+/*!
+\file  
+\brief Functions for computing matchings during graph coarsening
+
+\date Started 7/23/97
+\author George  
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version\verbatim $Id: coarsen.c 13936 2013-03-30 03:59:09Z karypis $ \endverbatim
+*/
+
+
+#include "metislib.h"
+
+#define UNMATCHEDFOR2HOP  0.10  /* The fraction of unmatched vertices that triggers 2-hop */
+                                  
+
+/*************************************************************************/
+/*! This function takes a graph and creates a sequence of coarser graphs
+    (the coarsening phase of the multilevel paradigm); it returns the coarsest one.
+ */
+/*************************************************************************/
+graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, eqewgts, level=0;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr));
+
+  /* determine if the weights on the edges are all the same; if so, SHEM uses Match_RM on the first level */
+  for (eqewgts=1, i=1; i<graph->nedges; i++) {
+    if (graph->adjwgt[0] != graph->adjwgt[i]) {
+      eqewgts = 0;
+      break;
+    }
+  }
+
+  /* set the maximum allowed coarsest vertex weight: 1.5*tvwgt[i]/CoarsenTo per constraint */
+  for (i=0; i<graph->ncon; i++)
+    ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo;
+
+  do {
+    IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));
+
+    /* allocate memory for cmap, if it has not already been done due to
+       multiple cuts */
+    if (graph->cmap == NULL)
+      graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap");
+
+    /* determine which matching scheme you will use */
+    switch (ctrl->ctype) {
+      case METIS_CTYPE_RM:
+        Match_RM(ctrl, graph);
+        break;
+      case METIS_CTYPE_SHEM:
+        if (eqewgts || graph->nedges == 0)
+          Match_RM(ctrl, graph);
+        else
+          Match_SHEM(ctrl, graph);
+        break;
+      default:
+        gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype);
+    }
+    /* descend to the coarser graph (presumably attached by CreateCoarseGraph() in the matching routine) */
+    graph = graph->coarser;
+    eqewgts = 0;  /* the equal-weights shortcut is only applied to the input graph */
+    level++;
+
+    ASSERT(CheckGraph(graph, 0, 1));
+    /* continue while the graph is above CoarsenTo, still shrinks enough, and has enough edges */
+  } while (graph->nvtxs > ctrl->CoarsenTo && 
+           graph->nvtxs < COARSEN_FRACTION*graph->finer->nvtxs && 
+           graph->nedges > graph->nvtxs/2);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr));
+
+  return graph;
+}
+
+
+/*************************************************************************/
+/*! This function takes a graph and creates a sequence of at most nlevels 
+    coarser graphs, where nlevels is an input parameter; it returns the last one.
+ */
+/*************************************************************************/
+graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels)
+{
+  idx_t i, eqewgts, level;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr));
+
+  /* determine if the weights on the edges are all the same; if so, SHEM uses Match_RM on the first level */
+  for (eqewgts=1, i=1; i<graph->nedges; i++) {
+    if (graph->adjwgt[0] != graph->adjwgt[i]) {
+      eqewgts = 0;
+      break;
+    }
+  }
+
+  /* set the maximum allowed coarsest vertex weight: 1.5*tvwgt[i]/CoarsenTo per constraint */
+  for (i=0; i<graph->ncon; i++)
+    ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo;
+
+  for (level=0; level<nlevels; level++) {
+    IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));
+
+    /* allocate memory for cmap, if it has not already been done due to
+       multiple cuts */
+    if (graph->cmap == NULL)
+      graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap");
+
+    /* determine which matching scheme you will use */
+    switch (ctrl->ctype) {
+      case METIS_CTYPE_RM:
+        Match_RM(ctrl, graph);
+        break;
+      case METIS_CTYPE_SHEM:
+        if (eqewgts || graph->nedges == 0)
+          Match_RM(ctrl, graph);
+        else
+          Match_SHEM(ctrl, graph);
+        break;
+      default:
+        gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype);
+    }
+    /* descend to the coarser graph (presumably attached by CreateCoarseGraph() in the matching routine) */
+    graph = graph->coarser;
+    eqewgts = 0;  /* the equal-weights shortcut is only applied to the input graph */
+
+    ASSERT(CheckGraph(graph, 0, 1));
+    /* stop early: graph below CoarsenTo, not shrinking enough, or too few edges */
+    if (graph->nvtxs < ctrl->CoarsenTo || 
+        graph->nvtxs > COARSEN_FRACTION*graph->finer->nvtxs || 
+        graph->nedges < graph->nvtxs/2)
+      break; 
+  } 
+
+  IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr));
+
+  return graph;
+}
+
+
+/*************************************************************************/
+/*! This function finds a matching by visiting the vertices in a random 
+    order and pairing each with its first eligible unmatched neighbor. It
+    creates the coarse graph and returns its number of vertices (cnvtxs). */
+/**************************************************************************/
+idx_t Match_RM(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, last_unmatched;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;
+  idx_t *match, *cmap, *perm;
+  size_t nunmatched=0;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap   = graph->cmap;
+
+  maxvwgt  = ctrl->maxvwgt;
+
+  match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));
+  perm  = iwspacemalloc(ctrl, nvtxs);
+  /* perm[] gives the order in which the vertices are visited */
+  irandArrayPermute(nvtxs, perm, nvtxs/8, 1);
+
+  for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+
+    if (match[i] == UNMATCHED) {  /* Unmatched */
+      maxidx = i;  /* default: the vertex is matched with itself */
+      /* only vertices that pass the maxvwgt test look for a partner */
+      if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {
+        /* Deal with island vertices. Find a non-island and match it with. 
+           The matching ignores ctrl->maxvwgt requirements */
+        if (xadj[i] == xadj[i+1]) {
+          last_unmatched = gk_max(pi, last_unmatched)+1;
+          for (; last_unmatched<nvtxs; last_unmatched++) {
+            j = perm[last_unmatched];
+            if (match[j] == UNMATCHED) {
+              maxidx = j;
+              break;
+            }
+          }
+        }
+        else {
+          /* Find a random matching, subject to maxvwgt constraints */
+          if (ncon == 1) {
+            /* single constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED && vwgt[i]+vwgt[k] <= maxvwgt[0]) {
+                maxidx = k;
+                break;
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && 3*vwgt[i] < maxvwgt[0]) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+          else {
+            /* multi-constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED && 
+                  ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt)) {
+                maxidx = k;
+                break;
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+        }
+      }
+      /* record the pair (or self-match); vertices set to UNMATCHED wait for the 2-hop pass */
+      if (maxidx != UNMATCHED) {
+        cmap[i]  = cmap[maxidx] = cnvtxs++;
+        match[i] = maxidx;
+        match[maxidx] = i;
+      }
+    }
+  }
+
+  //printf("nunmatched: %zu\n", nunmatched);
+
+  /* see if a 2-hop matching is required/allowed */
+  if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs) 
+    cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);
+
+
+  /* match the final unmatched vertices with themselves and reorder the vertices 
+     of the coarse graph for memory-friendly contraction */
+  for (cnvtxs=0, i=0; i<nvtxs; i++) {  /* cnvtxs and cmap[] are renumbered from scratch here */
+    if (match[i] == UNMATCHED) {
+      match[i] = i;
+      cmap[i]  = cnvtxs++;
+    }
+    else {
+      if (i <= match[i]) 
+        cmap[i] = cmap[match[i]] = cnvtxs++;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));
+
+  CreateCoarseGraph(ctrl, graph, cnvtxs, match);
+
+  WCOREPOP;
+
+  return cnvtxs;
+}
+
+
+/**************************************************************************/
+/*! This function finds a matching using the HEM heuristic. The vertices 
+    are visited based on increasing degree to ensure that all vertices are 
+    given a chance to match with something. It creates the coarse graph
+    and returns its number of vertices (cnvtxs). */
+/**************************************************************************/
+idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, maxwgt, 
+        last_unmatched, avgdegree;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;
+  idx_t *match, *cmap, *degrees, *perm, *tperm;
+  size_t nunmatched=0;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  cmap   = graph->cmap;
+
+  maxvwgt  = ctrl->maxvwgt;
+
+  match   = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));
+  perm    = iwspacemalloc(ctrl, nvtxs);
+  tperm   = iwspacemalloc(ctrl, nvtxs);
+  degrees = iwspacemalloc(ctrl, nvtxs);
+
+  irandArrayPermute(nvtxs, tperm, nvtxs/8, 1);
+  /* sort by degree, capping every degree at 0.7x the average so larger ones share the last bucket */
+  avgdegree = 0.7*(xadj[nvtxs]/nvtxs);
+  for (i=0; i<nvtxs; i++) 
+    degrees[i] = (xadj[i+1]-xadj[i] > avgdegree ? avgdegree : xadj[i+1]-xadj[i]);
+  BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm);
+
+  for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+
+    if (match[i] == UNMATCHED) {  /* Unmatched */
+      maxidx = i;   /* default: the vertex is matched with itself */
+      maxwgt = -1;  /* weight of the heaviest eligible edge seen so far */
+      /* only vertices that pass the maxvwgt test look for a partner */
+      if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {
+        /* Deal with island vertices. Find a non-island and match it with. 
+           The matching ignores ctrl->maxvwgt requirements */
+        if (xadj[i] == xadj[i+1]) { 
+          last_unmatched = gk_max(pi, last_unmatched)+1;
+          for (; last_unmatched<nvtxs; last_unmatched++) {
+            j = perm[last_unmatched];
+            if (match[j] == UNMATCHED) {
+              maxidx = j;
+              break;
+            }
+          }
+        }
+        else {
+          /* Find a heavy-edge matching, subject to maxvwgt constraints */
+          if (ncon == 1) {
+            /* single constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED && 
+                  maxwgt < adjwgt[j] && vwgt[i]+vwgt[k] <= maxvwgt[0]) {
+                maxidx = k;
+                maxwgt = adjwgt[j];
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && 3*vwgt[i] < maxvwgt[0]) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+          else {
+            /* multi-constraint version */
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              k = adjncy[j];
+              if (match[k] == UNMATCHED && 
+                  ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt) &&
+                  (maxwgt < adjwgt[j] || 
+                   (maxwgt == adjwgt[j] && 
+                    BetterVBalance(ncon, graph->invtvwgt, vwgt+i*ncon, 
+                        vwgt+maxidx*ncon, vwgt+k*ncon)))) {
+                maxidx = k;
+                maxwgt = adjwgt[j];
+              }
+            }
+
+            /* If it did not match, record for a 2-hop matching. */
+            if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {
+              nunmatched++;
+              maxidx = UNMATCHED;
+            }
+          }
+        }
+      }
+      /* record the pair (or self-match); vertices set to UNMATCHED wait for the 2-hop pass */
+      if (maxidx != UNMATCHED) {
+        cmap[i]  = cmap[maxidx] = cnvtxs++;
+        match[i] = maxidx;
+        match[maxidx] = i;
+      }
+    }
+  }
+
+  //printf("nunmatched: %zu\n", nunmatched);
+
+  /* see if a 2-hop matching is required/allowed */
+  if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs) 
+    cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);
+
+
+  /* match the final unmatched vertices with themselves and reorder the vertices 
+     of the coarse graph for memory-friendly contraction */
+  for (cnvtxs=0, i=0; i<nvtxs; i++) {  /* cnvtxs and cmap[] are renumbered from scratch here */
+    if (match[i] == UNMATCHED) {
+      match[i] = i;
+      cmap[i] = cnvtxs++;
+    }
+    else {
+      if (i <= match[i]) 
+        cmap[i] = cmap[match[i]] = cnvtxs++;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));
+
+  CreateCoarseGraph(ctrl, graph, cnvtxs, match);
+
+  WCOREPOP;
+
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! This function matches the unmatched vertices using a 2-hop matching 
+    that involves vertices that are two hops away from each other; it returns the updated cnvtxs. */
+/**************************************************************************/
+idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, 
+          idx_t cnvtxs, size_t nunmatched)
+{
+  /* escalate in stages; the last two only run while many vertices are still unmatched */
+  cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 2);
+  cnvtxs = Match_2HopAll(ctrl, graph, perm, match, cnvtxs, &nunmatched, 64);
+  if (nunmatched > 1.5*UNMATCHEDFOR2HOP*graph->nvtxs) 
+    cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 3);
+  if (nunmatched > 2.0*UNMATCHEDFOR2HOP*graph->nvtxs) 
+    cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, graph->nvtxs);
+
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! This function matches the unmatched vertices whose degree is less than
+    maxdegree using a 2-hop matching, i.e., it pairs vertices that are two 
+    hops away from each other. The only requirement is a non-empty overlap
+    between the adjacency lists of the two vertices. *r_nunmatched is 
+    decremented by 2 per pair and the updated cnvtxs is returned. */
+/**************************************************************************/
+idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, 
+          idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree)
+{
+  idx_t i, pi, ii, j, jj, k, nvtxs;
+  idx_t *xadj, *adjncy, *colptr, *rowind;
+  idx_t *cmap;
+  size_t nunmatched;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  cmap   = graph->cmap;
+
+  nunmatched = *r_nunmatched;
+
+  /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", * nunmatched)); */
+
+  /* create the inverted index: for each vertex, the eligible unmatched vertices adjacent to it */
+  WCOREPUSH;
+  colptr = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs+1));
+  for (i=0; i<nvtxs; i++) {
+    if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) {
+      for (j=xadj[i]; j<xadj[i+1]; j++)
+        colptr[adjncy[j]]++;
+    }
+  }
+  MAKECSR(i, nvtxs, colptr);
+  /* fill rowind[] in perm order, using colptr[] as the running insert position */
+  rowind = iwspacemalloc(ctrl, colptr[nvtxs]);
+  for (pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+    if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) {
+      for (j=xadj[i]; j<xadj[i+1]; j++)
+        rowind[colptr[adjncy[j]]++] = i;
+    }
+  }
+  SHIFTCSR(i, nvtxs, colptr);
+
+  /* compute matchings by going down the inverted index, pairing entries from both ends of a list */
+  for (pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+    if (colptr[i+1]-colptr[i] < 2)  /* need at least two candidates that share neighbor i */
+      continue;
+
+    for (jj=colptr[i+1], j=colptr[i]; j<jj; j++) {
+      if (match[rowind[j]] == UNMATCHED) {
+        for (jj--; jj>j; jj--) {
+          if (match[rowind[jj]] == UNMATCHED) {
+            cmap[rowind[j]] = cmap[rowind[jj]] = cnvtxs++;
+            match[rowind[j]]  = rowind[jj];
+            match[rowind[jj]] = rowind[j];
+            nunmatched -= 2;
+            break;
+          }
+        }
+      }
+    }
+  }
+  WCOREPOP;
+
+  /* IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: nunmatched: %zu\n", nunmatched)); */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));
+
+  *r_nunmatched = nunmatched;
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! This function matches the unmatched vertices whose degree is greater
+    than one and less than maxdegree using a 2-hop matching, i.e., it pairs
+    vertices that do not have to be adjacent to each other.
+    The requirement of the 2-hop matching is that of identical adjacency
+    lists. It returns the updated cnvtxs and updates *r_nunmatched.
+ */
+/**************************************************************************/
+idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match, 
+          idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree)
+{
+  idx_t i, pi, pk, j, jj, k, nvtxs, mask, idegree;
+  idx_t *xadj, *adjncy;
+  idx_t *cmap, *mark;
+  ikv_t *keys;
+  size_t nunmatched, ncand;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  cmap   = graph->cmap;
+
+  nunmatched = *r_nunmatched;
+  mask = IDX_MAX/maxdegree;  /* bounds (k%mask)*maxdegree + idegree by IDX_MAX */
+
+  /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */
+
+  WCOREPUSH;
+
+  /* hash each candidate's adjacency list into a key that also encodes its degree */
+  keys = ikvwspacemalloc(ctrl, nunmatched);
+  for (ncand=0, pi=0; pi<nvtxs; pi++) {
+    i = perm[pi];
+    idegree = xadj[i+1]-xadj[i];
+    if (match[i] == UNMATCHED && idegree > 1 && idegree < maxdegree) {
+      for (k=0, j=xadj[i]; j<xadj[i+1]; j++) 
+        k += adjncy[j]%mask;
+      keys[ncand].val = i;
+      keys[ncand].key = (k%mask)*maxdegree + idegree;
+      ncand++;
+    }
+  }
+  ikvsorti(ncand, keys);  /* candidates with equal keys become contiguous */
+
+  mark = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));  /* -1 is never a vertex id */
+  for (pi=0; pi<ncand; pi++) {
+    i = keys[pi].val;
+    if (match[i] != UNMATCHED)
+      continue;
+
+    for (j=xadj[i]; j<xadj[i+1]; j++)  /* stamp i on all of its neighbors */
+      mark[adjncy[j]] = i;
+
+    for (pk=pi+1; pk<ncand; pk++) {  /* scan the candidates that follow i */
+      k = keys[pk].val;
+      if (match[k] != UNMATCHED)
+        continue;
+
+      if (keys[pi].key != keys[pk].key)
+        break;
+      if (xadj[i+1]-xadj[i] != xadj[k+1]-xadj[k])
+        break;
+
+      for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+        if (mark[adjncy[jj]] != i)
+          break;
+      }
+      if (jj == xadj[k+1]) {  /* every neighbor of k carries i's stamp */
+        cmap[i] = cmap[k] = cnvtxs++;
+        match[i] = k;
+        match[k] = i;
+        nunmatched -= 2;
+        break;
+      }
+    }
+  }
+  WCOREPOP;
+
+  /*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: ncand: %zu, nunmatched: %zu\n", ncand, nunmatched)); */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));
+
+  *r_nunmatched = nunmatched;
+  return cnvtxs;
+}
+
+
+/*************************************************************************/
+/*! Prints one line of statistics for a graph during coarsening: nvtxs,
+    nedges, the isum() of adjwgt, CoarsenTo and maxvwgt:tvwgt per constraint. */
+/*************************************************************************/
+void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t con, totadjwgt;
+
+  totadjwgt = isum(graph->nedges, graph->adjwgt, 1);
+  printf("%10"PRIDX" %10"PRIDX" %10"PRIDX" [%"PRIDX"] [", 
+      graph->nvtxs, graph->nedges, totadjwgt, ctrl->CoarsenTo);
+  for (con=0; con<graph->ncon; con++)
+    printf(" %8"PRIDX":%8"PRIDX, ctrl->maxvwgt[con], graph->tvwgt[con]);
+  printf(" ]\n");
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph from the matching in match[] and
+    the fine-to-coarse map in graph->cmap. It uses a simple hash-table 
+    (indexed by coarse id & HTLENGTH) to detect neighbors that collapse to the
+    same coarse node; conflicts are handled via a linear scan. Small or dense
+    graphs are handed over to CreateCoarseGraphNoMask(). */
+/*************************************************************************/
+void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match)
+{
+  idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, 
+        v, u, mask, dovsize;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+
+  dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+
+  /* Use the mask-version only for a large-enough coarse graph of low average degree */
+  mask = HTLENGTH;
+  if (cnvtxs < 2*mask || graph->nedges/graph->nvtxs > mask/20) { 
+    CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);
+    return;
+  }
+
+  nvtxs = graph->nvtxs;
+  xadj  = graph->xadj;
+  for (v=0; v<nvtxs; v++) {  /* ...and only if no vertex has more than mask/8 neighbors */
+    if (xadj[v+1]-xadj[v] > (mask>>3)) {
+      CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);
+      return;
+    }
+  }
+
+  /* From here on: hashed contraction of every pair (v, u=match[v]) */
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  ncon    = graph->ncon;
+  vwgt    = graph->vwgt;
+  vsize   = graph->vsize;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  cmap    = graph->cmap;
+
+  /* Initialize the coarser graph */
+  cgraph   = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj    = cgraph->xadj;
+  cvwgt    = cgraph->vwgt;
+  cvsize   = cgraph->vsize;
+  cadjncy  = cgraph->adjncy;
+  cadjwgt  = cgraph->adjwgt;
+  /* htable[c&mask] is a position in the coarse list being built, or -1 if free */
+  htable = iset(gk_min(cnvtxs+1, mask+1), -1, iwspacemalloc(ctrl, mask+1)); 
+
+  cxadj[0] = cnvtxs = cnedges = 0;  /* cnvtxs is reused as the running coarse vertex id */
+  for (v=0; v<nvtxs; v++) {
+    if ((u = match[v]) < v)  /* this pair was handled when match[v] was visited */
+      continue;
+
+    ASSERT(cmap[v] == cnvtxs);
+    ASSERT(cmap[match[v]] == cnvtxs);
+
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;  /* number of entries in the coarse list of cnvtxs so far */
+
+    istart = xadj[v];
+    iend   = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k  = cmap[adjncy[j]];
+      kk = k&mask;
+      if ((m = htable[kk]) == -1) {  /* free slot: k is a new coarse neighbor */
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[kk] = nedges++;
+      }
+      else if (cadjncy[m] == k) {  /* slot holds k: accumulate the weight */
+        cadjwgt[m] += adjwgt[j];
+      }
+      else {  /* slot holds a different id: fall back to a linear scan */
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == k) {
+            cadjwgt[jj] += adjwgt[j];
+            break;
+          }
+        }
+        if (jj == nedges) {  /* not present: append without claiming the slot */
+          cadjncy[nedges]   = k;
+          cadjwgt[nedges++] = adjwgt[j];
+        }
+      }
+    }
+
+    if (v != u) {  /* v is matched: fold its partner u into the same coarse vertex */
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      istart = xadj[u];
+      iend   = xadj[u+1];
+      for (j=istart; j<iend; j++) {  /* same three cases as for v above */
+        k  = cmap[adjncy[j]];
+        kk = k&mask;
+        if ((m = htable[kk]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[kk]      = nedges++;
+        }
+        else if (cadjncy[m] == k) {
+          cadjwgt[m] += adjwgt[j];
+        }
+        else {
+          for (jj=0; jj<nedges; jj++) {
+            if (cadjncy[jj] == k) {
+              cadjwgt[jj] += adjwgt[j];
+              break;
+            }
+          }
+          if (jj == nedges) {
+            cadjncy[nedges]   = k;
+            cadjwgt[nedges++] = adjwgt[j];
+          }
+        }
+      }
+
+      /* Remove the contracted (v,u) edge, which shows up as an entry equal to cnvtxs */
+      jj = htable[cnvtxs&mask];
+      if (jj >= 0 && cadjncy[jj] != cnvtxs) {
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == cnvtxs) 
+            break;
+        }
+      }
+      /* This 2nd check is needed for non-adjacent matchings (jj == nedges if absent) */
+      if (jj >= 0 && jj < nedges && cadjncy[jj] == cnvtxs) { 
+        cadjncy[jj] = cadjncy[--nedges];
+        cadjwgt[jj] = cadjwgt[nedges];
+      }
+    }
+
+    /* Reset to -1 the htable slots that were used for this coarse vertex */
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]&mask] = -1;  
+    htable[cnvtxs&mask] = -1;  /* slot of the removed self entry, if any */
+
+    cnedges         += nedges;
+    cxadj[++cnvtxs]  = cnedges;
+    cadjncy         += nedges;  /* advance to where the next coarse list starts */
+    cadjwgt         += nedges;
+  }
+
+  cgraph->nedges = cnedges;
+
+  for (j=0; j<ncon; j++) {  /* per-constraint totals; a zero total is inverted as 1 */
+    cgraph->tvwgt[j]    = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);
+    cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);
+  }
+
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph. It uses a full-size array
+    (htable, one entry per coarse vertex) for identifying the adjacent 
+    vertices that get collapsed to the same node, so no hash conflicts can
+    occur. match[] gives the pairing and graph->cmap the coarse vertex ids. */
+/*************************************************************************/
+void CreateCoarseGraphNoMask(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match)
+{
+  idx_t j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+
+  WCOREPUSH;
+
+  dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  nvtxs   = graph->nvtxs;
+  ncon    = graph->ncon;
+  xadj    = graph->xadj;
+  vwgt    = graph->vwgt;
+  vsize   = graph->vsize;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  cmap    = graph->cmap;
+
+
+  /* Initialize the coarser graph */
+  cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj    = cgraph->xadj;
+  cvwgt    = cgraph->vwgt;
+  cvsize   = cgraph->vsize;
+  cadjncy  = cgraph->adjncy;
+  cadjwgt  = cgraph->adjwgt;
+
+  htable = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs));  /* htable[c]: position of c in the current list, or -1 */
+
+  cxadj[0] = cnvtxs = cnedges = 0;  /* cnvtxs is reused as the running coarse vertex id */
+  for (v=0; v<nvtxs; v++) {
+    if ((u = match[v]) < v)  /* this pair was handled when match[v] was visited */
+      continue;
+
+    ASSERT(cmap[v] == cnvtxs);
+    ASSERT(cmap[match[v]] == cnvtxs);
+
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;  /* number of entries in the coarse list of cnvtxs so far */
+
+    istart = xadj[v];
+    iend   = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k = cmap[adjncy[j]];
+      if ((m = htable[k]) == -1) {  /* first time coarse neighbor k is seen */
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[k] = nedges++;
+      }
+      else {  /* k is already in the list at position m: accumulate the weight */
+        cadjwgt[m] += adjwgt[j];
+      }
+    }
+
+    if (v != u) {  /* v is matched: fold its partner u into the same coarse vertex */
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      istart = xadj[u];
+      iend   = xadj[u+1];
+      for (j=istart; j<iend; j++) {
+        k = cmap[adjncy[j]];
+        if ((m = htable[k]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[k] = nedges++;
+        }
+        else {
+          cadjwgt[m] += adjwgt[j];
+        }
+      }
+
+      /* Remove the contracted (v,u) edge, which shows up as an entry equal to cnvtxs */
+      if ((j = htable[cnvtxs]) != -1) {
+        ASSERT(cadjncy[j] == cnvtxs);
+        cadjncy[j]        = cadjncy[--nedges];
+        cadjwgt[j]        = cadjwgt[nedges];
+        htable[cnvtxs] = -1;
+      }
+    }
+
+    /* Reset to -1 the htable entries of the neighbors that were kept */
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]] = -1;  
+
+    cnedges         += nedges;
+    cxadj[++cnvtxs]  = cnedges;
+    cadjncy         += nedges;  /* advance to where the next coarse list starts */
+    cadjwgt         += nedges;
+  }
+
+  cgraph->nedges = cnedges;
+
+  for (j=0; j<ncon; j++) {  /* per-constraint totals; a zero total is inverted as 1 */
+    cgraph->tvwgt[j]    = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);
+    cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);
+  }
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function creates the coarser graph. It uses a simple hash-table 
+    (indexed by coarse id & HTLENGTH) for identifying the adjacent vertices
+    that get collapsed to the same node. The hash-table can have conflicts,
+    which are handled via a linear scan. It relies on the perm[] array to
+    visit the vertices in increasing cnvtxs order.
+ */
+/*************************************************************************/
+void CreateCoarseGraphPerm(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match, idx_t *perm)
+{
+  idx_t i, j, jj, k, kk, m, istart, iend, nvtxs, nedges, ncon, cnedges, 
+        v, u, mask, dovsize;
+  idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;
+  idx_t *cmap, *htable;
+  idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;
+  graph_t *cgraph;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));
+
+  dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);
+
+  mask = HTLENGTH;
+
+  nvtxs   = graph->nvtxs;
+  ncon    = graph->ncon;
+  xadj    = graph->xadj;
+  vwgt    = graph->vwgt;
+  vsize   = graph->vsize;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  cmap    = graph->cmap;
+
+  /* Initialize the coarser graph */
+  cgraph   = SetupCoarseGraph(graph, cnvtxs, dovsize);
+  cxadj    = cgraph->xadj;
+  cvwgt    = cgraph->vwgt;
+  cvsize   = cgraph->vsize;
+  cadjncy  = cgraph->adjncy;
+  cadjwgt  = cgraph->adjwgt;
+
+  htable = iset(mask+1, -1, iwspacemalloc(ctrl, mask+1)); 
+
+  cxadj[0] = cnvtxs = cnedges = 0;  /* cnvtxs is reused as the running coarse vertex id */
+  for (i=0; i<nvtxs; i++) {
+    v = perm[i];
+    if (cmap[v] != cnvtxs)  /* not the vertex that opens the next coarse vertex */
+      continue;
+
+    u = match[v];
+    if (ncon == 1)
+      cvwgt[cnvtxs] = vwgt[v];
+    else
+      icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);
+
+    if (dovsize)
+      cvsize[cnvtxs] = vsize[v];
+
+    nedges = 0;  /* number of entries in the coarse list of cnvtxs so far */
+
+    istart = xadj[v];
+    iend = xadj[v+1];
+    for (j=istart; j<iend; j++) {
+      k  = cmap[adjncy[j]];
+      kk = k&mask;
+      if ((m = htable[kk]) == -1) {  /* free slot: k is a new coarse neighbor */
+        cadjncy[nedges] = k;
+        cadjwgt[nedges] = adjwgt[j];
+        htable[kk] = nedges++;
+      }
+      else if (cadjncy[m] == k) {  /* slot holds k: accumulate the weight */
+        cadjwgt[m] += adjwgt[j];
+      }
+      else {  /* slot holds a different id: fall back to a linear scan */
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == k) {
+            cadjwgt[jj] += adjwgt[j];
+            break;
+          }
+        }
+        if (jj == nedges) {  /* not present: append without claiming the slot */
+          cadjncy[nedges] = k;
+          cadjwgt[nedges++] = adjwgt[j];
+        }
+      }
+    }
+
+    if (v != u) {  /* v is matched: fold its partner u into the same coarse vertex */
+      if (ncon == 1)
+        cvwgt[cnvtxs] += vwgt[u];
+      else
+        iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);
+
+      if (dovsize)
+        cvsize[cnvtxs] += vsize[u];
+
+      istart = xadj[u];
+      iend = xadj[u+1];
+      for (j=istart; j<iend; j++) {  /* same three cases as for v above */
+        k  = cmap[adjncy[j]];
+        kk = k&mask;
+        if ((m = htable[kk]) == -1) {
+          cadjncy[nedges] = k;
+          cadjwgt[nedges] = adjwgt[j];
+          htable[kk] = nedges++;
+        }
+        else if (cadjncy[m] == k) {
+          cadjwgt[m] += adjwgt[j];
+        }
+        else {
+          for (jj=0; jj<nedges; jj++) {
+            if (cadjncy[jj] == k) {
+              cadjwgt[jj] += adjwgt[j];
+              break;
+            }
+          }
+          if (jj == nedges) {
+            cadjncy[nedges] = k;
+            cadjwgt[nedges++] = adjwgt[j];
+          }
+        }
+      }
+
+      /* Remove the contracted (v,u) edge, which shows up as an entry equal to cnvtxs */
+      jj = htable[cnvtxs&mask];
+      if (jj >= 0 && cadjncy[jj] != cnvtxs) {
+        for (jj=0; jj<nedges; jj++) {
+          if (cadjncy[jj] == cnvtxs) 
+            break;
+        }
+      }
+      if (jj >= 0 && jj < nedges && cadjncy[jj] == cnvtxs) { /* jj == nedges: non-adjacent matching, nothing to remove */
+        cadjncy[jj] = cadjncy[--nedges];
+        cadjwgt[jj] = cadjwgt[nedges];
+      }
+    }
+
+    for (j=0; j<nedges; j++)
+      htable[cadjncy[j]&mask] = -1;  /* Zero out the htable */
+    htable[cnvtxs&mask] = -1;  /* slot of the removed self entry, if any */
+
+    cnedges += nedges;
+    cxadj[++cnvtxs] = cnedges;
+    cadjncy += nedges;  /* advance to where the next coarse list starts */
+    cadjwgt += nedges;
+  }
+
+  cgraph->nedges = cnedges;
+
+  for (i=0; i<ncon; i++) {  /* per-constraint totals; a zero total is inverted as 1 */
+    cgraph->tvwgt[i]    = isum(cgraph->nvtxs, cgraph->vwgt+i, ncon);
+    cgraph->invtvwgt[i] = 1.0/(cgraph->tvwgt[i] > 0 ? cgraph->tvwgt[i] : 1);
+  }
+
+
+  ReAdjustMemory(ctrl, graph, cgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Creates the coarse graph shell for graph: links the two levels together
+    and allocates the coarse arrays (adjncy/adjwgt sized by graph->nedges). */
+/*************************************************************************/
+graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, idx_t dovsize)
+{
+  graph_t *coarse = CreateGraph();
+  idx_t ncon      = graph->ncon;
+  idx_t maxedges  = graph->nedges;
+
+  /* dimensions of the coarse graph */
+  coarse->nvtxs = cnvtxs;
+  coarse->ncon  = ncon;
+
+  /* chain the fine and the coarse level to each other */
+  coarse->finer  = graph;
+  graph->coarser = coarse;
+
+  /* Allocate memory for the coarser graph; vsize only when asked for */
+  coarse->xadj     = imalloc(cnvtxs+1, "SetupCoarseGraph: xadj");
+  coarse->adjncy   = imalloc(maxedges, "SetupCoarseGraph: adjncy");
+  coarse->adjwgt   = imalloc(maxedges, "SetupCoarseGraph: adjwgt");
+  coarse->vwgt     = imalloc(ncon*cnvtxs, "SetupCoarseGraph: vwgt");
+  coarse->tvwgt    = imalloc(ncon, "SetupCoarseGraph: tvwgt");
+  coarse->invtvwgt = rmalloc(ncon, "SetupCoarseGraph: invtvwgt");
+  if (dovsize)
+    coarse->vsize = imalloc(cnvtxs, "SetupCoarseGraph: vsize");
+
+  return coarse;
+}
+
+
+/*************************************************************************/
+/*! This function shrinks the adjncy/adjwgt arrays of the coarse graph to
+    its actual number of edges, but only when that is worthwhile: more than
+    10000 coarse edges and fewer than 90% of the edges of the finer graph. */
+/*************************************************************************/
+void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph) 
+{
+  if (cgraph->nedges <= 10000 || cgraph->nedges >= 0.9*graph->nedges)
+    return;  /* the savings would not be significant */
+
+  cgraph->adjncy = irealloc(cgraph->adjncy, cgraph->nedges, "ReAdjustMemory: adjncy");
+  cgraph->adjwgt = irealloc(cgraph->adjwgt, cgraph->nedges, "ReAdjustMemory: adjwgt");
+}
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/compress.c b/3rdParty/metis/metis-5.1.0/libmetis/compress.c
new file mode 100644
index 000000000..d72472b25
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/compress.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * compress.c
+ *
+ * This file contains code for compressing nodes with identical adjacency
+ * structure and for pruning dense columns
+ *
+ * Started 9/17/97
+ * George
+ */
+
+#include "metislib.h"
+
+/*************************************************************************/
+/*! This function compresses a graph by merging vertices with identical
+    adjacency structure (each vertex counts as a diagonal entry of its own
+    list). The graph is built only if cnvtxs < COMPRESSION_FRACTION*nvtxs.
+    The compressed graph that is generated has its adjwgts set to 1.
+    cptr/cind receive, in CSR form, the original vertices that make up each
+    compressed vertex; they are filled in even if no graph is built.
+    \returns the compressed graph if compression was performed, otherwise
+             it returns NULL.  */
+/**************************************************************************/
+graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, 
+             idx_t *vwgt, idx_t *cptr, idx_t *cind)
+{
+  idx_t i, ii, iii, j, jj, k, l, cnvtxs, cnedges;
+  idx_t *cxadj, *cadjncy, *cvwgt, *mark, *map;
+  ikv_t *keys;
+  graph_t *graph=NULL;
+
+  mark = ismalloc(nvtxs, -1, "CompressGraph: mark");
+  map  = ismalloc(nvtxs, -1, "CompressGraph: map");
+  keys = ikvmalloc(nvtxs, "CompressGraph: keys");
+
+  /* Compute a key for each adjacency list: the sum of its vertex ids */
+  for (i=0; i<nvtxs; i++) {
+    k = 0;
+    for (j=xadj[i]; j<xadj[i+1]; j++)
+      k += adjncy[j];  /* NOTE(review): unchecked sum; may overflow idx_t on large graphs */
+    keys[i].key = k+i; /* Add the diagonal entry as well */
+    keys[i].val = i;
+  }
+
+  ikvsorti(nvtxs, keys);  /* vertices with equal keys become contiguous */
+
+  l = cptr[0] = 0;
+  for (cnvtxs=i=0; i<nvtxs; i++) {
+    ii = keys[i].val;
+    if (map[ii] == -1) {  /* ii is not merged yet: it starts a new compressed vertex */
+      mark[ii] = i;  /* Add the diagonal entry */
+      for (j=xadj[ii]; j<xadj[ii+1]; j++) 
+        mark[adjncy[j]] = i;
+
+      map[ii]   = cnvtxs;
+      cind[l++] = ii;
+
+      for (j=i+1; j<nvtxs; j++) {
+        iii = keys[j].val;
+
+        if (keys[i].key != keys[j].key || xadj[ii+1]-xadj[ii] != xadj[iii+1]-xadj[iii])
+          break; /* Break if keys or degrees are different */
+
+        if (map[iii] == -1) { /* Do a comparison if iii has not been mapped */ 
+          for (jj=xadj[iii]; jj<xadj[iii+1]; jj++) {
+            if (mark[adjncy[jj]] != i)
+              break;
+          }
+
+          if (jj == xadj[iii+1]) { /* Identical adjacency structure */
+            map[iii]  = cnvtxs;
+            cind[l++] = iii;
+          }
+        }
+      }
+
+      cptr[++cnvtxs] = l;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO, 
+        printf("  Compression: reduction in # of vertices: %"PRIDX".\n", nvtxs-cnvtxs)); 
+
+
+  if (cnvtxs < COMPRESSION_FRACTION*nvtxs) {
+    /* Sufficient compression is possible, so go ahead and create the 
+       compressed graph */
+
+    graph = CreateGraph();
+
+    cnedges = 0;  /* upper bound: degree of the first vertex of every group */
+    for (i=0; i<cnvtxs; i++) {
+      ii = cind[cptr[i]];
+      cnedges += xadj[ii+1]-xadj[ii];
+    }
+
+    /* Allocate memory for the compressed graph */
+    cxadj   = graph->xadj   = imalloc(cnvtxs+1, "CompressGraph: xadj");
+    cvwgt   = graph->vwgt   = ismalloc(cnvtxs, 0, "CompressGraph: vwgt");
+    cadjncy = graph->adjncy = imalloc(cnedges, "CompressGraph: adjncy");
+              graph->adjwgt = ismalloc(cnedges, 1, "CompressGraph: adjwgt");
+
+    /* Now go and compress the graph; mark[] is now indexed by compressed vertex */
+    iset(nvtxs, -1, mark);
+    l = cxadj[0] = 0;
+    for (i=0; i<cnvtxs; i++) {
+      mark[i] = i;  /* Remove any diagonal entries in the compressed graph */
+      for (j=cptr[i]; j<cptr[i+1]; j++) {
+        ii = cind[j];
+
+        /* accumulate the vertex weights of the constituent vertices */
+        cvwgt[i] += (vwgt == NULL ? 1 : vwgt[ii]);
+
+        /* generate the combined adjacency list */
+        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+          k = map[adjncy[jj]];
+          if (mark[k] != i) {  /* k is not yet in the list of compressed vertex i */
+            mark[k] = i;
+            cadjncy[l++] = k;
+          }
+        }
+      }
+      cxadj[i+1] = l;
+    }
+
+    graph->nvtxs  = cnvtxs;
+    graph->nedges = l;
+    graph->ncon   = 1;
+
+    SetupGraph_tvwgt(graph);
+    SetupGraph_label(graph);
+  }
+
+  gk_free((void **)&keys, &map, &mark, LTERM);
+
+  return graph;
+
+}
+
+
+
+/*************************************************************************/
+/*! This function prunes all the vertices in a graph whose degree is not
+    less than factor*average. iperm lists the kept vertices first, then the
+    pruned ones (filled in from position nvtxs-1 downwards).
+    \returns the pruned graph, or NULL if no vertex or every vertex would
+             be pruned.  */
+/*************************************************************************/
+graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, 
+             idx_t *vwgt, idx_t *iperm, real_t factor)
+{
+  idx_t i, j, k, l, nlarge, pnvtxs, pnedges;
+  idx_t *pxadj, *padjncy, *padjwgt, *pvwgt;
+  idx_t *perm;
+  graph_t *graph=NULL;
+
+  perm = imalloc(nvtxs, "PruneGraph: perm");
+
+  factor = factor*xadj[nvtxs]/nvtxs;  /* degree threshold: factor times xadj[nvtxs]/nvtxs */
+  /* perm[] numbers the kept vertices 0,1,... and the pruned ones nvtxs-1,nvtxs-2,...; iperm[] is its inverse */
+  pnvtxs = pnedges = nlarge = 0;
+  for (i=0; i<nvtxs; i++) {
+    if (xadj[i+1]-xadj[i] < factor) {
+      perm[i] = pnvtxs;
+      iperm[pnvtxs++] = i;
+      pnedges += xadj[i+1]-xadj[i];
+    }
+    else {
+      perm[i] = nvtxs - ++nlarge;
+      iperm[nvtxs-nlarge] = i;
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO, 
+        printf("  Pruned %"PRIDX" of %"PRIDX" vertices.\n", nlarge, nvtxs)); 
+
+
+  if (nlarge > 0 && nlarge < nvtxs) {  
+    /* Pruning is possible, so go ahead and create the pruned graph */
+    graph = CreateGraph();
+
+    /* Allocate memory for the pruned graph (pnedges is an upper bound here) */
+    pxadj   = graph->xadj   = imalloc(pnvtxs+1, "PruneGraph: xadj");
+    pvwgt   = graph->vwgt   = imalloc(pnvtxs, "PruneGraph: vwgt");
+    padjncy = graph->adjncy = imalloc(pnedges, "PruneGraph: adjncy");
+              graph->adjwgt = ismalloc(pnedges, 1, "PruneGraph: adjwgt");
+
+    pxadj[0] = pnedges = l = 0;
+    for (i=0; i<nvtxs; i++) {
+      if (xadj[i+1]-xadj[i] < factor) {
+        pvwgt[l] = (vwgt == NULL ? 1 : vwgt[i]);
+        
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          k = perm[adjncy[j]];
+          if (k < pnvtxs)  /* keep only the edges to vertices that were not pruned */
+            padjncy[pnedges++] = k;
+        }
+        pxadj[++l] = pnedges;
+      }
+    }
+
+    graph->nvtxs  = pnvtxs;
+    graph->nedges = pnedges;
+    graph->ncon   = 1;
+
+    SetupGraph_tvwgt(graph);
+    SetupGraph_label(graph);
+  }
+  else if (nlarge > 0 && nlarge == nvtxs) {  
+    IFSET(ctrl->dbglvl, METIS_DBG_INFO, 
+          printf("  Pruning is ignored as it removes all vertices.\n"));
+    nlarge = 0;  /* nlarge is local and not read again, so this has no visible effect */
+  }
+
+
+  gk_free((void **)&perm, LTERM);
+
+  return graph;
+}
+
+
+
+
+
+
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/contig.c b/3rdParty/metis/metis-5.1.0/libmetis/contig.c
new file mode 100644
index 000000000..3f45902db
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/contig.c
@@ -0,0 +1,699 @@
+/*!
+\file 
+\brief Functions that deal with eliminating disconnected partitions
+
+\date Started 7/15/98
+\author George
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version $Id: contig.c 10513 2011-07-07 22:06:03Z karypis $
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function finds the connected components induced by the 
+    partitioning vector.
+
+    \param graph is the graph structure
+    \param where is the partitioning vector. If this is NULL, then the
+           entire graph is treated as belonging to a single partition.
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be graph->nvtxs+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be graph->nvtxs.
+
+    \returns the number of components that it found.
+
+    \note The cptr and cind parameters can both be NULL, in which case only
+          the number of connected components is returned. Only cptr is tested.
+*/
+/*************************************************************************/
+idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, 
+          idx_t *cptr, idx_t *cind)
+{
+  idx_t i, ii, j, jj, k, me=0, nvtxs, first, last, nleft, ncmps;
+  idx_t *xadj, *adjncy;
+  idx_t *touched, *perm, *todo;
+  idx_t mustfree_ccsr=0, mustfree_where=0;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Deal with NULL supplied cptr/cind vectors (a NULL cptr replaces both with scratch arrays) */
+  if (cptr == NULL) {
+    cptr = imalloc(nvtxs+1, "FindPartitionInducedComponents: cptr");
+    cind = imalloc(nvtxs, "FindPartitionInducedComponents: cind");
+    mustfree_ccsr = 1;
+  }
+
+  /* Deal with NULL supplied where vector (all vertices are put in partition 0) */
+  if (where == NULL) {
+    where = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: where");
+    mustfree_where = 1;
+  }
+
+  /* Allocate memory required for the BFS traversal */
+  perm    = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: perm"));
+  todo    = iincset(nvtxs, 0, imalloc(nvtxs, "FindPartitionInducedComponents: todo"));
+  touched = ismalloc(nvtxs, 0, "FindPartitionInducedComponents: touched");
+
+  /* todo[0..nleft) holds the vertices not yet dequeued; perm[v] is the slot of v in todo */
+  /* Find the connected components induced by the partition; cind doubles as the BFS queue */
+  ncmps = -1;
+  first = last = 0;
+  nleft = nvtxs;
+  while (nleft > 0) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;
+      ASSERT(touched[todo[0]] == 0);
+      i = todo[0];
+      cind[last++] = i;
+      touched[i] = 1;
+      me = where[i];  /* the new component is confined to the partition of its seed */
+    }
+
+    i = cind[first++];
+    k = perm[i];
+    j = todo[k] = todo[--nleft];  /* delete i from todo by moving the last entry into its slot */
+    perm[j] = k;
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (where[k] == me && !touched[k]) {
+        cind[last++] = k;
+        touched[k] = 1;
+      }
+    }
+  }
+  cptr[++ncmps] = first;  /* closes the last component; ncmps is now the component count */
+
+  if (mustfree_ccsr)
+    gk_free((void **)&cptr, &cind, LTERM);
+  if (mustfree_where)
+    gk_free((void **)&where, LTERM);
+
+  gk_free((void **)&perm, &todo, &touched, LTERM);
+
+  return ncmps;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    breadth-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param ctrl is the control structure
+    \param graph is the graph structure
+    \param bfsperm is the array that upon completion, bfsperm[i] will store
+           the ID of the vertex that corresponds to the ith vertex in the
+           re-ordered graph.
+*/
+/*************************************************************************/
+void ComputeBFSOrdering(ctrl_t *ctrl, graph_t *graph, idx_t *bfsperm)
+{
+  idx_t i, j, k, nvtxs, first, last;
+  idx_t *xadj, *adjncy, *perm;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Allocate memory required for the BFS traversal; perm[v] is where v sits in bfsperm */
+  perm = iincset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+
+  iincset(nvtxs, 0, bfsperm);  /* this array will also store the vertices
+                                  still to be processed */
+
+  /* bfsperm[first..last) is the BFS queue; bfsperm[last..nvtxs) are the unvisited vertices */
+  first = last = 0;
+  while (first < nvtxs) {
+    if (first == last) { /* Find another starting vertex */
+      k = bfsperm[last];
+      ASSERT(perm[k] != -1);
+      perm[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = bfsperm[first++];
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      /* if a node has already been visited, its perm[] will be -1 */
+      if (perm[k] != -1) {
+        /* perm[k] is the location within bfsperm of where k resides; 
+           put in that location bfsperm[last] that we are about to
+           overwrite and update perm[bfsperm[last]] to reflect that. */
+        bfsperm[perm[k]]    = bfsperm[last];
+        perm[bfsperm[last]] = perm[k];
+
+        bfsperm[last++] = k;  /* put node at the end of the "queue" */
+        perm[k]         = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Returns 1 if the whole graph forms a single connected component and 0
+    otherwise; if report is set, the component count is printed on failure. */
+/**************************************************************************/
+idx_t IsConnected(graph_t *graph, idx_t report)
+{
+  idx_t ncomponents, connected;
+
+  ncomponents = FindPartitionInducedComponents(graph, NULL, NULL, NULL);
+  connected   = (ncomponents == 1);
+  if (report && !connected)
+    printf("The graph is not connected. It has %"PRIDX" connected components.\n", ncomponents);
+
+  return connected;
+}
+
+
+/*************************************************************************/
+/*! This function checks whether or not partition pid is contiguous. It
+    returns 1 if so (also when no vertex belongs to pid) and 0 otherwise. */
+/*************************************************************************/
+idx_t IsConnectedSubdomain(ctrl_t *ctrl, graph_t *graph, idx_t pid, idx_t report)
+{
+  idx_t i, j, k, nvtxs, first, last, nleft, ncmps, wgt;
+  idx_t *xadj, *adjncy, *where, *touched, *queue;
+  idx_t *cptr;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+
+  touched = ismalloc(nvtxs, 0, "IsConnected: touched");
+  queue   = imalloc(nvtxs, "IsConnected: queue");
+  cptr    = imalloc(nvtxs+1, "IsConnected: cptr");
+
+  nleft = 0;
+  for (i=0; i<nvtxs; i++) {
+    if (where[i] == pid) 
+      nleft++;
+  }
+
+  for (i=0; i<nvtxs; i++) {  /* first vertex of pid; i == nvtxs if there is none */
+    if (where[i] == pid) 
+      break;
+  }
+
+  /* Seed the BFS. An empty partition has no seed: touched[nvtxs] would be
+     out of bounds, so in that case the queue is simply left empty. */
+  first = last = 0;
+  if (i < nvtxs) {
+    touched[i] = 1;
+    queue[last++] = i;
+  }
+
+  cptr[0] = 0;  /* This actually points to queue */
+  ncmps = 0;
+  while (first != nleft) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;
+      for (i=0; i<nvtxs; i++) {
+        if (where[i] == pid && !touched[i])
+          break;
+      }
+      queue[last++] = i;
+      touched[i] = 1;
+    }
+
+    i = queue[first++];
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (where[k] == pid && !touched[k]) {
+        queue[last++] = k;
+        touched[k] = 1;
+      }
+    }
+  }
+  cptr[++ncmps] = first;
+
+  if (ncmps > 1 && report) {
+    printf("The graph has %"PRIDX" connected components in partition %"PRIDX":\t", ncmps, pid);
+    for (i=0; i<ncmps; i++) {
+      wgt = 0;
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        wgt += graph->vwgt[queue[j]];
+      printf("[%5"PRIDX" %5"PRIDX"] ", cptr[i+1]-cptr[i], wgt);
+    }
+    printf("\n");
+  }
+
+  gk_free((void **)&touched, &queue, &cptr, LTERM);
+
+  return (ncmps == 1 ? 1 : 0);
+}
+
+
+/*************************************************************************/
+/*! This function identifies the connected components that remain in a graph
+    after removing the vertices that belong to the vertex separator (i.e.,
+    graph->where[i] == 2). The component memberships are returned in the
+    CSR-style pair of arrays cptr, cind and the number of components is the
+    return value. If no vertex lies outside the separator, a single empty
+    component is reported (cptr[0] == cptr[1] == 0). ctrl is not used. */
+/**************************************************************************/
+idx_t FindSepInducedComponents(ctrl_t *ctrl, graph_t *graph, idx_t *cptr, 
+          idx_t *cind)
+{
+  idx_t i, j, k, nvtxs, first, last, nleft, ncmps;
+  idx_t *xadj, *adjncy, *where, *touched, *queue;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+
+  touched = ismalloc(nvtxs, 0, "FindSepInducedComponents: touched");
+  queue   = cind;  /* the BFS order is written straight into cind */
+
+  for (i=0; i<graph->nbnd; i++)  /* pre-mark boundary (presumably separator) vertices */
+    touched[graph->bndind[i]] = 1;
+
+  nleft = 0;
+  for (i=0; i<nvtxs; i++) {
+    if (where[i] != 2) 
+      nleft++;
+  }
+
+  for (i=0; i<nvtxs; i++) {
+    if (where[i] != 2)
+      break;
+  }
+
+  if (i < nvtxs) {  /* otherwise there is nothing to seed and i is out of bounds */
+    touched[i] = 1;
+    queue[0]   = i;
+  }
+  first      = 0; last = 1;
+  cptr[0]    = 0;  /* This actually points to queue */
+  ncmps      = 0;
+
+  while (first != nleft) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;
+      for (i=0; i<nvtxs; i++) {
+        if (!touched[i])
+          break;
+      }
+      queue[last++] = i;
+      touched[i] = 1;
+    }
+
+    i = queue[first++];
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (!touched[k]) {
+        queue[last++] = k;
+        touched[k] = 1;
+      }
+    }
+  }
+  cptr[++ncmps] = first;
+
+  gk_free((void **)&touched, LTERM);
+
+  return ncmps;
+}
+
+
+/*************************************************************************/
+/*! This function finds all the connected components induced by the 
+    partitioning vector in graph->where and, when there are more components
+    than partitions, tries to move the extra ones into adjacent partitions. */
+/*************************************************************************/
+void EliminateComponents(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, jj, k, me, nparts, nvtxs, ncon, ncmps, other, 
+        ncand, target;
+  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts;
+  idx_t *cptr, *cind, *cpvec, *pcptr, *pcind, *cwhere;
+  idx_t cid, bestcid, *cwgt, *bestcwgt;
+  idx_t ntodo, oldntodo, *todo;
+  rkv_t *cand;
+  real_t *tpwgts;
+  idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL;  /* volume specific work arrays */
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+  adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt);  /* NULL => each edge counts as 1 below */
+
+  where = graph->where;
+  pwgts = graph->pwgts;
+
+  nparts = ctrl->nparts;
+  tpwgts = ctrl->tpwgts;
+
+  cptr = iwspacemalloc(ctrl, nvtxs+1);
+  cind = iwspacemalloc(ctrl, nvtxs);
+
+  ncmps = FindPartitionInducedComponents(graph, where, cptr, cind);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
+      printf("I found %"PRIDX" components, for this %"PRIDX"-way partition\n", 
+          ncmps, nparts)); 
+
+  /* There are more components than partitions */
+  if (ncmps > nparts) {
+    cwgt     = iwspacemalloc(ctrl, ncon);
+    bestcwgt = iwspacemalloc(ctrl, ncon);
+    cpvec    = iwspacemalloc(ctrl, nparts);
+    pcptr    = iset(nparts+1, 0, iwspacemalloc(ctrl, nparts+1));
+    pcind    = iwspacemalloc(ctrl, ncmps);
+    cwhere   = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));  /* -1 => vertex's component not placed yet */
+    todo     = iwspacemalloc(ctrl, ncmps);
+    cand     = (rkv_t *)wspacemalloc(ctrl, nparts*sizeof(rkv_t));
+
+    if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+      /* Vol-refinement specific working arrays */
+      modind  = iwspacemalloc(ctrl, nvtxs);
+      vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+      pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
+    }
+
+
+    /* Get a CSR representation of the components-2-partitions mapping */
+    for (i=0; i<ncmps; i++) 
+      pcptr[where[cind[cptr[i]]]]++;  /* a component's partition is read from its first vertex */
+    MAKECSR(i, nparts, pcptr);
+    for (i=0; i<ncmps; i++) 
+      pcind[pcptr[where[cind[cptr[i]]]]++] = i;
+    SHIFTCSR(i, nparts, pcptr);
+
+    /* Assign the heaviest component of each partition to its original partition */
+    for (ntodo=0, i=0; i<nparts; i++) {
+      if (pcptr[i+1]-pcptr[i] == 1)  /* a single component: it stays where it is */
+        bestcid = pcind[pcptr[i]];
+      else {
+        for (bestcid=-1, j=pcptr[i]; j<pcptr[i+1]; j++) {
+          cid = pcind[j];
+          iset(ncon, 0, cwgt);
+          for (ii=cptr[cid]; ii<cptr[cid+1]; ii++)
+            iaxpy(ncon, 1, vwgt+cind[ii]*ncon, 1, cwgt, 1);
+          if (bestcid == -1 || isum(ncon, bestcwgt, 1) < isum(ncon, cwgt, 1)) {  /* weights compared summed over all constraints */
+            bestcid  = cid;
+            icopy(ncon, cwgt, bestcwgt);
+          }
+        }
+        /* Queue every other component of this partition for relocation */
+        for (j=pcptr[i]; j<pcptr[i+1]; j++) {
+          if (pcind[j] != bestcid)
+            todo[ntodo++] = pcind[j];
+        }
+      }
+
+      for (j=cptr[bestcid]; j<cptr[bestcid+1]; j++) {  /* NOTE(review): bestcid is still -1 here if partition i has no component -- confirm that cannot happen */
+        ASSERT(where[cind[j]] == i);
+        cwhere[cind[j]] = i;
+      }
+    }
+
+
+    while (ntodo > 0) {
+      oldntodo = ntodo;
+      for (i=0; i<ntodo; i++) {
+        cid = todo[i];
+        me = where[cind[cptr[cid]]];  /* Get the domain of this component */
+
+        /* Determine the weight of the block to be moved */
+        iset(ncon, 0, cwgt);
+        for (j=cptr[cid]; j<cptr[cid+1]; j++) 
+          iaxpy(ncon, 1, vwgt+cind[j]*ncon, 1, cwgt, 1);
+
+        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
+            printf("Trying to move %"PRIDX" [%"PRIDX"] from %"PRIDX"\n", 
+                cid, isum(ncon, cwgt, 1), me)); 
+
+        /* Determine the connectivity to the partitions of already placed components */
+        iset(nparts, 0, cpvec);
+        for (j=cptr[cid]; j<cptr[cid+1]; j++) {
+          ii = cind[j];
+          for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) 
+            if (cwhere[adjncy[jj]] != -1)
+              cpvec[cwhere[adjncy[jj]]] += (adjwgt ? adjwgt[jj] : 1);
+        }
+
+        /* Put the neighbors into a cand[] array for sorting */
+        for (ncand=0, j=0; j<nparts; j++) {
+          if (cpvec[j] > 0) {
+            cand[ncand].key   = cpvec[j];
+            cand[ncand++].val = j;
+          }
+        }
+        if (ncand == 0)
+          continue;  /* touches no placed component yet; stays in todo for a later pass */
+
+        rkvsortd(ncand, cand);
+
+        /* Limit the moves to only the top candidates, which are defined as 
+           those with connectivity at least 50% of the best.
+           This applies only when ncon=1, as for multi-constraint, balancing
+           will be hard. */
+        if (ncon == 1) {
+          for (j=1; j<ncand; j++) {
+            if (cand[j].key < .5*cand[0].key)
+              break;
+          }
+          ncand = j;
+        }
+      
+        /* Now among those, select the one with the best balance */
+        target = cand[0].val;
+        for (j=1; j<ncand; j++) {
+          if (BetterBalanceKWay(ncon, cwgt, ctrl->ubfactors,
+                1, pwgts+target*ncon, ctrl->pijbm+target*ncon,
+                1, pwgts+cand[j].val*ncon, ctrl->pijbm+cand[j].val*ncon))
+            target = cand[j].val;
+        }
+
+        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
+            printf("\tMoving it to %"PRIDX" [%"PRIDX"] [%"PRIDX"]\n", target, cpvec[target], ncand));
+
+        /* Note that as a result of a previous movement, a connected component may
+           now prefer to stay in its original partition */
+        if (target != me) {
+          switch (ctrl->objtype) {
+            case METIS_OBJTYPE_CUT:
+              MoveGroupContigForCut(ctrl, graph, target, cid, cptr, cind);
+              break;
+
+            case METIS_OBJTYPE_VOL:
+              MoveGroupContigForVol(ctrl, graph, target, cid, cptr, cind, 
+                  vmarker, pmarker, modind);
+              break;
+
+            default:
+              gk_errexit(SIGERR, "Unknown objtype %d\n", ctrl->objtype);
+          }
+        }
+
+        /* Update the cwhere vector */
+        for (j=cptr[cid]; j<cptr[cid+1]; j++) 
+          cwhere[cind[j]] = target;
+
+        todo[i] = todo[--ntodo];  /* the entry swapped into slot i is examined in the next pass */
+      }
+      if (oldntodo == ntodo) {  /* a full pass placed nothing: give up */
+        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, printf("Stopped at ntodo: %"PRIDX"\n", ntodo));
+        break;
+      }
+    }
+
+    for (i=0; i<nvtxs; i++)
+      ASSERT(where[i] == cwhere[i]);
+
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function moves the vertices ind[ptr[gid]..ptr[gid+1]-1] to partition
+    'to', updating mincut, pwgts, the ckrinfo entries and the boundary list */
+/*************************************************************************/
+void MoveGroupContigForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, 
+         idx_t *ptr, idx_t *ind)
+{
+  idx_t i, ii, iii, j, jj, k, l, nvtxs, nbnd, from, me;
+  idx_t *xadj, *adjncy, *adjwgt, *where, *bndptr, *bndind;
+  ckrinfo_t *myrinfo;
+  cnbr_t *mynbrs;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where  = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  nbnd = graph->nbnd;  /* worked on locally; written back after the loop */
+
+  for (iii=ptr[gid]; iii<ptr[gid+1]; iii++) {
+    i    = ind[iii];
+    from = where[i];
+
+    myrinfo = graph->ckrinfo+i;
+    if (myrinfo->inbr == -1) {  /* no neighbor list has been allocated for i yet */
+      myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+      myrinfo->nnbrs = 0;
+    }
+    mynbrs = ctrl->cnbrpool + myrinfo->inbr; 
+
+    /* find the location of 'to' in myrinfo or create it if it is not there */
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].pid == to)
+        break;
+    }
+    if (k == myrinfo->nnbrs) {
+      mynbrs[k].pid = to;
+      mynbrs[k].ed = 0;
+      myrinfo->nnbrs++;
+    }
+
+    graph->mincut -= mynbrs[k].ed-myrinfo->id;  /* subtract the gain: ed towards 'to' minus id */
+
+    /* Update ID/ED and BND related information for the moved vertex */
+    iaxpy(graph->ncon,  1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon,   1);
+    iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1);
+    UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, 
+        bndptr, bndind, BNDTYPE_REFINE);  /* presumably also sets where[i] = to -- confirm in the macro */
+
+    /* Update the degrees of adjacent vertices */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      ii = adjncy[j];
+      me = where[ii];
+      myrinfo = graph->ckrinfo+ii;  /* from here on myrinfo refers to the neighbor, not to i */
+
+      UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me,
+          from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE);
+    }
+
+    ASSERT(CheckRInfo(ctrl, graph->ckrinfo+i));
+  }
+
+  graph->nbnd = nbnd;
+}
+
+
+/*************************************************************************/
+/*! This function moves the vertices ind[ptr[gid]..ptr[gid+1]-1] to partition
+    'to', updating where, pwgts, minvol, mincut and the vkrinfo (vol objective) */
+/*************************************************************************/
+void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, 
+         idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker, 
+         idx_t *modind)
+{
+  idx_t i, ii, iii, j, jj, k, l, nvtxs, from, me, other, xgain;
+  idx_t *xadj, *vsize, *adjncy, *where;
+  vkrinfo_t *myrinfo, *orinfo;
+  vnbr_t *mynbrs, *onbrs;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+
+  for (iii=ptr[gid]; iii<ptr[gid+1]; iii++) {
+    i    = ind[iii];
+    from = where[i];
+
+    myrinfo = graph->vkrinfo+i;
+    if (myrinfo->inbr == -1) {  /* no neighbor list has been allocated for i yet */
+      myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+      myrinfo->nnbrs = 0;
+    }
+    mynbrs = ctrl->vnbrpool + myrinfo->inbr; 
+
+    xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0);
+
+    /* find the location of 'to' in myrinfo or create it if it is not there */
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].pid == to)
+        break;
+    }
+    if (k == myrinfo->nnbrs) {  /* 'to' is not a neighbor of i: no stored gv, so derive the gain here */
+      if (myrinfo->nid > 0)
+        xgain -= vsize[i];
+
+      /* determine the volume gain resulting from that move */
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii     = adjncy[j];
+        other  = where[ii];
+        orinfo = graph->vkrinfo+ii;
+        onbrs  = ctrl->vnbrpool + orinfo->inbr;
+        ASSERT(other != to)
+
+        if (from == other) {
+          /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == to)
+              break;
+          }
+          if (l == orinfo->nnbrs) 
+            xgain -= vsize[ii];
+        }
+        else {
+          /* Remote vertex: volume grows (gain drops) if 'to' is a new subdomain */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == to)
+              break;
+          }
+          if (l == orinfo->nnbrs) 
+            xgain -= vsize[ii];
+
+          /* Remote vertex: volume shrinks (gain rises) if i is its only connection to 'from' */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == from && onbrs[l].ned == 1) {
+              xgain += vsize[ii];
+              break;
+            }
+          }
+        }
+      }
+      graph->minvol -= xgain;
+      graph->mincut -= -myrinfo->nid;  /* i.e. mincut += nid, as there is no ned towards 'to' to offset it */
+    }
+    else {  /* 'to' is already a neighbor: use its stored gv/ned */
+      graph->minvol -= (xgain + mynbrs[k].gv);
+      graph->mincut -= mynbrs[k].ned-myrinfo->nid;
+    }
+
+
+    /* Update where and pwgts */
+    where[i] = to;
+    iaxpy(graph->ncon,  1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon,   1);
+    iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1);
+
+    /* Update the id/ed/gains/bnd of potentially affected nodes */
+    KWayVolUpdate(ctrl, graph, i, from, to, NULL, NULL, NULL, NULL,
+        NULL, BNDTYPE_REFINE, vmarker, pmarker, modind);
+
+    /*CheckKWayVolPartitionParams(ctrl, graph);*/
+  }
+
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+  ASSERTP(ComputeVolume(graph, where) == graph->minvol,
+      ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol));
+
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/debug.c b/3rdParty/metis/metis-5.1.0/libmetis/debug.c
new file mode 100644
index 000000000..e188135da
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/debug.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * debug.c
+ *
+ * This file contains code that performs self-debugging
+ *
+ * Started 7/24/97
+ * George
+ *
+ */
+
+#include "metislib.h"
+
+
+
+/*************************************************************************/
+/*! Returns the total weight of the edges whose endpoints lie in different
+    parts of where; every edge counts as 1 when the graph has no adjwgt. */
+/*************************************************************************/
+idx_t ComputeCut(graph_t *graph, idx_t *where)
+{
+  idx_t v, e, total;
+  idx_t *xadj, *adjncy, *adjwgt;
+
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  total = 0;
+  for (v=0; v<graph->nvtxs; v++) {
+    for (e=xadj[v]; e<xadj[v+1]; e++) {
+      if (where[v] == where[adjncy[e]])
+        continue;
+      total += (adjwgt == NULL ? 1 : adjwgt[e]);
+    }
+  }
+
+  /* halved: each edge is presumably listed under both of its endpoints */
+  return total/2;
+}
+
+
+/*************************************************************************/
+/*! This function computes the total communication volume of a partitioning.
+    Every vertex contributes its size (vsize[i], or 1 when the graph has no
+    vsize array) once for each distinct partition, other than its own, that
+    appears among its neighbors.
+    \param graph is the graph whose adjacency structure is examined.
+    \param where holds the partition id of every vertex.
+    \returns the sum of these contributions. */
+/*************************************************************************/
+idx_t ComputeVolume(graph_t *graph, idx_t *where)
+{
+  idx_t v, e, nbrpart, weight, volume = 0;
+  idx_t nvtxs = graph->nvtxs;
+  idx_t *xadj = graph->xadj, *adjncy = graph->adjncy, *vsize = graph->vsize;
+  idx_t *lastseen;
+
+  /* one slot per partition id, sized from the largest id found in where */
+  lastseen = ismalloc(where[iargmax(nvtxs, where)]+1, -1, "ComputeVolume: marker");
+
+  for (v=0; v<nvtxs; v++) {
+    weight = (vsize ? vsize[v] : 1);
+    lastseen[where[v]] = v;  /* the vertex's own partition never counts */
+    for (e=xadj[v]; e<xadj[v+1]; e++) {
+      nbrpart = where[adjncy[e]];
+      if (lastseen[nbrpart] == v)
+        continue;
+      lastseen[nbrpart] = v;
+      volume += weight;
+    }
+  }
+
+  gk_free((void **)&lastseen, LTERM);
+
+  return volume;
+}
+
+
+/*************************************************************************/
+/*! This function computes the largest per-partition cut: for every partition
+    it adds up the weights of the edges that lead from one of its vertices to
+    a vertex of another partition (each such edge counts as 1 when the graph
+    has no adjwgt array) and returns the maximum of these sums. The index of
+    the partition attaining it and the value are also printed to stdout.
+    \param graph is the graph being examined.
+    \param nparts is the number of partitions (the size of the tally array).
+    \param where holds the partition id of every vertex.
+    \returns the largest of the per-partition sums. */
+/*************************************************************************/
+idx_t ComputeMaxCut(graph_t *graph, idx_t nparts, idx_t *where)
+{
+  idx_t v, e, heaviest;
+  idx_t *xadj = graph->xadj, *adjncy = graph->adjncy, *adjwgt = graph->adjwgt;
+  idx_t *partcut;
+
+  partcut = ismalloc(nparts, 0, "ComputeMaxCut: cuts");
+
+  for (v=0; v<graph->nvtxs; v++) {
+    for (e=xadj[v]; e<xadj[v+1]; e++) {
+      if (where[v] != where[adjncy[e]])
+        partcut[where[v]] += (adjwgt == NULL ? 1 : adjwgt[e]);
+    }
+  }
+
+  heaviest = partcut[iargmax(nparts, partcut)];
+
+  printf("%zu => %"PRIDX"\n", iargmax(nparts, partcut), heaviest);
+
+  gk_free((void **)&partcut, LTERM);
+
+  return heaviest;
+}
+
+
+/*************************************************************************/
+/*! This function checks that graph->bndptr/bndind/nbnd agree with the boundary
+    recomputed from where. Failures go through ASSERT/ASSERTP; it returns 1. */
+/*************************************************************************/
+idx_t CheckBnd(graph_t *graph) 
+{
+  idx_t i, j, nvtxs, nbnd;
+  idx_t *xadj, *adjncy, *where, *bndptr, *bndind;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  where = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  for (nbnd=0, i=0; i<nvtxs; i++) {
+    if (xadj[i+1]-xadj[i] == 0)
+      nbnd++;   /* Islands are considered to be boundary vertices */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      if (where[i] != where[adjncy[j]]) {  /* a neighbor in another partition makes i a boundary vertex */
+        nbnd++;
+        ASSERT(bndptr[i] != -1);
+        ASSERT(bndind[bndptr[i]] == i);
+        break;  /* count each vertex only once */
+      }
+    }
+  }
+
+  ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd));
+
+  return 1;
+}
+
+
+
+/*************************************************************************/
+/*! This function checks the boundary information, treating a non-island vertex
+    as boundary when ed - id >= 0. Failures go through ASSERT/ASSERTP; returns 1. */
+/*************************************************************************/
+idx_t CheckBnd2(graph_t *graph) 
+{
+  idx_t i, j, nvtxs, nbnd, id, ed;
+  idx_t *xadj, *adjncy, *where, *bndptr, *bndind;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  for (nbnd=0, i=0; i<nvtxs; i++) {
+    id = ed = 0;  /* weight of the edges to the same (id) / to other (ed) partitions */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      if (where[i] != where[adjncy[j]]) 
+        ed += graph->adjwgt[j];
+      else
+        id += graph->adjwgt[j];
+    }
+    if (ed - id >= 0 && xadj[i] < xadj[i+1]) {  /* islands are excluded here */
+      nbnd++;
+      ASSERTP(bndptr[i] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", i, id, ed));
+      ASSERT(bndind[bndptr[i]] == i);
+    }
+  }
+
+  ASSERTP(nbnd == graph->nbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, graph->nbnd));
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function checks that exactly onbnd vertices have where == 2 and that
+    bndptr is set only for them. Failures go through ASSERTP; it returns 1. */
+/*************************************************************************/
+idx_t CheckNodeBnd(graph_t *graph, idx_t onbnd) 
+{
+  idx_t i, j, nvtxs, nbnd;
+  idx_t *xadj, *adjncy, *where, *bndptr, *bndind;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  where = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  for (nbnd=0, i=0; i<nvtxs; i++) {
+    if (where[i] == 2) 
+      nbnd++;   
+  }
+
+  ASSERTP(nbnd == onbnd, ("%"PRIDX" %"PRIDX"\n", nbnd, onbnd));
+
+  for (i=0; i<nvtxs; i++) {
+    if (where[i] != 2) {
+      ASSERTP(bndptr[i] == -1, ("%"PRIDX" %"PRIDX"\n", i, bndptr[i]));
+    }
+    else {
+      ASSERTP(bndptr[i] != -1, ("%"PRIDX" %"PRIDX"\n", i, bndptr[i]));
+    }
+  }
+
+  return 1;
+}
+
+
+
+/*************************************************************************/
+/*! This function checks that no partition id occurs twice in the neighbor
+    list of rinfo. Duplicates are reported through ASSERTP; it returns 1. */
+/*************************************************************************/
+idx_t CheckRInfo(ctrl_t *ctrl, ckrinfo_t *rinfo)
+{
+  idx_t i, j;
+  cnbr_t *nbrs;
+
+  nbrs = ctrl->cnbrpool + rinfo->inbr;
+
+  for (i=0; i<rinfo->nnbrs; i++) {
+    for (j=i+1; j<rinfo->nnbrs; j++)
+      ASSERTP(nbrs[i].pid != nbrs[j].pid, 
+          ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", 
+           i, j, nbrs[i].pid, nbrs[j].pid));
+  }
+
+  return 1;
+}
+
+
+
+/*************************************************************************/
+/*! This function checks the correctness of the NodeFM data structures. It
+    recomputes the three part weights and, for each separator vertex (part 2),
+    the vertex weight adjacent to it on either side, compares them with
+    graph->pwgts and graph->nrinfo[].edegrees, and returns 0 after printing
+    the first mismatch or 1 when everything agrees. */
+/*************************************************************************/
+idx_t CheckNodePartitionParams(graph_t *graph)
+{
+  idx_t v, e, nbr, side, nvtxs;
+  idx_t *xadj, *adjncy, *vwgt, *where;
+  idx_t sidewgt[2], partwgt[3] = {0, 0, 0};
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+
+  for (v=0; v<nvtxs; v++) {
+    partwgt[where[v]] += vwgt[v];
+
+    if (where[v] != 2)  /* only separator vertices have edegrees to verify */
+      continue;
+
+    sidewgt[0] = 0;
+    sidewgt[1] = 0;
+    for (e=xadj[v]; e<xadj[v+1]; e++) {
+      nbr  = adjncy[e];
+      side = where[nbr];
+      if (side != 2)
+        sidewgt[side] += vwgt[nbr];
+    }
+
+    if (sidewgt[0] == graph->nrinfo[v].edegrees[0] &&
+        sidewgt[1] == graph->nrinfo[v].edegrees[1])
+      continue;
+
+    printf("Something wrong with edegrees: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", 
+        v, sidewgt[0], sidewgt[1], 
+        graph->nrinfo[v].edegrees[0], graph->nrinfo[v].edegrees[1]);
+    return 0;
+  }
+
+  if (partwgt[0] == graph->pwgts[0] && 
+      partwgt[1] == graph->pwgts[1] && 
+      partwgt[2] == graph->pwgts[2])
+    return 1;
+
+  printf("Something wrong with part-weights: %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", partwgt[0], partwgt[1], partwgt[2], graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]);
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! This function checks that no vertex of part 0 is adjacent to a vertex of
+    part 1 (part 2 is skipped). Failures go through ASSERTP; it returns 1. */
+/*************************************************************************/
+idx_t IsSeparable(graph_t *graph)
+{
+  idx_t i, j, nvtxs, other;
+  idx_t *xadj, *adjncy, *where;
+
+  nvtxs = graph->nvtxs;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  where = graph->where;
+
+  for (i=0; i<nvtxs; i++) {
+    if (where[i] == 2)
+      continue;
+    other = (where[i]+1)%2;  /* the opposite side: 0 <-> 1 */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      ASSERTP(where[adjncy[j]] != other, 
+          ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", 
+           i, where[i], adjncy[j], where[adjncy[j]], xadj[i+1]-xadj[i], 
+           xadj[adjncy[j]+1]-xadj[adjncy[j]]));
+    }
+  }
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function recomputes the gv (volume gain) of every neighbor entry in
+    graph->vkrinfo and prints each entry whose stored value differs from it */
+/*************************************************************************/
+void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, kk, l, nvtxs, nbnd, mincut, minvol, me, other, pid;
+  idx_t *xadj, *vsize, *adjncy, *pwgts, *where, *bndind, *bndptr;
+  vkrinfo_t *rinfo, *myrinfo, *orinfo, tmprinfo;
+  vnbr_t *mynbrs, *onbrs, *tmpnbrs;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+  rinfo  = graph->vkrinfo;
+
+  tmpnbrs = (vnbr_t *)wspacemalloc(ctrl, ctrl->nparts*sizeof(vnbr_t));
+
+  /*------------------------------------------------------------
+  / Recompute the gv of each vertex on a scratch copy of its rinfo
+  /------------------------------------------------------------*/
+  for (i=0; i<nvtxs; i++) {
+    me = where[i];
+
+    myrinfo = rinfo+i;
+    mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+    for (k=0; k<myrinfo->nnbrs; k++)
+      tmpnbrs[k] = mynbrs[k];
+
+    tmprinfo.nnbrs = myrinfo->nnbrs;
+    tmprinfo.nid    = myrinfo->nid;
+    tmprinfo.ned    = myrinfo->ned;
+
+    myrinfo = &tmprinfo;  /* from here the stored data is left untouched */
+    mynbrs  = tmpnbrs;
+
+    for (k=0; k<myrinfo->nnbrs; k++)
+      mynbrs[k].gv = 0;
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      ii     = adjncy[j];
+      other  = where[ii];
+      orinfo = rinfo+ii;
+      onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+      if (me == other) {
+        /* Find which domains 'i' is connected and 'ii' is not and update their gain */
+        for (k=0; k<myrinfo->nnbrs; k++) {
+          pid = mynbrs[k].pid;
+          for (kk=0; kk<orinfo->nnbrs; kk++) {
+            if (onbrs[kk].pid == pid)
+              break;
+          }
+          if (kk == orinfo->nnbrs) 
+            mynbrs[k].gv -= vsize[ii];
+        }
+      }
+      else {
+        /* Find the orinfo[me].ed and see if I'm the only connection */
+        for (k=0; k<orinfo->nnbrs; k++) {
+          if (onbrs[k].pid == me)
+            break;
+        }  /* NOTE(review): if 'me' is not found, k == orinfo->nnbrs and onbrs[k] below is past the list -- confirm it is always present */
+
+        if (onbrs[k].ned == 1) { /* I'm the only connection of 'ii' in 'me' */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if (mynbrs[k].pid == other) {
+              mynbrs[k].gv += vsize[ii];
+              break;
+            }
+          }
+
+          /* Increase the gains for all the common domains between 'i' and 'ii' */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if ((pid = mynbrs[k].pid) == other)
+              continue;
+            for (kk=0; kk<orinfo->nnbrs; kk++) {
+              if (onbrs[kk].pid == pid) {
+                mynbrs[k].gv += vsize[ii];
+                break;
+              }
+            }
+          }
+
+        }
+        else {
+          /* Find which domains 'i' is connected and 'ii' is not and update their gain */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if ((pid = mynbrs[k].pid) == other)
+              continue;
+            for (kk=0; kk<orinfo->nnbrs; kk++) {
+              if (onbrs[kk].pid == pid)
+                break;
+            }
+            if (kk == orinfo->nnbrs) 
+              mynbrs[k].gv -= vsize[ii];
+          }
+        }
+      }
+    }
+
+    myrinfo = rinfo+i;  /* back to the stored entries for the comparison */
+    mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      pid = mynbrs[k].pid;
+      for (kk=0; kk<tmprinfo.nnbrs; kk++) {
+        if (tmpnbrs[kk].pid == pid) {
+          if (tmpnbrs[kk].gv != mynbrs[k].gv)  /* columns: vertex, its part, neighbor part, stored gv, recomputed gv */
+            printf("[%8"PRIDX" %8"PRIDX" %8"PRIDX" %+8"PRIDX" %+8"PRIDX"]\n", 
+                i, where[i], pid, mynbrs[k].gv, tmpnbrs[kk].gv);
+          break;
+        }
+      }
+    }
+
+  }
+
+  WCOREPOP;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/defs.h b/3rdParty/metis/metis-5.1.0/libmetis/defs.h
new file mode 100644
index 000000000..196117838
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/defs.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * defs.h
+ *
+ * This file contains constant definitions
+ *
+ * Started 8/27/94
+ * George
+ *
+ * $Id: defs.h 13933 2013-03-29 22:20:46Z karypis $
+ *
+ */
+
+#ifndef _LIBMETIS_DEFS_H_
+#define _LIBMETIS_DEFS_H_
+
+#define METISTITLE              "METIS 5.0 Copyright 1998-13, Regents of the University of Minnesota\n"
+#define MAXLINE			1280000
+
+#define LTERM			(void **) 0	/* List terminator for gk_free() */
+
+#define HTLENGTH		((1<<11)-1)
+
+#define INIT_MAXNAD             200     /* Initial value for the maximum number 
+                                           of adjacent domains. This number will
+                                           be adjusted as required. */
+
+/* Types of boundaries */
+#define BNDTYPE_REFINE          1       /* Used for k-way refinement-purposes */
+#define BNDTYPE_BALANCE         2       /* Used for k-way balancing purposes */
+
+/* Mode of optimization */
+#define OMODE_REFINE            1       /* Optimize the objective function */
+#define OMODE_BALANCE           2       /* Balance the subdomains */
+
+/* Types of vertex states in the priority queue */
+#define VPQSTATUS_PRESENT      1       /* The vertex is in the queue */
+#define VPQSTATUS_EXTRACTED    2       /* The vertex has been extracted from the queue */
+#define VPQSTATUS_NOTPRESENT   3       /* The vertex is not present in the queue and
+                                          has not been extracted before */
+
+#define UNMATCHED		-1
+
+#define LARGENIPARTS		7	/* Number of random initial partitions */
+#define SMALLNIPARTS		5	/* Number of random initial partitions */
+
+#define COARSEN_FRACTION	0.85	/* Node reduction between successive coarsening levels */
+
+#define COMPRESSION_FRACTION		0.85
+
+#define MMDSWITCH		        120
+
+/* Default ufactors for the various operational modes */
+#define PMETIS_DEFAULT_UFACTOR          1
+#define MCPMETIS_DEFAULT_UFACTOR        10
+#define KMETIS_DEFAULT_UFACTOR          30
+#define OMETIS_DEFAULT_UFACTOR          200
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/fm.c b/3rdParty/metis/metis-5.1.0/libmetis/fm.c
new file mode 100644
index 000000000..7f5ea6b01
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/fm.c
@@ -0,0 +1,543 @@
+/*!
+\file
+\brief Functions for the edge-based FM refinement
+
+\date Started 7/23/97
+\author George  
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version\verbatim $Id: fm.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function performs an edge-based FM refinement, dispatching on graph->ncon */
+/*************************************************************************/
+void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)
+{
+  if (graph->ncon == 1)  /* single constraint */
+    FM_2WayCutRefine(ctrl, graph, ntpwgts, niter);
+  else                   /* any other ncon: multi-constraint variant */
+    FM_Mc2WayCutRefine(ctrl, graph, ntpwgts, niter);
+}
+
+/*************************************************************************/
+/*! This function performs a cut-focused FM refinement of a two-way partition.
+    FM_2WayRefine() calls it when graph->ncon == 1. It makes at most niter passes. */
+/*************************************************************************/
+void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)
+{
+  idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
+  idx_t *moved, *swaps, *perm;
+  rpq_t *queues[2];  /* one priority queue per side, indexed by where[] */
+  idx_t higain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt;
+  idx_t tpwgts[2];
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  where  = graph->where;
+  id     = graph->id;
+  ed     = graph->ed;
+  pwgts  = graph->pwgts;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+
+  moved = iwspacemalloc(ctrl, nvtxs);
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  perm  = iwspacemalloc(ctrl, nvtxs);
+
+  tpwgts[0] = graph->tvwgt[0]*ntpwgts[0];
+  tpwgts[1] = graph->tvwgt[0]-tpwgts[0];
+  
+  limit   = gk_min(gk_max(0.01*nvtxs, 15), 100);  /* 1% of nvtxs, clamped to [15, 100] */
+  avgvwgt = gk_min((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs);
+
+  queues[0] = rpqCreate(nvtxs);
+  queues[1] = rpqCreate(nvtxs);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+      Print2WayRefineStats(ctrl, graph, ntpwgts, 0, -2));
+
+  origdiff = iabs(tpwgts[0]-pwgts[0]);
+  iset(nvtxs, -1, moved);  /* -1 is the "not moved" marker tested below */
+  for (pass=0; pass<niter; pass++) { /* Do at most niter passes */
+    rpqReset(queues[0]);
+    rpqReset(queues[1]);
+
+    mincutorder = -1;
+    newcut = mincut = initcut = graph->mincut;
+    mindiff = iabs(tpwgts[0]-pwgts[0]);
+
+    ASSERT(ComputeCut(graph, where) == graph->mincut);
+    ASSERT(CheckBnd(graph));
+
+    /* Insert boundary nodes in the priority queues, keyed by ed-id */
+    nbnd = graph->nbnd;
+    irandArrayPermute(nbnd, perm, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = perm[ii];
+      ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);
+      ASSERT(bndptr[bndind[i]] != -1);
+      rpqInsert(queues[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]);
+    }
+
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1);  /* side with the smaller target-minus-current weight */
+      to = (from+1)%2;
+
+      if ((higain = rpqGetTop(queues[from])) == -1)
+        break;
+      ASSERT(bndptr[higain] != -1);
+
+      newcut -= (ed[higain]-id[higain]);
+      INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);
+
+      if ((newcut < mincut && iabs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) || 
+          (newcut == mincut && iabs(tpwgts[0]-pwgts[0]) < mindiff)) {
+        mincut  = newcut;
+        mindiff = iabs(tpwgts[0]-pwgts[0]);
+        mincutorder = nswaps;
+      }
+      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
+        newcut += (ed[higain]-id[higain]);
+        INC_DEC(pwgts[from], pwgts[to], vwgt[higain]);
+        break;
+      }
+
+      where[higain] = to;
+      moved[higain] = nswaps;
+      swaps[nswaps] = higain;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+        printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1]));
+
+      /**************************************************************
+      * Update the id[i]/ed[i] values of the affected nodes
+      ***************************************************************/
+      SWAP(id[higain], ed[higain], tmp);
+      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
+        BNDDelete(nbnd, bndind,  bndptr, higain);
+
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+        INC_DEC(id[k], ed[k], kwgt);
+
+        /* Update its boundary information and queue position */
+        if (bndptr[k] != -1) { /* If k was a boundary vertex */
+          if (ed[k] == 0) { /* Not a boundary vertex any more */
+            BNDDelete(nbnd, bndind, bndptr, k);
+            if (moved[k] == -1)  /* Remove it if in the queues */
+              rpqDelete(queues[where[k]], k);
+          }
+          else { /* If it has not been moved, update its position in the queue */
+            if (moved[k] == -1) 
+              rpqUpdate(queues[where[k]], k, ed[k]-id[k]);
+          }
+        }
+        else {
+          if (ed[k] > 0) {  /* It will now become a boundary vertex */
+            BNDInsert(nbnd, bndind, bndptr, k);
+            if (moved[k] == -1) 
+              rpqInsert(queues[where[k]], k, ed[k]-id[k]);
+          }
+        }
+      }
+
+    }
+
+
+    /****************************************************************
+    * Roll back the swaps recorded after position mincutorder
+    *****************************************************************/
+    for (i=0; i<nswaps; i++)
+      moved[swaps[i]] = -1;  /* reset moved array */
+    for (nswaps--; nswaps>mincutorder; nswaps--) {  /* newest swap first */
+      higain = swaps[nswaps];
+
+      to = where[higain] = (where[higain]+1)%2;
+      SWAP(id[higain], ed[higain], tmp);
+      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
+        BNDDelete(nbnd, bndind,  bndptr, higain);
+      else if (ed[higain] > 0 && bndptr[higain] == -1)
+        BNDInsert(nbnd, bndind,  bndptr, higain);
+
+      INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]);
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+        INC_DEC(id[k], ed[k], kwgt);
+
+        if (bndptr[k] != -1 && ed[k] == 0)
+          BNDDelete(nbnd, bndind, bndptr, k);
+        if (bndptr[k] == -1 && ed[k] > 0)
+          BNDInsert(nbnd, bndind, bndptr, k);
+      }
+    }
+
+    graph->mincut = mincut;
+    graph->nbnd   = nbnd;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+        Print2WayRefineStats(ctrl, graph, ntpwgts, 0, mincutorder));
+
+    if (mincutorder <= 0 || mincut == initcut)  /* at most the first swap was kept, or the cut is unchanged */
+      break;
+  }
+
+  rpqDestroy(queues[0]);
+  rpqDestroy(queues[1]);
+
+  WCOREPOP;
+}
+
+/*************************************************************************/
+/*! This function performs a cut-focused multi-constraint FM refinement of a
+    two-way partition. FM_2WayRefine() calls it when graph->ncon != 1. */
+/*************************************************************************/
+void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)
+{
+  idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, 
+        me, limit, tmp, cnum;
+  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *id, *ed, 
+        *bndptr, *bndind;
+  idx_t *moved, *swaps, *perm, *qnum;
+  idx_t higain, mincut, initcut, newcut, mincutorder;
+  real_t *invtvwgt, *ubfactors, *minbalv, *newbalv;
+  real_t origbal, minbal, newbal, rgain, ffactor;
+  rpq_t **queues;
+
+  WCOREPUSH;
+
+  nvtxs    = graph->nvtxs;
+  ncon     = graph->ncon;
+  xadj     = graph->xadj;
+  vwgt     = graph->vwgt;
+  adjncy   = graph->adjncy;
+  adjwgt   = graph->adjwgt;
+  invtvwgt = graph->invtvwgt;
+  where    = graph->where;
+  id       = graph->id;
+  ed       = graph->ed;
+  pwgts    = graph->pwgts;
+  bndptr   = graph->bndptr;
+  bndind   = graph->bndind;
+
+  moved     = iwspacemalloc(ctrl, nvtxs);
+  swaps     = iwspacemalloc(ctrl, nvtxs);
+  perm      = iwspacemalloc(ctrl, nvtxs);
+  qnum      = iwspacemalloc(ctrl, nvtxs);
+  ubfactors = rwspacemalloc(ctrl, ncon);
+  newbalv   = rwspacemalloc(ctrl, ncon);
+  minbalv   = rwspacemalloc(ctrl, ncon);
+
+  limit = gk_min(gk_max(0.01*nvtxs, 25), 150);  /* 1% of nvtxs, clamped to [25, 150] */
+
+
+  /* Determine a fudge factor to allow the refinement routines to get out 
+     of tight balancing constraints. */
+  ffactor = .5/gk_max(20, nvtxs);
+
+  /* Initialize the queues: queues[2*c+p] is used for constraint c on side p */
+  queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *));
+  for (i=0; i<2*ncon; i++) 
+    queues[i] = rpqCreate(nvtxs);
+  for (i=0; i<nvtxs; i++)
+    qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt);  /* constraint index whose queues hold vertex i */
+
+  /* Determine the unbalance tolerance for each constraint. The tolerance is
+     equal to the maximum of the original load imbalance and the user-supplied
+     allowed tolerance. The rationale behind this approach is to allow the
+     refinement routine to improve the cut, without having to worry about fixing
+     load imbalance problems. The load imbalance is addressed by the balancing
+     routines. */
+  origbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, ubfactors);
+  for (i=0; i<ncon; i++) 
+    ubfactors[i] = (ubfactors[i] > 0 ? ctrl->ubfactors[i]+ubfactors[i] : ctrl->ubfactors[i]);
+
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+      Print2WayRefineStats(ctrl, graph, ntpwgts, origbal, -2));
+
+  iset(nvtxs, -1, moved);  /* -1 is the "not moved" marker tested below */
+  for (pass=0; pass<niter; pass++) { /* Do at most niter passes */
+    for (i=0; i<2*ncon; i++)  
+      rpqReset(queues[i]);
+
+    mincutorder = -1;
+    newcut = mincut = initcut = graph->mincut;
+
+    minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, minbalv);
+
+    ASSERT(ComputeCut(graph, where) == graph->mincut);
+    ASSERT(CheckBnd(graph));
+
+    /* Insert boundary nodes in the priority queues, keyed by ed-id */
+    nbnd = graph->nbnd;
+    irandArrayPermute(nbnd, perm, nbnd/5, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[perm[ii]];
+      ASSERT(ed[i] > 0 || id[i] == 0);
+      ASSERT(bndptr[i] != -1);
+      //rgain = 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1);
+      //rgain = (ed[i]-id[i] > 0 ? 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1) : ed[i]-id[i]);
+      rgain = ed[i]-id[i];
+      rpqInsert(queues[2*qnum[i]+where[i]], i, rgain);
+    }
+
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      SelectQueue(graph, ctrl->pijbm, ubfactors, queues, &from, &cnum);
+
+      to = (from+1)%2;
+
+      if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1)
+        break;
+      ASSERT(bndptr[higain] != -1);
+
+      newcut -= (ed[higain]-id[higain]);
+
+      iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
+      iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
+      newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, newbalv);
+
+      if ((newcut < mincut && newbal <= ffactor) || 
+          (newcut == mincut && (newbal < minbal || 
+           (newbal == minbal && BetterBalance2Way(ncon, minbalv, newbalv))))) {
+        mincut      = newcut;
+        minbal      = newbal;
+        mincutorder = nswaps;
+        rcopy(ncon, newbalv, minbalv);
+      }
+      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
+        newcut += (ed[higain]-id[higain]);
+        iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
+        iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
+        break;
+      }
+
+      where[higain] = to;
+      moved[higain] = nswaps;
+      swaps[nswaps] = higain;
+
+      if (ctrl->dbglvl&METIS_DBG_MOVEINFO) {
+        printf("Moved%6"PRIDX" from %"PRIDX"(%"PRIDX") Gain:%5"PRIDX", "
+            "Cut:%5"PRIDX", NPwgts:", higain, from, cnum, ed[higain]-id[higain], newcut);
+        for (l=0; l<ncon; l++) 
+          printf("(%.3"PRREAL" %.3"PRREAL")", pwgts[l]*invtvwgt[l], pwgts[ncon+l]*invtvwgt[l]);
+        printf(" %+.3"PRREAL" LB: %.3"PRREAL"(%+.3"PRREAL")\n", 
+            minbal, ComputeLoadImbalance(graph, 2, ctrl->pijbm), newbal);
+      }
+
+
+      /**************************************************************
+      * Update the id[i]/ed[i] values of the affected nodes
+      ***************************************************************/
+      SWAP(id[higain], ed[higain], tmp);
+      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
+        BNDDelete(nbnd, bndind,  bndptr, higain);
+
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+        INC_DEC(id[k], ed[k], kwgt);
+
+        /* Update its boundary information and queue position */
+        if (bndptr[k] != -1) { /* If k was a boundary vertex */
+          if (ed[k] == 0) { /* Not a boundary vertex any more */
+            BNDDelete(nbnd, bndind, bndptr, k);
+            if (moved[k] == -1)  /* Remove it if in the queues */
+              rpqDelete(queues[2*qnum[k]+where[k]], k);
+          }
+          else { /* If it has not been moved, update its position in the queue */
+            if (moved[k] == -1) {
+              //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);
+              //rgain = (ed[k]-id[k] > 0 ? 
+              //              1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);
+              rgain = ed[k]-id[k];
+              rpqUpdate(queues[2*qnum[k]+where[k]], k, rgain);
+            }
+          }
+        }
+        else {
+          if (ed[k] > 0) {  /* It will now become a boundary vertex */
+            BNDInsert(nbnd, bndind, bndptr, k);
+            if (moved[k] == -1) {
+              //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);
+              //rgain = (ed[k]-id[k] > 0 ? 
+              //              1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);
+              rgain = ed[k]-id[k];
+              rpqInsert(queues[2*qnum[k]+where[k]], k, rgain);
+            }
+          }
+        }
+      }
+
+    }
+
+
+    /****************************************************************
+    * Roll back the swaps recorded after position mincutorder
+    *****************************************************************/
+    for (i=0; i<nswaps; i++)
+      moved[swaps[i]] = -1;  /* reset moved array */
+    for (nswaps--; nswaps>mincutorder; nswaps--) {  /* newest swap first */
+      higain = swaps[nswaps];
+
+      to = where[higain] = (where[higain]+1)%2;
+      SWAP(id[higain], ed[higain], tmp);
+      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
+        BNDDelete(nbnd, bndind,  bndptr, higain);
+      else if (ed[higain] > 0 && bndptr[higain] == -1)
+        BNDInsert(nbnd, bndind,  bndptr, higain);
+
+      iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,         1);
+      iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1);
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
+        INC_DEC(id[k], ed[k], kwgt);
+
+        if (bndptr[k] != -1 && ed[k] == 0)
+          BNDDelete(nbnd, bndind, bndptr, k);
+        if (bndptr[k] == -1 && ed[k] > 0)
+          BNDInsert(nbnd, bndind, bndptr, k);
+      }
+    }
+
+    graph->mincut = mincut;
+    graph->nbnd   = nbnd;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE, 
+        Print2WayRefineStats(ctrl, graph, ntpwgts, minbal, mincutorder));
+
+    if (mincutorder <= 0 || mincut == initcut)  /* at most the first swap was kept, or the cut is unchanged */
+      break;
+  }
+
+  for (i=0; i<2*ncon; i++) 
+    rpqDestroy(queues[i]);
+
+  WCOREPOP;
+}
+
+/*************************************************************************/
+/*! This function selects the partition number (*from) and the constraint
+    (*cnum) of the queue, queues[2*(*cnum)+(*from)], from which we will move
+    vertices out. Both stay -1 if no balance test fires and all queues are empty. */
+/*************************************************************************/ 
+void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, 
+         rpq_t **queues, idx_t *from, idx_t *cnum)
+{
+  idx_t ncon, i, part;
+  real_t max, tmp;
+
+  ncon = graph->ncon;
+
+  *from = -1;
+  *cnum = -1;
+
+  /* First determine the side and the queue, irrespective of the presence of nodes. 
+     The side & queue are determined based on the most violated balancing constraint. */
+  for (max=0.0, part=0; part<2; part++) {
+    for (i=0; i<ncon; i++) {
+      tmp = graph->pwgts[part*ncon+i]*pijbm[part*ncon+i] - ubfactors[i];
+      /* the '=' in the test below is to ensure that under tight constraints
+         the partition that is at the max is selected */
+      if (tmp >= max) { 
+        max   = tmp;
+        *from = part;
+        *cnum = i;
+      }
+    }
+  }
+
+
+  if (*from != -1) {
+    /* in case the desired queue is empty, select a queue from the same side */
+    if (rpqLength(queues[2*(*cnum)+(*from)]) == 0) {
+      for (i=0; i<ncon; i++) {  /* first non-empty queue on this side seeds max */
+        if (rpqLength(queues[2*i+(*from)]) > 0) {
+          max   = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i];
+          *cnum = i;
+          break;
+        }
+      }
+
+      for (i++; i<ncon; i++) {  /* remaining constraints: keep the non-empty queue with the largest value */
+        tmp = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i];
+        if (tmp > max && rpqLength(queues[2*i+(*from)]) > 0) {
+          max   = tmp;
+          *cnum = i;
+        }
+      }
+    }
+
+    /*
+    printf("Selected1 %"PRIDX"(%"PRIDX") -> %"PRIDX" [%5"PRREAL"]\n", 
+        *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); 
+    */
+  }
+  else {
+    /* the partitioning does not violate balancing constraints, in which case select 
+       a queue based on cut criteria */
+    for (part=0; part<2; part++) {
+      for (i=0; i<ncon; i++) {
+        if (rpqLength(queues[2*i+part]) > 0 && 
+            (*from == -1 || rpqSeeTopKey(queues[2*i+part]) > max)) {
+          max   = rpqSeeTopKey(queues[2*i+part]); 
+          *from = part;
+          *cnum = i;
+        }
+      }
+    }
+    /*
+    printf("Selected2 %"PRIDX"(%"PRIDX") -> %"PRIDX"\n", 
+        *from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max); 
+    */
+  }
+}
+
+/*************************************************************************/
+/*! Prints statistics about the refinement with printf. mincutorder == -2
+    prints the "Parts:" line; any other value prints the "Mincut:" line. */
+/*************************************************************************/ 
+void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         real_t deltabal, idx_t mincutorder)
+{
+  int i;
+
+  if (mincutorder == -2) {
+    printf("Parts: ");
+    printf("Nv-Nb[%5"PRIDX" %5"PRIDX"] ICut: %6"PRIDX, 
+        graph->nvtxs, graph->nbnd, graph->mincut);
+    printf(" [");
+    for (i=0; i<graph->ncon; i++)  /* one group per constraint: both pwgts scaled by invtvwgt, then both ntpwgts */
+      printf("(%.3"PRREAL" %.3"PRREAL" T:%.3"PRREAL" %.3"PRREAL")", 
+          graph->pwgts[i]*graph->invtvwgt[i], 
+          graph->pwgts[graph->ncon+i]*graph->invtvwgt[i],
+          ntpwgts[i], ntpwgts[graph->ncon+i]);
+    printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", 
+        ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal);
+  }
+  else {
+    printf("\tMincut: %6"PRIDX" at %5"PRIDX" NBND %6"PRIDX" NPwgts: [", 
+        graph->mincut, mincutorder, graph->nbnd);
+    for (i=0; i<graph->ncon; i++)  /* one group per constraint: both pwgts scaled by invtvwgt */
+      printf("(%.3"PRREAL" %.3"PRREAL")", 
+          graph->pwgts[i]*graph->invtvwgt[i], graph->pwgts[graph->ncon+i]*graph->invtvwgt[i]);
+    printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n", 
+        ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal);
+  }
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/fortran.c b/3rdParty/metis/metis-5.1.0/libmetis/fortran.c
new file mode 100644
index 000000000..5c3ed9029
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/fortran.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * fortran.c
+ *
+ * This file contains code for the fortran to C interface
+ *
+ * Started 8/19/97
+ * George
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function changes the numbering of xadj and adjncy to start from 0 instead of 1 */
+/*************************************************************************/
+void Change2CNumbering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy)
+{
+  idx_t i;
+
+  for (i=0; i<=nvtxs; i++)  /* all nvtxs+1 entries of xadj */
+    xadj[i]--;
+
+  for (i=0; i<xadj[nvtxs]; i++)  /* the bound reads the already decremented xadj[nvtxs] */
+    adjncy[i]--;
+}
+
+
+/*************************************************************************/
+/*! This function changes the numbering of vector, adjncy and xadj to start from 1 instead of 0 */
+/*************************************************************************/
+void Change2FNumbering(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vector)
+{
+  idx_t i;
+
+  for (i=0; i<nvtxs; i++)
+    vector[i]++;
+
+  for (i=0; i<xadj[nvtxs]; i++)  /* runs before xadj is shifted, so the bound is the unshifted xadj[nvtxs] */
+    adjncy[i]++;
+
+  for (i=0; i<=nvtxs; i++)  /* all nvtxs+1 entries of xadj */
+    xadj[i]++;
+}
+
+/*************************************************************************/
+/*! This function changes the numbering of adjncy and xadj to start from 1 instead of 0 */
+/*************************************************************************/
+void Change2FNumbering2(idx_t nvtxs, idx_t *xadj, idx_t *adjncy)
+{
+  idx_t i, nedges;
+
+  nedges = xadj[nvtxs];  /* read before xadj is shifted below */
+  for (i=0; i<nedges; i++)
+    adjncy[i]++;
+
+  for (i=0; i<=nvtxs; i++)  /* all nvtxs+1 entries of xadj */
+    xadj[i]++;
+}
+
+
+
+/*************************************************************************/
+/*! This function changes the numbering of v1, v2, adjncy and xadj to start from 1 instead of 0 */
+/*************************************************************************/
+void Change2FNumberingOrder(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, 
+         idx_t *v1, idx_t *v2)
+{
+  idx_t i, nedges;
+
+  for (i=0; i<nvtxs; i++) {  /* v1 and v2 each have nvtxs entries shifted */
+    v1[i]++;
+    v2[i]++;
+  }
+
+  nedges = xadj[nvtxs];  /* read before xadj is shifted below */
+  for (i=0; i<nedges; i++)
+    adjncy[i]++;
+
+  for (i=0; i<=nvtxs; i++)  /* all nvtxs+1 entries of xadj */
+    xadj[i]++;
+
+}
+
+
+
+/*************************************************************************/
+/*! This function changes the numbering of ptr and ind to start from 0 instead of 1 */
+/*************************************************************************/
+void ChangeMesh2CNumbering(idx_t n, idx_t *ptr, idx_t *ind)
+{
+  idx_t i;
+
+  for (i=0; i<=n; i++)  /* all n+1 entries of ptr */
+    ptr[i]--;
+  for (i=0; i<ptr[n]; i++)  /* the bound reads the already decremented ptr[n] */
+    ind[i]--;
+}
+
+
+/*************************************************************************/
+/*! This function changes the numbering of ind/ptr and adjncy/xadj to start from 1 instead of 0 */
+/*************************************************************************/
+void ChangeMesh2FNumbering(idx_t n, idx_t *ptr, idx_t *ind, idx_t nvtxs, 
+         idx_t *xadj, idx_t *adjncy)
+{
+  idx_t i;
+
+  for (i=0; i<ptr[n]; i++)  /* runs before ptr is shifted, so the bound is the unshifted ptr[n] */
+    ind[i]++;
+  for (i=0; i<=n; i++)
+    ptr[i]++;
+
+  for (i=0; i<xadj[nvtxs]; i++)  /* runs before xadj is shifted, so the bound is the unshifted xadj[nvtxs] */
+    adjncy[i]++;
+  for (i=0; i<=nvtxs; i++)
+    xadj[i]++;
+}
+
+
+/*************************************************************************/
+/*! This function changes the numbering of ind, ptr, epart and npart to start from 1 instead of 0 */
+/*************************************************************************/
+void ChangeMesh2FNumbering2(idx_t ne, idx_t nn, idx_t *ptr, idx_t *ind, 
+         idx_t *epart, idx_t *npart)
+{
+  idx_t i;
+
+  for (i=0; i<ptr[ne]; i++)  /* runs before ptr is shifted, so the bound is the unshifted ptr[ne] */
+    ind[i]++;
+  for (i=0; i<=ne; i++)
+    ptr[i]++;
+
+  for (i=0; i<ne; i++)  /* ne entries of epart */
+    epart[i]++;
+
+  for (i=0; i<nn; i++)  /* nn entries of npart */
+    npart[i]++;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/frename.c b/3rdParty/metis/metis-5.1.0/libmetis/frename.c
new file mode 100644
index 000000000..3d43c3ade
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/frename.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * Frename.c
+ * 
+ * This file contains some renaming routines to deal with different Fortran compilers
+ *
+ * Started 9/15/97
+ * George
+ *
+ */
+
+
+#include "metislib.h"
+/* FRENAME defines name1..name4 as int functions with parameter list dargs, each returning name cargs */
+#define FRENAME(name, dargs, cargs, name1, name2, name3, name4)   \
+  int name1 dargs { return name cargs; }                          \
+  int name2 dargs { return name cargs; }                          \
+  int name3 dargs { return name cargs; }                          \
+  int name4 dargs { return name cargs; }
+
+/* Aliases that forward to METIS_PartGraphRecursive: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+    METIS_PartGraphRecursive, 
+    (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, 
+     idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, 
+     real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part),
+    (nvtxs, ncon, xadj, adjncy, vwgt, 
+     vsize, adjwgt, nparts, tpwgts, 
+     ubvec, options, edgecut, part),
+    METIS_PARTGRAPHRECURSIVE, 
+    metis_partgraphrecursive, 
+    metis_partgraphrecursive_, 
+    metis_partgraphrecursive__
+) 
+    
+/* Aliases that forward to METIS_PartGraphKway: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+    METIS_PartGraphKway,
+    (idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, 
+     idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, 
+     real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part),
+    (nvtxs, ncon, xadj, adjncy, vwgt, 
+     vsize, adjwgt, nparts, tpwgts, 
+     ubvec, options, edgecut, part),
+    METIS_PARTGRAPHKWAY,
+    metis_partgraphkway,
+    metis_partgraphkway_,
+    metis_partgraphkway__
+)
+/* Aliases that forward to METIS_MeshToDual: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_MeshToDual,
+  (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *ncommon, idx_t *numflag, 
+   idx_t **r_xadj, idx_t **r_adjncy),
+  (ne, nn, eptr, eind, ncommon, numflag, r_xadj, r_adjncy),
+  METIS_MESHTODUAL,
+  metis_meshtodual,
+  metis_meshtodual_,
+  metis_meshtodual__
+)
+
+/* Aliases that forward to METIS_MeshToNodal: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_MeshToNodal,
+  (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *numflag, idx_t **r_xadj, 
+   idx_t **r_adjncy),
+  (ne, nn, eptr, eind, numflag, r_xadj, r_adjncy),
+  METIS_MESHTONODAL,
+  metis_meshtonodal,
+  metis_meshtonodal_,
+  metis_meshtonodal__
+)
+  
+/* Aliases that forward to METIS_PartMeshNodal: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_PartMeshNodal,
+  (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *vwgt, idx_t *vsize, 
+   idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, 
+   idx_t *npart),
+  (ne, nn, eptr, eind, vwgt, vsize, nparts, tpwgts, options, objval, epart, npart),
+  METIS_PARTMESHNODAL,
+  metis_partmeshnodal,
+  metis_partmeshnodal_,
+  metis_partmeshnodal__
+)
+
+/* Aliases that forward to METIS_PartMeshDual: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_PartMeshDual,
+  (idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, idx_t *vwgt, idx_t *vsize, 
+   idx_t *ncommon, idx_t *nparts, real_t *tpwgts, idx_t *options, idx_t *objval, 
+   idx_t *epart, idx_t *npart),
+  (ne, nn, eptr, eind, vwgt, vsize, ncommon, nparts, tpwgts, options, objval, epart, npart),
+  METIS_PARTMESHDUAL,
+  metis_partmeshdual,
+  metis_partmeshdual_,
+  metis_partmeshdual__
+)
+
+/* Aliases that forward to METIS_NodeND: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_NodeND,
+  (idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *options, idx_t *perm, 
+   idx_t *iperm),
+  (nvtxs, xadj, adjncy, vwgt, options, perm, iperm),
+  METIS_NODEND,
+  metis_nodend,
+  metis_nodend_,
+  metis_nodend__
+)
+
+/* Aliases that forward to METIS_Free: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_Free,
+  (void *ptr),
+  (ptr),
+  METIS_FREE,
+  metis_free,
+  metis_free_,
+  metis_free__
+)
+
+/* Aliases that forward to METIS_SetDefaultOptions: upper case, lower case, and one or two trailing underscores */
+FRENAME(
+  METIS_SetDefaultOptions,
+  (idx_t *options),
+  (options),
+  METIS_SETDEFAULTOPTIONS,
+  metis_setdefaultoptions,
+  metis_setdefaultoptions_,
+  metis_setdefaultoptions__
+)
+    
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/gklib.c b/3rdParty/metis/metis-5.1.0/libmetis/gklib.c
new file mode 100644
index 000000000..4e17eac42
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/gklib.c
@@ -0,0 +1,120 @@
+/*!
+\file  gklib.c
+\brief Various helper routines generated using GKlib's templates
+
+\date   Started 4/12/2007
+\author George  
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version\verbatim $Id: gklib.c 10395 2011-06-23 23:28:06Z karypis $ \endverbatim
+*/
+
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! BLAS routines, instantiated for idx_t (prefix i) and real_t (prefix r) */
+/*************************************************************************/
+GK_MKBLAS(i,  idx_t,  idx_t)
+GK_MKBLAS(r,  real_t, real_t)
+
+/*************************************************************************/
+/*! Memory allocation routines, instantiated for idx_t, real_t, ikv_t and rkv_t */
+/*************************************************************************/
+GK_MKALLOC(i,    idx_t)
+GK_MKALLOC(r,    real_t)
+GK_MKALLOC(ikv,  ikv_t)
+GK_MKALLOC(rkv,  rkv_t)
+
+/*************************************************************************/
+/*! Priority queues routines (ipq and rpq), instantiated with key_gt as the key test */
+/*************************************************************************/
+#define key_gt(a, b) ((a) > (b))
+GK_MKPQUEUE(ipq, ipq_t, ikv_t, idx_t, idx_t, ikvmalloc, IDX_MAX, key_gt)
+GK_MKPQUEUE(rpq, rpq_t, rkv_t, real_t, idx_t, rkvmalloc, REAL_MAX, key_gt)
+#undef key_gt
+
+/*************************************************************************/
+/*! Random number generation routines, instantiated for idx_t (prefix i) */
+/*************************************************************************/
+GK_MKRANDOM(i, idx_t, idx_t)
+
+/*************************************************************************/
+/*! Utility routines, instantiated for idx_t (prefix i) */
+/*************************************************************************/
+GK_MKARRAY2CSR(i, idx_t)
+
+/*************************************************************************/
+/*! Sorting routines: each one expands GK_MKQSORT with a local ordering macro */
+/*************************************************************************/
+void isorti(size_t n, idx_t *base)
+{
+#define i_lt(a, b) ((*a) < (*b))  /* ordering test for idx_t; presumably yields increasing order */
+  GK_MKQSORT(idx_t, base, n, i_lt);
+#undef i_lt
+}
+/* isortd: GK_MKQSORT over the n idx_t items at base, ordering test (*a) > (*b); presumably decreasing order */
+void isortd(size_t n, idx_t *base)
+{
+#define i_gt(a, b) ((*a) > (*b))
+  GK_MKQSORT(idx_t, base, n, i_gt);
+#undef i_gt
+}
+/* rsorti: GK_MKQSORT over the n real_t items at base, ordering test (*a) < (*b); presumably increasing order */
+void rsorti(size_t n, real_t *base)
+{
+#define r_lt(a, b) ((*a) < (*b))
+  GK_MKQSORT(real_t, base, n, r_lt);
+#undef r_lt
+}
+/* rsortd: GK_MKQSORT over the n real_t items at base, ordering test (*a) > (*b); presumably decreasing order */
+void rsortd(size_t n, real_t *base)
+{
+#define r_gt(a, b) ((*a) > (*b))
+  GK_MKQSORT(real_t, base, n, r_gt);
+#undef r_gt
+}
+/* ikvsorti: GK_MKQSORT over the n ikv_t items at base, comparing only key with <; presumably increasing key order */
+void ikvsorti(size_t n, ikv_t *base)
+{
+#define ikey_lt(a, b) ((a)->key < (b)->key)
+  GK_MKQSORT(ikv_t, base, n, ikey_lt);
+#undef ikey_lt
+}
+/* ikvsortii: GK_MKQSORT over the n ikv_t items at base */
+/* Sorts based both on key and val: key is compared first, val only when the keys are equal */
+void ikvsortii(size_t n, ikv_t *base)
+{
+#define ikeyval_lt(a, b) ((a)->key < (b)->key || ((a)->key == (b)->key && (a)->val < (b)->val))
+  GK_MKQSORT(ikv_t, base, n, ikeyval_lt);
+#undef ikeyval_lt
+}
+/* ikvsortd: GK_MKQSORT over the n ikv_t items at base, comparing only key with >; presumably decreasing key order */
+void ikvsortd(size_t n, ikv_t *base)
+{
+#define ikey_gt(a, b) ((a)->key > (b)->key)
+  GK_MKQSORT(ikv_t, base, n, ikey_gt);
+#undef ikey_gt
+}
+/* rkvsorti: GK_MKQSORT over the n rkv_t items at base, comparing only key with <; presumably increasing key order */
+void rkvsorti(size_t n, rkv_t *base)
+{
+#define rkey_lt(a, b) ((a)->key < (b)->key)
+  GK_MKQSORT(rkv_t, base, n, rkey_lt);
+#undef rkey_lt
+}
+/* rkvsortd: GK_MKQSORT over the n rkv_t items at base, comparing only key with >; presumably decreasing key order */
+void rkvsortd(size_t n, rkv_t *base)
+{
+#define rkey_gt(a, b) ((a)->key > (b)->key)
+  GK_MKQSORT(rkv_t, base, n, rkey_gt);
+#undef rkey_gt
+}
+/* uvwsorti: GK_MKQSORT over the n uvw_t items at base; u is compared first, v only when the u values are equal */
+void uvwsorti(size_t n, uvw_t *base)
+{
+#define uvwkey_lt(a, b) ((a)->u < (b)->u || ((a)->u == (b)->u && (a)->v < (b)->v))
+  GK_MKQSORT(uvw_t, base, n, uvwkey_lt);
+#undef uvwkey_lt
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/gklib_defs.h b/3rdParty/metis/metis-5.1.0/libmetis/gklib_defs.h
new file mode 100644
index 000000000..dfac5ca67
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/gklib_defs.h
@@ -0,0 +1,53 @@
+/*!
+\file
+\brief Data structures and prototypes for GKlib integration
+
+\date  Started 12/23/2008
+\author George
+\version\verbatim $Id: gklib_defs.h 10395 2011-06-23 23:28:06Z karypis $ \endverbatim
+*/
+
+#ifndef _LIBMETIS_GKLIB_H_
+#define _LIBMETIS_GKLIB_H_
+
+#include "gklib_rename.h"
+
+/*************************************************************************/
+/*! Stores a weighted edge; uvwsorti() takes an array of these */
+/*************************************************************************/
+typedef struct {
+  idx_t u, v, w;               /*!< Edge (u,v) with weight w */
+} uvw_t;
+
+/*************************************************************************
+* Define various data structures using GKlib's templates.
+**************************************************************************/
+GK_MKKEYVALUE_T(ikv_t, idx_t, idx_t)
+GK_MKKEYVALUE_T(rkv_t, real_t, idx_t)
+GK_MKPQUEUE_T(ipq_t, ikv_t)
+GK_MKPQUEUE_T(rpq_t, rkv_t)
+
+
+/* Prototypes for gklib.c */
+GK_MKBLAS_PROTO(i, idx_t, idx_t)
+GK_MKBLAS_PROTO(r, real_t, real_t)
+GK_MKALLOC_PROTO(i, idx_t)
+GK_MKALLOC_PROTO(r, real_t)
+GK_MKALLOC_PROTO(ikv, ikv_t)
+GK_MKALLOC_PROTO(rkv, rkv_t)
+GK_MKPQUEUE_PROTO(ipq, ipq_t, idx_t, idx_t)
+GK_MKPQUEUE_PROTO(rpq, rpq_t, real_t, idx_t)
+GK_MKRANDOM_PROTO(i, idx_t, idx_t)
+GK_MKARRAY2CSR_PROTO(i, idx_t)
+void isorti(size_t n, idx_t *base);
+void isortd(size_t n, idx_t *base);
+void rsorti(size_t n, real_t *base);
+void rsortd(size_t n, real_t *base);
+void ikvsorti(size_t n, ikv_t *base);
+void ikvsortii(size_t n, ikv_t *base);
+void ikvsortd(size_t n, ikv_t *base);
+void rkvsorti(size_t n, rkv_t *base);
+void rkvsortd(size_t n, rkv_t *base);
+void uvwsorti(size_t n, uvw_t *base);
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/gklib_rename.h b/3rdParty/metis/metis-5.1.0/libmetis/gklib_rename.h
new file mode 100644
index 000000000..78dc8b39e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/gklib_rename.h
@@ -0,0 +1,122 @@
+/*!
+\file
+
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * This file contains the macros that prefix the GKlib-derived routines with libmetis__
+ *
+ * Started 10/2/97
+ * George
+ *
+ * $Id: gklib_rename.h 10395 2011-06-23 23:28:06Z karypis $
+ *
+ */
+
+
+#ifndef _LIBMETIS_GKLIB_RENAME_H_
+#define _LIBMETIS_GKLIB_RENAME_H_
+
+/* gklib.c - generated from the .o files using the ./utils/listundescapedsumbols.csh */
+#define iAllocMatrix libmetis__iAllocMatrix
+#define iFreeMatrix libmetis__iFreeMatrix
+#define iSetMatrix libmetis__iSetMatrix
+#define iargmax libmetis__iargmax
+#define iargmax_n libmetis__iargmax_n
+#define iargmin libmetis__iargmin
+#define iarray2csr libmetis__iarray2csr
+#define iaxpy libmetis__iaxpy
+#define icopy libmetis__icopy
+#define idot libmetis__idot
+#define iincset libmetis__iincset
+#define ikvAllocMatrix libmetis__ikvAllocMatrix
+#define ikvFreeMatrix libmetis__ikvFreeMatrix
+#define ikvSetMatrix libmetis__ikvSetMatrix
+#define ikvcopy libmetis__ikvcopy
+#define ikvmalloc libmetis__ikvmalloc
+#define ikvrealloc libmetis__ikvrealloc
+#define ikvset libmetis__ikvset
+#define ikvsmalloc libmetis__ikvsmalloc
+#define ikvsortd libmetis__ikvsortd
+#define ikvsorti libmetis__ikvsorti
+#define ikvsortii libmetis__ikvsortii
+#define imalloc libmetis__imalloc
+#define imax libmetis__imax
+#define imin libmetis__imin
+#define inorm2 libmetis__inorm2
+#define ipqCheckHeap libmetis__ipqCheckHeap
+#define ipqCreate libmetis__ipqCreate
+#define ipqDelete libmetis__ipqDelete
+#define ipqDestroy libmetis__ipqDestroy
+#define ipqFree libmetis__ipqFree
+#define ipqGetTop libmetis__ipqGetTop
+#define ipqInit libmetis__ipqInit
+#define ipqInsert libmetis__ipqInsert
+#define ipqLength libmetis__ipqLength
+#define ipqReset libmetis__ipqReset
+#define ipqSeeKey libmetis__ipqSeeKey
+#define ipqSeeTopKey libmetis__ipqSeeTopKey
+#define ipqSeeTopVal libmetis__ipqSeeTopVal
+#define ipqUpdate libmetis__ipqUpdate
+#define isrand libmetis__isrand
+#define irand libmetis__irand
+#define irandArrayPermute libmetis__irandArrayPermute
+#define irandArrayPermuteFine libmetis__irandArrayPermuteFine
+#define irandInRange libmetis__irandInRange
+#define irealloc libmetis__irealloc
+#define iscale libmetis__iscale
+#define iset libmetis__iset
+#define ismalloc libmetis__ismalloc
+#define isortd libmetis__isortd
+#define isorti libmetis__isorti
+#define isrand libmetis__isrand /* repeats the identical isrand rename a few lines up */
+#define isum libmetis__isum
+#define rAllocMatrix libmetis__rAllocMatrix
+#define rFreeMatrix libmetis__rFreeMatrix
+#define rSetMatrix libmetis__rSetMatrix
+#define rargmax libmetis__rargmax
+#define rargmax_n libmetis__rargmax_n
+#define rargmin libmetis__rargmin
+#define raxpy libmetis__raxpy
+#define rcopy libmetis__rcopy
+#define rdot libmetis__rdot
+#define rincset libmetis__rincset
+#define rkvAllocMatrix libmetis__rkvAllocMatrix
+#define rkvFreeMatrix libmetis__rkvFreeMatrix
+#define rkvSetMatrix libmetis__rkvSetMatrix
+#define rkvcopy libmetis__rkvcopy
+#define rkvmalloc libmetis__rkvmalloc
+#define rkvrealloc libmetis__rkvrealloc
+#define rkvset libmetis__rkvset
+#define rkvsmalloc libmetis__rkvsmalloc
+#define rkvsortd libmetis__rkvsortd
+#define rkvsorti libmetis__rkvsorti
+#define rmalloc libmetis__rmalloc
+#define rmax libmetis__rmax
+#define rmin libmetis__rmin
+#define rnorm2 libmetis__rnorm2
+#define rpqCheckHeap libmetis__rpqCheckHeap
+#define rpqCreate libmetis__rpqCreate
+#define rpqDelete libmetis__rpqDelete
+#define rpqDestroy libmetis__rpqDestroy
+#define rpqFree libmetis__rpqFree
+#define rpqGetTop libmetis__rpqGetTop
+#define rpqInit libmetis__rpqInit
+#define rpqInsert libmetis__rpqInsert
+#define rpqLength libmetis__rpqLength
+#define rpqReset libmetis__rpqReset
+#define rpqSeeKey libmetis__rpqSeeKey
+#define rpqSeeTopKey libmetis__rpqSeeTopKey
+#define rpqSeeTopVal libmetis__rpqSeeTopVal
+#define rpqUpdate libmetis__rpqUpdate
+#define rrealloc libmetis__rrealloc
+#define rscale libmetis__rscale
+#define rset libmetis__rset
+#define rsmalloc libmetis__rsmalloc
+#define rsortd libmetis__rsortd
+#define rsorti libmetis__rsorti
+#define rsum libmetis__rsum
+#define uvwsorti libmetis__uvwsorti
+
+#endif
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/graph.c b/3rdParty/metis/metis-5.1.0/libmetis/graph.c
new file mode 100644
index 000000000..37f7d09dc
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/graph.c
@@ -0,0 +1,274 @@
+/**
+\file
+\brief Functions that deal with setting up the graphs for METIS.
+
+\date   Started 7/25/1997
+\author George  
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version\verbatim $Id: graph.c 10513 2011-07-07 22:06:03Z karypis $ \endverbatim
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function sets up the graph from the user input.
+    xadj and adjncy are referenced rather than copied (free_* flags cleared);
+    the same holds for vwgt and adjwgt when non-NULL, otherwise unit weights
+    are allocated. For METIS_OBJTYPE_VOL, vsize is handled likewise and the
+    adjwgt argument is ignored: edge weights are always allocated and set to
+    1+vsize[u]+vsize[v]. vsize is not consulted for other objective types.
+    \returns the newly created graph_t. */
+/*************************************************************************/
+graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, 
+             idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt) 
+{
+  idx_t i, j;
+  graph_t *graph;
+
+  /* allocate the graph and fill in the fields */
+  graph = CreateGraph();
+
+  graph->nvtxs  = nvtxs;
+  graph->nedges = xadj[nvtxs];
+  graph->ncon   = ncon;
+
+  graph->xadj      = xadj;
+  graph->free_xadj = 0;
+
+  graph->adjncy      = adjncy;
+  graph->free_adjncy = 0;
+
+  /* setup the vertex weights */
+  if (vwgt) {
+    graph->vwgt      = vwgt;
+    graph->free_vwgt = 0;
+  }
+  else {
+    vwgt = graph->vwgt = ismalloc(ncon*nvtxs, 1, "SetupGraph: vwgt");
+  }
+
+  /* Only allocate the per-constraint totals here; SetupGraph_tvwgt() below
+     fills them in, so the sums are not computed twice. */
+  graph->tvwgt    = imalloc(ncon, "SetupGraph: tvwgts");
+  graph->invtvwgt = rmalloc(ncon, "SetupGraph: invtvwgts");
+
+  if (ctrl->objtype == METIS_OBJTYPE_VOL) { 
+    /* Setup the vsize */
+    if (vsize) {
+      graph->vsize      = vsize;
+      graph->free_vsize = 0;
+    }
+    else {
+      vsize = graph->vsize = ismalloc(nvtxs, 1, "SetupGraph: vsize");
+    }
+
+    /* Allocate memory for edge weights and initialize them to the sum of the vsize */
+    adjwgt = graph->adjwgt = imalloc(graph->nedges, "SetupGraph: adjwgt");
+    for (i=0; i<nvtxs; i++) {
+      for (j=xadj[i]; j<xadj[i+1]; j++)
+        adjwgt[j] = 1+vsize[i]+vsize[adjncy[j]];
+    }
+  }
+  else { /* For edgecut minimization */
+    /* setup the edge weights */
+    if (adjwgt) {
+      graph->adjwgt      = adjwgt;
+      graph->free_adjwgt = 0;
+    }
+    else {
+      adjwgt = graph->adjwgt = ismalloc(graph->nedges, 1, "SetupGraph: adjwgt");
+    }
+  }
+
+  /* setup various derived info */
+  SetupGraph_tvwgt(graph);
+
+  if (ctrl->optype == METIS_OP_PMETIS || ctrl->optype == METIS_OP_OMETIS) 
+    SetupGraph_label(graph);
+
+  ASSERT(CheckGraph(graph, ctrl->numflag, 1));
+
+  return graph;
+}
+
+
+/*************************************************************************/
+/*! Computes, per constraint, the total vertex weight and its reciprocal */
+/*************************************************************************/
+void SetupGraph_tvwgt(graph_t *graph)
+{
+  idx_t c, total;
+
+  if (!graph->tvwgt) 
+    graph->tvwgt = imalloc(graph->ncon, "SetupGraph_tvwgt: tvwgt");
+  if (!graph->invtvwgt) 
+    graph->invtvwgt = rmalloc(graph->ncon, "SetupGraph_tvwgt: invtvwgt");
+
+  for (c=0; c<graph->ncon; c++) {
+    total = graph->tvwgt[c] = isum(graph->nvtxs, graph->vwgt+c, graph->ncon);
+    graph->invtvwgt[c] = 1.0/(total > 0 ? total : 1);  /* non-positive totals divide by 1 */
+  }
+}
+
+
+/*************************************************************************/
+/*! Fills graph->label with the identity mapping, allocating it if needed */
+/*************************************************************************/
+void SetupGraph_label(graph_t *graph)
+{
+  idx_t v;
+
+  if (!graph->label)
+    graph->label = imalloc(graph->nvtxs, "SetupGraph_label: label");
+
+  for (v=graph->nvtxs-1; v>=0; v--)
+    graph->label[v] = v;
+}
+
+
+/*************************************************************************/
+/*! Creates the graph_t of a split (sub)graph and allocates, without filling
+    in, its arrays; vsize is allocated only if the parent graph has one. */
+/*************************************************************************/
+graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges)
+{
+  graph_t *sub = CreateGraph();
+
+  sub->nvtxs  = snvtxs;
+  sub->nedges = snedges;
+  sub->ncon   = graph->ncon;
+
+  /* per-vertex, per-edge and per-constraint storage of the sub-graph */
+  sub->xadj     = imalloc(snvtxs+1, "SetupSplitGraph: xadj");
+  sub->vwgt     = imalloc(sub->ncon*snvtxs, "SetupSplitGraph: vwgt");
+  sub->adjncy   = imalloc(snedges, "SetupSplitGraph: adjncy");
+  sub->adjwgt   = imalloc(snedges, "SetupSplitGraph: adjwgt");
+  sub->label    = imalloc(snvtxs, "SetupSplitGraph: label");
+  sub->tvwgt    = imalloc(sub->ncon, "SetupSplitGraph: tvwgt");
+  sub->invtvwgt = rmalloc(sub->ncon, "SetupSplitGraph: invtvwgt");
+
+  /* vertex sizes are carried over only when the parent tracks them */
+  if (graph->vsize != NULL)
+    sub->vsize = imalloc(snvtxs, "SetupSplitGraph: vsize");
+
+  return sub;
+}
+
+
+/*************************************************************************/
+/*! Allocates a graph_t and returns it in the state set up by InitGraph() */
+/*************************************************************************/
+graph_t *CreateGraph(void)
+{
+  graph_t *fresh;
+
+  fresh = (graph_t *)gk_malloc(sizeof(*fresh), "CreateGraph: graph");
+
+  InitGraph(fresh);  /* reset every field to its default */
+
+  return fresh;
+}
+
+
+/*************************************************************************/
+/*! Puts every field of a graph_t into its default, "empty graph" state */
+/*************************************************************************/
+void InitGraph(graph_t *graph) 
+{
+  memset(graph, 0, sizeof(*graph));
+
+  /* sizes and objective values all start out as -1 */
+  graph->nbnd      = -1;
+  graph->minvol    = -1;
+  graph->mincut    = -1;
+  graph->ncon      = -1;
+  graph->nedges    = -1;
+  graph->nvtxs     = -1;
+
+  /* no graph arrays are attached yet */
+  graph->invtvwgt  = NULL;
+  graph->tvwgt     = NULL;
+  graph->cmap      = NULL;
+  graph->label     = NULL;
+  graph->adjwgt    = NULL;
+  graph->adjncy    = NULL;
+  graph->vsize     = NULL;
+  graph->vwgt      = NULL;
+  graph->xadj      = NULL;
+
+  /* the free_* flags default to 1; code that attaches caller-supplied
+     arrays is expected to clear the corresponding flag afterwards */
+  graph->free_adjwgt = 1;
+  graph->free_adjncy = 1;
+  graph->free_vsize  = 1;
+  graph->free_vwgt   = 1;
+  graph->free_xadj   = 1;
+
+  /* no partition/refinement data is attached yet */
+  graph->vkrinfo   = NULL;
+  graph->ckrinfo   = NULL;
+  graph->nrinfo    = NULL;
+  graph->bndind    = NULL;
+  graph->bndptr    = NULL;
+  graph->ed        = NULL;
+  graph->id        = NULL;
+  graph->pwgts     = NULL;
+  graph->where     = NULL;
+
+  /* not linked to any coarser/finer graph */
+  graph->finer     = NULL;
+  graph->coarser   = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function frees the refinement/partition memory stored in a graph */
+/*************************************************************************/
+void FreeRData(graph_t *graph) 
+{
+  /* If both pointers hold the same address, clear one so it reaches gk_free() once. */
+  /* The following is for the -minconn and -contig to work properly in
+     the vol-refinement routines */
+  if ((void *)graph->ckrinfo == (void *)graph->vkrinfo)
+    graph->ckrinfo = NULL;
+
+
+  /* free partition/refinement structure */
+  gk_free((void **)&graph->where, &graph->pwgts, &graph->id, &graph->ed, 
+      &graph->bndptr, &graph->bndind, &graph->nrinfo, &graph->ckrinfo, 
+      &graph->vkrinfo, LTERM);
+}
+
+
+/*************************************************************************/
+/*! This function deallocates any memory stored in a graph, then the graph_t
+    itself, and sets *r_graph to NULL; arrays with free_* == 0 are kept. */
+/*************************************************************************/
+void FreeGraph(graph_t **r_graph) 
+{
+  graph_t *graph;
+
+  graph = *r_graph;
+
+  /* free graph structure */
+  if (graph->free_xadj)
+    gk_free((void **)&graph->xadj, LTERM);
+  if (graph->free_vwgt)
+    gk_free((void **)&graph->vwgt, LTERM);
+  if (graph->free_vsize)
+    gk_free((void **)&graph->vsize, LTERM);
+  if (graph->free_adjncy)
+    gk_free((void **)&graph->adjncy, LTERM);
+  if (graph->free_adjwgt)
+    gk_free((void **)&graph->adjwgt, LTERM);
+    
+  FreeRData(graph);  /* free partition/refinement structure */
+
+  gk_free((void **)&graph->tvwgt, &graph->invtvwgt, &graph->label, 
+      &graph->cmap, &graph, LTERM);
+
+  *r_graph = NULL;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/initpart.c b/3rdParty/metis/metis-5.1.0/libmetis/initpart.c
new file mode 100644
index 000000000..2f6c81b72
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/initpart.c
@@ -0,0 +1,630 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * initpart.c
+ *
+ * This file contains code that performs the initial partition of the
+ * coarsest graph
+ *
+ * Started 7/23/97
+ * George
+ *
+ */
+
+#include "metislib.h"
+
+/*************************************************************************/
+/*! Computes the initial bisection of the coarsest graph by dispatching on
+    ctrl->iptype and on the number of constraints. ctrl->dbglvl is adjusted
+    for the duration of the call and restored before returning. */
+/*************************************************************************/
+void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts) 
+{
+  mdbglvl_et saved_dbglvl = ctrl->dbglvl;
+
+  ASSERT(graph->tvwgt[0] >= 0);
+
+  /* take the REFINE and MOVEINFO flags out of the debug level for now */
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE);
+  IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr));
+
+  switch (ctrl->iptype) {
+    case METIS_IPTYPE_GROW:
+      if (graph->nedges != 0) {
+        if (graph->ncon != 1)
+          McGrowBisection(ctrl, graph, ntpwgts, niparts);
+        else
+          GrowBisection(ctrl, graph, ntpwgts, niparts);
+        break;
+      }
+      /* a graph without edges is bisected randomly instead of being grown */
+      /* fallthrough */
+    case METIS_IPTYPE_RANDOM:
+      if (graph->ncon != 1)
+        McRandomBisection(ctrl, graph, ntpwgts, niparts);
+      else
+        RandomBisection(ctrl, graph, ntpwgts, niparts);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown initial partition type: %d\n", ctrl->iptype);
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Cut: %"PRIDX"\n", graph->mincut));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr));
+  /* put the caller's debug level back */
+  ctrl->dbglvl = saved_dbglvl;
+
+}
+
+
+/*************************************************************************/
+/*! This function computes the initial separator of the coarsest graph using
+    fixed 0.5/0.5 target weights; ctrl->dbglvl is restored before returning. */
+/*************************************************************************/
+void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts) 
+{
+  real_t ntpwgts[2] = {0.5, 0.5};
+  mdbglvl_et dbglvl;
+
+  dbglvl = ctrl->dbglvl;
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE, ctrl->dbglvl -= METIS_DBG_REFINE);
+  IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, ctrl->dbglvl -= METIS_DBG_MOVEINFO);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr));
+
+  /* this is required for the cut-based part of the refinement */
+  Setup2WayBalMultipliers(ctrl, graph, ntpwgts);
+
+  switch (ctrl->iptype) {
+    case METIS_IPTYPE_EDGE:
+      if (graph->nedges == 0)
+        RandomBisection(ctrl, graph, ntpwgts, niparts);
+      else
+        GrowBisection(ctrl, graph, ntpwgts, niparts);
+
+      Compute2WayPartitionParams(ctrl, graph);
+      ConstructSeparator(ctrl, graph);
+      break;
+
+    case METIS_IPTYPE_NODE:
+      GrowBisectionNode(ctrl, graph, ntpwgts, niparts);
+      break;
+
+    default:
+      /* iptype is not an idx_t, so it must not be printed with PRIDX */
+      gk_errexit(SIGERR, "Unknown iptype of %d\n", (int)ctrl->iptype);
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_IPART, printf("Initial Sep: %"PRIDX"\n", graph->mincut));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr));
+  ctrl->dbglvl = dbglvl;
+}
+
+
+/*************************************************************************/
+/*! This function computes a bisection of a graph by randomly assigning
+    the vertices followed by a bisection refinement. The best of up to
+    niparts attempts is returned in graph->where, its cut in graph->mincut.
+*/
+/*************************************************************************/
+void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, ii, j, k, nvtxs, pwgts[2], zeromaxpwgt, from, me, 
+        bestcut=0, icut, mincut, inbfs;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where;
+  idx_t *perm, *bestwhere;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  Allocate2WayPartitionMemory(ctrl, graph);
+  where = graph->where;
+
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+  perm      = iwspacemalloc(ctrl, nvtxs);
+
+  zeromaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[0];  /* weight limit for side 0 */
+
+  for (inbfs=0; inbfs<niparts; inbfs++) {
+    iset(nvtxs, 1, where);  /* presumably puts every vertex on side 1 -- confirm iset() */
+
+    if (inbfs > 0) {  /* the first attempt skips the random assignment */
+      irandArrayPermute(nvtxs, perm, nvtxs/2, 1);
+      pwgts[1] = graph->tvwgt[0];
+      pwgts[0] = 0;
+
+      for (ii=0; ii<nvtxs; ii++) {
+        i = perm[ii];
+        if (pwgts[0]+vwgt[i] < zeromaxpwgt) {
+          where[i] = 0;
+          pwgts[0] += vwgt[i];
+          pwgts[1] -= vwgt[i];
+          if (pwgts[0] > zeromaxpwgt)  /* NOTE(review): never true; the enclosing test keeps pwgts[0] < zeromaxpwgt */
+            break;
+        }
+      }
+    }
+
+    /* Do some partition refinement  */
+    Compute2WayPartitionParams(ctrl, graph);
+    /* printf("IPART: %3"PRIDX" [%5"PRIDX" %5"PRIDX"] [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); */
+
+    Balance2Way(ctrl, graph, ntpwgts);
+    /* printf("BPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */
+
+    FM_2WayRefine(ctrl, graph, ntpwgts, 4);
+    /* printf("RPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], graph->pwgts[1], graph->mincut); */
+
+    if (inbfs==0 || bestcut > graph->mincut) {  /* keep the smallest cut seen so far */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+      if (bestcut == 0)  /* a zero cut ends the search early */
+        break;
+    }
+  }
+
+  graph->mincut = bestcut;  /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function takes a graph and produces a bisection by using a region
+    growing algorithm. The resulting bisection is refined using FM.
+    The best of up to niparts attempts is returned in graph->where.
+*/
+/*************************************************************************/
+void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, j, k, nvtxs, drain, nleft, first, last, 
+        pwgts[2], oneminpwgt, onemaxpwgt, 
+        from, me, bestcut=0, icut, mincut, inbfs;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where;
+  idx_t *queue, *touched, *gain, *bestwhere;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  Allocate2WayPartitionMemory(ctrl, graph);
+  where = graph->where;
+
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+  queue     = iwspacemalloc(ctrl, nvtxs);
+  touched   = iwspacemalloc(ctrl, nvtxs);
+
+  /* upper and lower bounds on the weight that may remain on side 1 */
+  onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*ntpwgts[1];
+  oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*ntpwgts[1];
+
+  for (inbfs=0; inbfs<niparts; inbfs++) {
+    iset(nvtxs, 1, where);
+
+    iset(nvtxs, 0, touched);
+
+    pwgts[1] = graph->tvwgt[0];
+    pwgts[0] = 0;
+
+    /* seed the BFS queue with a randomly chosen vertex */
+    queue[0] = irandInRange(nvtxs);
+    touched[queue[0]] = 1;
+    first = 0; 
+    last  = 1;
+    nleft = nvtxs-1;
+    drain = 0;
+
+    /* Start the BFS from queue to get a partition */
+    for (;;) {
+      if (first == last) { /* Empty. Disconnected graph! */
+        if (nleft == 0 || drain)
+          break;
+
+        k = irandInRange(nleft);
+        for (i=0; i<nvtxs; i++) { /* select the kth untouched vertex */
+          if (touched[i] == 0) {
+            if (k == 0)
+              break;
+            else
+              k--;
+          }
+        }
+
+        queue[0]   = i;
+        touched[i] = 1;
+        first      = 0; 
+        last       = 1;
+        nleft--;
+      }
+
+      i = queue[first++];
+      if (pwgts[0] > 0 && pwgts[1]-vwgt[i] < oneminpwgt) { /* moving i would leave side 1 too light */
+        drain = 1;
+        continue;
+      }
+
+      where[i] = 0;
+      INC_DEC(pwgts[0], pwgts[1], vwgt[i]);
+      if (pwgts[1] <= onemaxpwgt) /* side 1 is now within its upper bound */
+        break;
+
+      drain = 0;
+      for (j=xadj[i]; j<xadj[i+1]; j++) { /* enqueue the untouched neighbours of i */
+        k = adjncy[j];
+        if (touched[k] == 0) {
+          queue[last++] = k;
+          touched[k] = 1;
+          nleft--;
+        }
+      }
+    }
+
+    /* Check to see if we hit any bad limiting cases */
+    if (pwgts[1] == 0) 
+      where[irandInRange(nvtxs)] = 1;
+    if (pwgts[0] == 0) 
+      where[irandInRange(nvtxs)] = 0;
+
+    /*************************************************************
+    * Do some partition refinement 
+    **************************************************************/
+    Compute2WayPartitionParams(ctrl, graph);
+    /*
+    printf("IPART: %3"PRIDX" [%5"PRIDX" %5"PRIDX"] [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", 
+        graph->nvtxs, pwgts[0], pwgts[1], graph->pwgts[0], graph->pwgts[1], graph->mincut); 
+    */
+
+    Balance2Way(ctrl, graph, ntpwgts);
+    /*
+    printf("BPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0],
+        graph->pwgts[1], graph->mincut); 
+    */
+
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+    /*
+    printf("RPART: [%5"PRIDX" %5"PRIDX"] %5"PRIDX"\n", graph->pwgts[0], 
+        graph->pwgts[1], graph->mincut);
+    */
+
+    if (inbfs == 0 || bestcut > graph->mincut) { /* keep the smallest cut seen so far */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+      if (bestcut == 0) /* a zero cut ends the search early */
+        break;
+    }
+  }
+
+  graph->mincut = bestcut; /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function takes a multi-constraint graph and computes a bisection 
+    by randomly assigning the vertices and then refining it. The best of up
+    to 2*niparts attempts is returned in graph->where.
+*/
+/**************************************************************************/
+void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, ii, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs, qnum;
+  idx_t *bestwhere, *where, *perm, *counts;
+  idx_t *vwgt;
+
+  WCOREPUSH;
+
+  nvtxs = graph->nvtxs;
+  ncon  = graph->ncon;
+  vwgt  = graph->vwgt;
+
+  Allocate2WayPartitionMemory(ctrl, graph);
+  where = graph->where;
+
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+  perm      = iwspacemalloc(ctrl, nvtxs);
+  counts    = iwspacemalloc(ctrl, ncon);
+
+  for (inbfs=0; inbfs<2*niparts; inbfs++) {
+    irandArrayPermute(nvtxs, perm, nvtxs/2, 1);
+    iset(ncon, 0, counts);
+
+    /* partition by spliting the queues randomly */
+    for (ii=0; ii<nvtxs; ii++) {
+      i        = perm[ii];
+      qnum     = iargmax(ncon, vwgt+i*ncon);  /* presumably the index of the vertex's largest weight -- confirm iargmax() */
+      where[i] = (counts[qnum]++)%2;          /* alternate sides among vertices sharing the same qnum */
+    }
+
+    Compute2WayPartitionParams(ctrl, graph);
+
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+    Balance2Way(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+    Balance2Way(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+
+    if (inbfs == 0 || bestcut >= graph->mincut) {  /* note: a tie also replaces the stored best */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+      if (bestcut == 0)  /* a zero cut ends the search early */
+        break;
+    }
+  }
+
+  graph->mincut = bestcut;  /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function takes a multi-constraint graph and produces a bisection 
+    by using a region growing algorithm. The best of up to 2*niparts attempts
+    is returned in graph->where.
+*/
+/*************************************************************************/
+void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, j, k, nvtxs, ncon, from, bestcut=0, mincut, inbfs;
+  idx_t *bestwhere, *where;
+
+  WCOREPUSH;
+
+  nvtxs = graph->nvtxs;
+
+  Allocate2WayPartitionMemory(ctrl, graph);
+  where = graph->where;
+
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+
+  for (inbfs=0; inbfs<2*niparts; inbfs++) {
+    iset(nvtxs, 1, where);
+    where[irandInRange(nvtxs)] = 0;  /* a single random vertex seeds side 0 */
+
+    Compute2WayPartitionParams(ctrl, graph);
+
+    /* NOTE(review): no explicit growing loop here; presumably Balance2Way() grows side 0 from the seed -- confirm */
+    Balance2Way(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+    Balance2Way(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+
+    if (inbfs == 0 || bestcut >= graph->mincut) {  /* note: a tie also replaces the stored best */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+      if (bestcut == 0)  /* a zero cut ends the search early */
+        break;
+    }
+  }
+
+  graph->mincut = bestcut;  /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function takes a graph and produces a tri-section into left, right,
+    and separator using a region growing algorithm. The resulting separator
+    is refined using node FM.
+    The best of niparts attempts is returned in graph->where.
+*/
+/**************************************************************************/
+void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, j, k, nvtxs, drain, nleft, first, last, pwgts[2], oneminpwgt, 
+        onemaxpwgt, from, me, bestcut=0, icut, mincut, inbfs;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *bndind;
+  idx_t *queue, *touched, *gain, *bestwhere;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+  queue     = iwspacemalloc(ctrl, nvtxs);
+  touched   = iwspacemalloc(ctrl, nvtxs);
+
+  /* bounds on the weight left on side 1; a fixed 0.5 is used here, not ntpwgts */
+  onemaxpwgt = ctrl->ubfactors[0]*graph->tvwgt[0]*0.5;
+  oneminpwgt = (1.0/ctrl->ubfactors[0])*graph->tvwgt[0]*0.5;
+
+
+  /* Allocate refinement memory. Allocate sufficient memory for both edge and node */
+  graph->pwgts  = imalloc(3, "GrowBisectionNode: pwgts");
+  graph->where  = imalloc(nvtxs, "GrowBisectionNode: where");
+  graph->bndptr = imalloc(nvtxs, "GrowBisectionNode: bndptr");
+  graph->bndind = imalloc(nvtxs, "GrowBisectionNode: bndind");
+  graph->id     = imalloc(nvtxs, "GrowBisectionNode: id");
+  graph->ed     = imalloc(nvtxs, "GrowBisectionNode: ed");
+  graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode: nrinfo");
+  
+  where  = graph->where;
+  bndind = graph->bndind;
+
+  for (inbfs=0; inbfs<niparts; inbfs++) {
+    iset(nvtxs, 1, where);
+    iset(nvtxs, 0, touched);
+
+    pwgts[1] = graph->tvwgt[0];
+    pwgts[0] = 0;
+
+    queue[0] = irandInRange(nvtxs);  /* random BFS seed */
+    touched[queue[0]] = 1;
+    first = 0; last = 1;
+    nleft = nvtxs-1;
+    drain = 0;
+
+    /* Start the BFS from queue to get a partition */
+    for (;;) {
+      if (first == last) { /* Empty. Disconnected graph! */
+        if (nleft == 0 || drain)
+          break;
+  
+        k = irandInRange(nleft);
+        for (i=0; i<nvtxs; i++) { /* select the kth untouched vertex */
+          if (touched[i] == 0) {
+            if (k == 0)
+              break;
+            else
+              k--;
+          }
+        }
+
+        queue[0]   = i;
+        touched[i] = 1;
+        first      = 0; 
+        last       = 1;
+        nleft--;
+      }
+
+      i = queue[first++];
+      if (pwgts[1]-vwgt[i] < oneminpwgt) { /* NOTE(review): GrowBisection also requires pwgts[0] > 0 here -- confirm the difference is intended */
+        drain = 1;
+        continue;
+      }
+
+      where[i] = 0;
+      INC_DEC(pwgts[0], pwgts[1], vwgt[i]);
+      if (pwgts[1] <= onemaxpwgt) /* side 1 is now within its upper bound */
+        break;
+
+      drain = 0;
+      for (j=xadj[i]; j<xadj[i+1]; j++) { /* enqueue the untouched neighbours of i */
+        k = adjncy[j];
+        if (touched[k] == 0) {
+          queue[last++] = k;
+          touched[k] = 1;
+          nleft--;
+        }
+      }
+    }
+
+    /*************************************************************
+    * Do some partition refinement 
+    **************************************************************/
+    Compute2WayPartitionParams(ctrl, graph);
+    Balance2Way(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, 4);
+
+    /* Construct and refine the vertex separator */
+    for (i=0; i<graph->nbnd; i++) {
+      j = bndind[i];
+      if (xadj[j+1]-xadj[j] > 0) /* ignore islands */
+        where[j] = 2;
+    }
+
+    Compute2WayNodePartitionParams(ctrl, graph); 
+    FM_2WayNodeRefine2Sided(ctrl, graph, 1);
+    FM_2WayNodeRefine1Sided(ctrl, graph, 4);
+
+    /*
+    printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", 
+        inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); 
+    */
+    
+    if (inbfs == 0 || bestcut > graph->mincut) { /* keep the attempt with the smallest graph->mincut */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+    }
+  }
+
+  graph->mincut = bestcut; /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function produces a tri-section into left, right, and separator.
+    Each attempt starts from a trivial assignment (all vertices on side 1,
+    plus one random vertex on side 0 after the first attempt), balances and
+    refines it, and turns the boundary into a separator refined by node FM.
+    The best of niparts attempts is returned in graph->where. */
+/**************************************************************************/
+void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         idx_t niparts)
+{
+  idx_t i, j, nvtxs, bestcut=0, inbfs;
+  idx_t *xadj, *where, *bndind, *bestwhere;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+
+  /* Allocate refinement memory. Allocate sufficient memory for both edge and node */
+  graph->pwgts  = imalloc(3, "GrowBisectionNode2: pwgts");
+  graph->where  = imalloc(nvtxs, "GrowBisectionNode2: where");
+  graph->bndptr = imalloc(nvtxs, "GrowBisectionNode2: bndptr");
+  graph->bndind = imalloc(nvtxs, "GrowBisectionNode2: bndind");
+  graph->id     = imalloc(nvtxs, "GrowBisectionNode2: id");
+  graph->ed     = imalloc(nvtxs, "GrowBisectionNode2: ed");
+  graph->nrinfo = (nrinfo_t *)gk_malloc(nvtxs*sizeof(nrinfo_t), "GrowBisectionNode2: nrinfo");
+  
+  bestwhere = iwspacemalloc(ctrl, nvtxs);
+
+  where  = graph->where;
+  bndind = graph->bndind;
+
+  for (inbfs=0; inbfs<niparts; inbfs++) {
+    iset(nvtxs, 1, where);
+    if (inbfs > 0)  /* the first attempt leaves every vertex on side 1 */
+      where[irandInRange(nvtxs)] = 0;
+
+    Compute2WayPartitionParams(ctrl, graph);
+    General2WayBalance(ctrl, graph, ntpwgts);
+    FM_2WayRefine(ctrl, graph, ntpwgts, ctrl->niter);
+
+    /* Construct and refine the vertex separator */
+    for (i=0; i<graph->nbnd; i++) {
+      j = bndind[i];
+      if (xadj[j+1]-xadj[j] > 0) /* ignore islands */
+        where[j] = 2;
+    }
+
+    Compute2WayNodePartitionParams(ctrl, graph); 
+    FM_2WayNodeRefine2Sided(ctrl, graph, 4);
+
+    /*
+    printf("ISep: [%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"] %"PRIDX"\n", 
+        inbfs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2], bestcut); 
+    */
+
+    if (inbfs == 0 || bestcut > graph->mincut) { /* keep the attempt with the smallest graph->mincut */
+      bestcut = graph->mincut;
+      icopy(nvtxs, where, bestwhere);
+    }
+  }
+
+  graph->mincut = bestcut; /* report the best attempt */
+  icopy(nvtxs, bestwhere, where);
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/kmetis.c b/3rdParty/metis/metis-5.1.0/libmetis/kmetis.c
new file mode 100644
index 000000000..cb6d1afb3
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/kmetis.c
@@ -0,0 +1,243 @@
+/*!
+\file  
+\brief The top-level routines for  multilevel k-way partitioning that minimizes
+       the edge cut.
+
+\date   Started 7/28/1997
+\author George  
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version\verbatim $Id: kmetis.c 13905 2013-03-25 13:21:20Z karypis $ \endverbatim
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Entry point for multilevel k-way partitioning: fills part and objval and
+    returns METIS_ERROR_MEMORY, METIS_ERROR_INPUT or metis_rcode(sigrval). */
+/*************************************************************************/
+int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, 
+          idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, 
+          real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval, 
+          idx_t *part)
+{
+  volatile int sigrval=0, renumber=0;  /* read at SIGTHROW, presumably after a longjmp */
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+  gk_sigtrap();
+  if ((sigrval = gk_sigcatch()) != 0)
+    goto SIGTHROW;
+
+  /* set up the run parameters */
+  ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec);
+  if (!ctrl) {
+    gk_siguntrap();
+    gk_malloc_cleanup(0);  /* pair with the gk_malloc_init() above */
+    return METIS_ERROR_INPUT;
+  }
+
+  /* if required, change the numbering to 0 */
+  if (ctrl->numflag == 1) {
+    Change2CNumbering(*nvtxs, xadj, adjncy);
+    renumber = 1;
+  }
+
+  /* A single partition needs no work: every vertex goes to part 0 and nothing
+     is cut. It also keeps gk_log2(*nparts) below from being a zero divisor. */
+  if (*nparts == 1) {
+    iset(*nvtxs, 0, part);
+    *objval = 0;
+    FreeCtrl(&ctrl);
+    goto SIGTHROW;
+  }
+
+  /* set up the graph */
+  graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt);
+  /* set up multipliers for making balance computations easier */
+  SetupKWayBalMultipliers(ctrl, graph);
+  /* set various run parameters that depend on the graph */
+  ctrl->CoarsenTo = gk_max((*nvtxs)/(20*gk_log2(*nparts)), 30*(*nparts));
+  ctrl->nIparts   = (ctrl->CoarsenTo == 30*(*nparts) ? 4 : 5);
+
+  /* take care contiguity requests for disconnected graphs */
+  if (ctrl->contig && !IsConnected(graph, 0)) 
+    gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n");
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+  /* start the partitioning */
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));
+  *objval = MlevelKWayPartitioning(ctrl, graph, part);
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));
+  /* clean up */
+  FreeCtrl(&ctrl);
+
+SIGTHROW:
+  /* if required, change the numbering back to 1 */
+  if (renumber)
+    Change2FNumbering(*nvtxs, xadj, adjncy, part);
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+  return metis_rcode(sigrval);
+}
+
+
+/*************************************************************************/
+/*! This function computes a k-way partitioning of a graph that minimizes
+    the specified objective function. It performs up to ctrl->ncuts
+    coarsen/partition/refine trials and keeps the best result.
+
+    \param ctrl is the control structure
+    \param graph is the graph to be partitioned; it is freed before returning
+    \param part is the vector that on return will store the partitioning
+
+    \returns the objective value of the partitioning. The partitioning 
+             itself is stored in the part vector.
+*/
+/*************************************************************************/
+idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part)
+{
+  idx_t i, curobj=0, bestobj=0;
+  real_t curbal=0.0, bestbal=0.0;
+  graph_t *cgraph;
+
+  for (i=0; i<ctrl->ncuts; i++) {
+    cgraph = CoarsenGraph(ctrl, graph);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->InitPartTmr));
+    AllocateKWayPartitionMemory(ctrl, cgraph);
+
+    /* Release the work space */
+    FreeWorkSpace(ctrl);
+
+    /* Compute the initial partitioning */
+    InitKWayPartitioning(ctrl, cgraph);
+
+    /* Re-allocate the work space */
+    AllocateWorkSpace(ctrl, graph);
+    AllocateRefinementWorkSpace(ctrl, 2*cgraph->nedges);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->InitPartTmr));
+    /* report the cut of the partition that was just computed for cgraph */
+    IFSET(ctrl->dbglvl, METIS_DBG_IPART, 
+        printf("Initial %"PRIDX"-way partitioning cut: %"PRIDX"\n", ctrl->nparts,
+            ComputeCut(cgraph, cgraph->where)));
+
+    RefineKWay(ctrl, graph, cgraph);
+
+    switch (ctrl->objtype) {
+      case METIS_OBJTYPE_CUT:
+        curobj = graph->mincut;
+        break;
+      case METIS_OBJTYPE_VOL:
+        curobj = graph->minvol;
+        break;
+      default:
+        gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype);
+    }
+
+    curbal = ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors);
+
+    /* keep the first trial, a (nearly) balanced trial with a better objective,
+       or a trial that is better balanced than an unbalanced best */
+    if (i == 0 
+        || (curbal <= 0.0005 && bestobj > curobj)
+        || (bestbal > 0.0005 && curbal < bestbal)) {
+      icopy(graph->nvtxs, graph->where, part);
+      bestobj = curobj;
+      bestbal = curbal;
+    }
+
+    FreeRData(graph);
+    if (bestobj == 0)
+      break;
+  }
+
+  FreeGraph(&graph);
+
+  return bestobj;
+}
+
+
+/*************************************************************************/
+/*! Computes the initial k-way partitioning with METIS_PartGraphRecursive and
+    leaves it in graph->where; cut and volume objectives are seeded alike. */
+/*************************************************************************/
+void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph)
+{
+  int status;
+  real_t *ubvec=NULL;
+  idx_t *bestwhere=NULL;
+  idx_t options[METIS_NOPTIONS];
+  idx_t i, ntrials, curobj=0, bestobj=0;
+
+  /* start from the defaults and override what the seeding run needs */
+  METIS_SetDefaultOptions(options);
+  options[METIS_OPTION_NO2HOP]  = ctrl->no2hop;
+  options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
+  options[METIS_OPTION_NITER]   = 10;
+
+  /* one imbalance tolerance per constraint: ubfactors[i]^(1/log(nparts)) */
+  ubvec = rmalloc(graph->ncon, "InitKWayPartitioning: ubvec");
+  i = 0;
+  while (i < graph->ncon) {
+    ubvec[i] = (real_t)pow(ctrl->ubfactors[i], 1.0/log(ctrl->nparts));
+    i++;
+  }
+
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_VOL:
+    case METIS_OBJTYPE_CUT:
+      options[METIS_OPTION_NCUTS] = ctrl->nIparts;
+      status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, 
+                   graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, 
+                   graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, 
+                   options, &curobj, graph->where);
+      if (status != METIS_OK)
+        gk_errexit(SIGERR, "Failed during initial partitioning\n");
+      break;
+
+#ifdef XXX /* This does not seem to help */
+    case METIS_OBJTYPE_VOL:
+      bestwhere = imalloc(graph->nvtxs, "InitKWayPartitioning: bestwhere");
+      options[METIS_OPTION_NCUTS] = 2;
+
+      ntrials = (ctrl->nIparts+1)/2;
+      for (i=0; i<ntrials; i++) {
+        status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, 
+                     graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, 
+                     graph->adjwgt, &ctrl->nparts, ctrl->tpwgts, ubvec, 
+                     options, &curobj, graph->where);
+        if (status != METIS_OK)
+          gk_errexit(SIGERR, "Failed during initial partitioning\n");
+
+        curobj = ComputeVolume(graph, graph->where);
+
+        if (i == 0 || bestobj > curobj) {
+          bestobj = curobj;
+          if (i < ntrials-1)
+            icopy(graph->nvtxs, graph->where, bestwhere);
+        }
+
+        if (bestobj == 0)
+          break;
+      }
+      if (bestobj != curobj)
+        icopy(graph->nvtxs, bestwhere, graph->where);
+
+      break;
+#endif
+    default:
+      gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype);
+  }
+
+  gk_free((void **)&ubvec, &bestwhere, LTERM);
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/kwayfm.c b/3rdParty/metis/metis-5.1.0/libmetis/kwayfm.c
new file mode 100644
index 000000000..dedfd3909
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/kwayfm.c
@@ -0,0 +1,1852 @@
+/*!
+\file 
+\brief Routines for k-way refinement 
+
+\date Started 7/28/97
+\author George
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version $Id: kwayfm.c 10567 2011-07-13 16:17:07Z karypis $
+*/
+
+#include "metislib.h"
+
+
+
+/*************************************************************************/
+/*! Top-level entry for k-way refinement/balancing. It only dispatches on
+    the objective (ctrl->objtype) and on whether the graph carries a single
+    constraint (graph->ncon == 1) or several. */
+/*************************************************************************/
+void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode)
+{
+  if (ctrl->objtype == METIS_OBJTYPE_CUT) {
+    /* edge-cut objective */
+    if (graph->ncon != 1)
+      Greedy_McKWayCutOptimize(ctrl, graph, niter, ffactor, omode);
+    else
+      Greedy_KWayCutOptimize(ctrl, graph, niter, ffactor, omode);
+  }
+  else if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+    /* volume objective */
+    if (graph->ncon != 1)
+      Greedy_McKWayVolOptimize(ctrl, graph, niter, ffactor, omode);
+    else
+      Greedy_KWayVolOptimize(ctrl, graph, niter, ffactor, omode);
+  }
+  else {
+    /* any other objective is reported through gk_errexit */
+    gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+}
+
+
+/*************************************************************************/
+/*! K-way partitioning optimization in which the vertices are visited in 
+    decreasing ed/sqrt(nnbrs)-id order. Note this is just an 
+    approximation, as the ed is often split across different subdomains 
+    and the sqrt(nnbrs) is just a crude approximation.
+
+  \param ctrl is the control structure.
+  \param graph is the graph that is being refined.
+  \param niter is the maximum number of refinement passes.
+  \param ffactor is the \em fudge-factor for allowing positive gain moves 
+         to violate the max-pwgt constraint.
+  \param omode is the type of optimization that will be performed among
+         OMODE_REFINE and OMODE_BALANCE. 
+
+*/
+/**************************************************************************/
+void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode)
+{
+  /* Common variables to all types of kway-refinement/balancing routines */
+  idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; 
+  idx_t from, me, to, oldcut, vwgt;
+  idx_t *xadj, *adjncy, *adjwgt;
+  idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts;
+  idx_t nmoved, nupd, *vstatus, *updptr, *updind;
+  idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL;
+  idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL;
+  idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE);
+
+  /* Edgecut-specific/different variables */
+  idx_t nbnd, oldnnbrs;
+  rpq_t *queue;
+  real_t rgain;
+  ckrinfo_t *myrinfo;
+  cnbr_t *mynbrs;
+
+  WCOREPUSH;
+
+  /* Link the graph fields */
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+
+  where = graph->where;
+  pwgts = graph->pwgts;
+  
+  nparts = ctrl->nparts;
+
+  /* Setup the weight intervals [minwgt, maxwgt] of the various subdomains */
+  minwgt  = iwspacemalloc(ctrl, nparts);
+  maxwgt  = iwspacemalloc(ctrl, nparts);
+  itpwgts = iwspacemalloc(ctrl, nparts);
+
+  for (i=0; i<nparts; i++) {
+    itpwgts[i] = ctrl->tpwgts[i]*graph->tvwgt[0];
+    maxwgt[i]  = ctrl->tpwgts[i]*graph->tvwgt[0]*ctrl->ubfactors[0];
+    minwgt[i]  = ctrl->tpwgts[i]*graph->tvwgt[0]*(1.0/ctrl->ubfactors[0]);
+  }
+
+  perm = iwspacemalloc(ctrl, nvtxs);
+
+
+  /* This stores the valid target subdomains. It is used when ctrl->minconn to
+     control the subdomains to which moves are allowed to be made. 
+     When ctrl->minconn is false, the default values of 2 allow all moves to
+     go through and it does not interfere with the zero-gain move selection. */
+  safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->minconn) {
+    ComputeSubDomainGraph(ctrl, graph);
+
+    nads    = ctrl->nads;
+    adids   = ctrl->adids;
+    adwgts  = ctrl->adwgts;
+    doms    = iset(nparts, 0, ctrl->pvec1);
+  }
+
+
+  /* Setup updptr, updind like boundary info to keep track of the vertices whose
+     vstatus's need to be reset at the end of the inner iteration */
+  vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs));
+  updptr  = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
+  updind  = iwspacemalloc(ctrl, nvtxs);
+
+  if (ctrl->contig) {
+    /* The arrays that will be used for limited check of articulation points */
+    bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+    bfsind = iwspacemalloc(ctrl, nvtxs);
+    bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  }
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+     printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"," 
+            " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX,
+            (omode == OMODE_REFINE ? "GRC" : "GBC"),
+            pwgts[iargmin(nparts, pwgts)], imax(nparts, pwgts), minwgt[0], maxwgt[0], 
+            ComputeLoadImbalance(graph, nparts, ctrl->pijbm), 
+            graph->nvtxs, graph->nbnd, graph->mincut);
+     if (ctrl->minconn) 
+       printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+     printf("\n");
+  }
+
+  queue = rpqCreate(nvtxs);
+
+  /*=====================================================================
+  * The top-level refinement loop 
+  *======================================================================*/
+  for (pass=0; pass<niter; pass++) {
+    ASSERT(ComputeCut(graph, where) == graph->mincut);
+
+    if (omode == OMODE_BALANCE) {
+      /* Check to see if things are out of balance, given the tolerance */
+      for (i=0; i<nparts; i++) {
+        if (pwgts[i] > maxwgt[i])
+          break;
+      }
+      if (i == nparts) /* Things are balanced. Return right away */
+        break;
+    }
+
+    oldcut = graph->mincut;
+    nbnd   = graph->nbnd;
+    nupd   = 0;
+
+    if (ctrl->minconn)
+      maxndoms = imax(nparts, nads);
+
+    /* Insert the boundary vertices in the priority queue */
+    irandArrayPermute(nbnd, perm, nbnd/4, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[perm[ii]];
+      rgain = (graph->ckrinfo[i].nnbrs > 0 ? 
+               1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) 
+               - graph->ckrinfo[i].id;
+      rpqInsert(queue, i, rgain);
+      vstatus[i] = VPQSTATUS_PRESENT;
+      ListInsert(nupd, updind, updptr, i);
+    }
+
+    /* Start extracting vertices from the queue and try to move them; iii
+       counts the vertices extracted so far in this pass */
+    for (nmoved=0, iii=0;;iii++) {
+      if ((i = rpqGetTop(queue)) == -1) 
+        break;
+      vstatus[i] = VPQSTATUS_EXTRACTED;
+
+      myrinfo = graph->ckrinfo+i;
+      mynbrs  = ctrl->cnbrpool + myrinfo->inbr;
+
+      from = where[i];
+      vwgt = graph->vwgt[i];
+
+      /* Prevent moves that make 'from' domain underbalanced */
+      if (omode == OMODE_REFINE) {
+        if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from]) 
+          continue;   
+      }
+      else { /* OMODE_BALANCE */
+        if (pwgts[from]-vwgt < minwgt[from]) 
+          continue;   
+      }
+
+      if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk))
+        continue;
+
+      if (ctrl->minconn)
+        SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms);
+
+      /* Find the most promising subdomain to move to */
+      if (omode == OMODE_REFINE) {
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          gain = mynbrs[k].ed-myrinfo->id; 
+          if (gain >= 0 && pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain)  
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+        /* check whether an earlier neighbor is a better target than k */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          gain = mynbrs[j].ed-myrinfo->id; 
+          if ((mynbrs[j].ed > mynbrs[k].ed && pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain) 
+              ||
+              (mynbrs[j].ed == mynbrs[k].ed && 
+               itpwgts[mynbrs[k].pid]*pwgts[to] < itpwgts[to]*pwgts[mynbrs[k].pid]))
+            k = j;
+        }
+
+        to = mynbrs[k].pid;
+        /* zero-gain moves need a balance reason, or even iii with safetos[to] == 2 */
+        gain = mynbrs[k].ed-myrinfo->id;
+        if (!(gain > 0 
+              || (gain == 0  
+                  && (pwgts[from] >= maxwgt[from] 
+                      || itpwgts[to]*pwgts[from] > itpwgts[from]*(pwgts[to]+vwgt) 
+                      || (iii%2 == 0 && safetos[to] == 2)
+                     )
+                 )
+             )
+           )
+          continue;
+      }
+      else {  /* OMODE_BALANCE */
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          if (pwgts[to]+vwgt <= maxwgt[to] || 
+              itpwgts[from]*(pwgts[to]+vwgt) <= itpwgts[to]*pwgts[from]) 
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+        /* among the remaining neighbors prefer a lower pwgts-to-itpwgts ratio */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          if (itpwgts[mynbrs[k].pid]*pwgts[to] < itpwgts[to]*pwgts[mynbrs[k].pid]) 
+            k = j;
+        }
+
+        to = mynbrs[k].pid;
+
+        if (pwgts[from] < maxwgt[from] && pwgts[to] > minwgt[to] && 
+            mynbrs[k].ed-myrinfo->id < 0) 
+          continue;
+      }
+
+
+
+      /*=====================================================================
+      * If we got here, we can now move the vertex from 'from' to 'to' 
+      *======================================================================*/
+      graph->mincut -= mynbrs[k].ed-myrinfo->id;
+      nmoved++;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+          printf("\t\tMoving %6"PRIDX" to %3"PRIDX". Gain: %4"PRIDX". Cut: %6"PRIDX"\n", 
+              i, to, mynbrs[k].ed-myrinfo->id, graph->mincut));
+
+      /* Update the subdomain connectivity information */
+      if (ctrl->minconn) {
+        /* take care of i's move itself */
+        UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms);
+
+        /* take care of the adjacent vertices */
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          me = where[adjncy[j]];
+          if (me != from && me != to) {
+            UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], &maxndoms);
+            UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], &maxndoms);
+          }
+        }
+      }
+
+      /* Update ID/ED and BND related information for the moved vertex */
+      INC_DEC(pwgts[to], pwgts[from], vwgt);
+      UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, 
+          bndptr, bndind, bndtype);
+      
+      /* Update the degrees of adjacent vertices */
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii = adjncy[j];
+        me = where[ii];
+        myrinfo = graph->ckrinfo+ii;
+
+        oldnnbrs = myrinfo->nnbrs;
+
+        UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, 
+            from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype);
+
+        UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, 
+            nupd, updptr, updind, bndtype);
+
+        ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]);
+      }
+    }
+
+    graph->nbnd = nbnd;
+
+    /* Reset the vstatus and associated data structures */
+    for (i=0; i<nupd; i++) {
+      ASSERT(updptr[updind[i]] != -1);
+      ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT);
+      vstatus[updind[i]] = VPQSTATUS_NOTPRESENT;
+      updptr[updind[i]]  = -1;
+    }
+
+    if (ctrl->dbglvl&METIS_DBG_REFINE) {
+       printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"."
+              " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX,
+              pwgts[iargmin(nparts, pwgts)], imax(nparts, pwgts),
+              ComputeLoadImbalance(graph, nparts, ctrl->pijbm), 
+              graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where));
+       if (ctrl->minconn) 
+         printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+       printf("\n");
+    }
+    /* stop when nothing moved or, in refine mode, the cut did not change */
+    if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut))
+      break;
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! K-way refinement that minimizes the communication volume. This is a 
+    greedy routine and the vertices are visited in decreasing gv order.
+
+  \param graph is the graph that is being refined.
+  \param niter is the maximum number of refinement passes.
+  \param ffactor is the \em fudge-factor for allowing positive gain moves 
+         to violate the max-pwgt constraint.
+  \param omode is the type of optimization: OMODE_REFINE or OMODE_BALANCE.
+*/
+/**************************************************************************/
+void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode)
+{
+  /* Common variables to all types of kway-refinement/balancing routines */
+  idx_t i, ii, iii, j, k, l, pass, nvtxs, nparts, gain; 
+  idx_t from, me, to, oldcut, vwgt;
+  idx_t *xadj, *adjncy;
+  idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts;
+  idx_t nmoved, nupd, *vstatus, *updptr, *updind;
+  idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL;
+  idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL;
+  idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE);
+
+  /* Volume-specific/different variables */
+  ipq_t *queue;
+  idx_t oldvol, xgain;
+  idx_t *vmarker, *pmarker, *modind;
+  vkrinfo_t *myrinfo;
+  vnbr_t *mynbrs;
+
+  WCOREPUSH;
+
+  /* Link the graph fields */
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  
+  nparts = ctrl->nparts;
+
+  /* Setup the weight intervals [minwgt, maxwgt] of the various subdomains */
+  minwgt  = iwspacemalloc(ctrl, nparts);
+  maxwgt  = iwspacemalloc(ctrl, nparts);
+  itpwgts = iwspacemalloc(ctrl, nparts);
+
+  for (i=0; i<nparts; i++) {
+    itpwgts[i] = ctrl->tpwgts[i]*graph->tvwgt[0];
+    maxwgt[i]  = ctrl->tpwgts[i]*graph->tvwgt[0]*ctrl->ubfactors[0];
+    minwgt[i]  = ctrl->tpwgts[i]*graph->tvwgt[0]*(1.0/ctrl->ubfactors[0]);
+  }
+
+  perm = iwspacemalloc(ctrl, nvtxs);
+
+
+  /* This stores the valid target subdomains. It is used when ctrl->minconn to
+     control the subdomains to which moves are allowed to be made. 
+     When ctrl->minconn is false, the default values of 2 allow all moves to
+     go through and it does not interfere with the zero-gain move selection. */
+  safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->minconn) {
+    ComputeSubDomainGraph(ctrl, graph);
+
+    nads    = ctrl->nads;
+    adids   = ctrl->adids;
+    adwgts  = ctrl->adwgts;
+    doms    = iset(nparts, 0, ctrl->pvec1);
+  }
+
+
+  /* Setup updptr, updind like boundary info to keep track of the vertices whose
+     vstatus's need to be reset at the end of the inner iteration */
+  vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs));
+  updptr  = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
+  updind  = iwspacemalloc(ctrl, nvtxs);
+
+  if (ctrl->contig) {
+    /* The arrays that will be used for limited check of articulation points */
+    bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+    bfsind = iwspacemalloc(ctrl, nvtxs);
+    bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  }
+
+  /* Vol-refinement specific working arrays */
+  modind  = iwspacemalloc(ctrl, nvtxs);
+  vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+     printf("%s: [%6"PRIDX" %6"PRIDX"]-[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL
+         ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX,
+         (omode == OMODE_REFINE ? "GRV" : "GBV"),
+         pwgts[iargmin(nparts, pwgts)], imax(nparts, pwgts), minwgt[0], maxwgt[0], 
+         ComputeLoadImbalance(graph, nparts, ctrl->pijbm), 
+         graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol);
+     if (ctrl->minconn) 
+       printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+     printf("\n");
+  }
+
+  queue = ipqCreate(nvtxs);
+
+
+  /*=====================================================================
+  * The top-level refinement loop 
+  *======================================================================*/
+  for (pass=0; pass<niter; pass++) {
+    ASSERT(ComputeVolume(graph, where) == graph->minvol);
+
+    if (omode == OMODE_BALANCE) {
+      /* Check to see if things are out of balance, given the tolerance */
+      for (i=0; i<nparts; i++) {
+        if (pwgts[i] > maxwgt[i])
+          break;
+      }
+      if (i == nparts) /* Things are balanced. Return right away */
+        break;
+    }
+
+    oldcut = graph->mincut;
+    oldvol = graph->minvol;
+    nupd   = 0;
+
+    if (ctrl->minconn)
+      maxndoms = imax(nparts, nads);
+
+    /* Insert the boundary vertices in the priority queue */
+    irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1);
+    for (ii=0; ii<graph->nbnd; ii++) {
+      i = bndind[perm[ii]];
+      ipqInsert(queue, i, graph->vkrinfo[i].gv);
+      vstatus[i] = VPQSTATUS_PRESENT;
+      ListInsert(nupd, updind, updptr, i);
+    }
+
+    /* Start extracting vertices from the queue and try to move them; iii
+       counts the vertices extracted so far in this pass */
+    for (nmoved=0, iii=0;;iii++) {
+      if ((i = ipqGetTop(queue)) == -1) 
+        break;
+      vstatus[i] = VPQSTATUS_EXTRACTED;
+
+      myrinfo = graph->vkrinfo+i;
+      mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+      from = where[i];
+      vwgt = graph->vwgt[i];
+
+      /* Prevent moves that make 'from' domain underbalanced */
+      if (omode == OMODE_REFINE) {
+        if (myrinfo->nid > 0 && pwgts[from]-vwgt < minwgt[from]) 
+          continue;
+      }
+      else { /* OMODE_BALANCE */
+        if (pwgts[from]-vwgt < minwgt[from]) 
+          continue;
+      }
+
+      if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk))
+        continue;
+
+      if (ctrl->minconn)
+        SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms);
+
+      xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0);
+
+      /* Find the most promising subdomain to move to */
+      if (omode == OMODE_REFINE) {
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          gain = mynbrs[k].gv + xgain;
+          if (gain >= 0 && pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain)  
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+        /* check whether an earlier neighbor is a better target than k */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          gain = mynbrs[j].gv + xgain;
+          if ((mynbrs[j].gv > mynbrs[k].gv && 
+               pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain) 
+              ||
+              (mynbrs[j].gv == mynbrs[k].gv && 
+               mynbrs[j].ned > mynbrs[k].ned &&
+               pwgts[to]+vwgt <= maxwgt[to]) 
+              ||
+              (mynbrs[j].gv == mynbrs[k].gv && 
+               mynbrs[j].ned == mynbrs[k].ned &&
+               itpwgts[mynbrs[k].pid]*pwgts[to] < itpwgts[to]*pwgts[mynbrs[k].pid])
+             )
+            k = j;
+        }
+        to = mynbrs[k].pid;
+
+        ASSERT(xgain+mynbrs[k].gv >= 0);
+        /* j is reused as the flag that says whether the move is accepted */
+        j = 0;
+        if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0)
+          j = 1;
+        else if (mynbrs[k].ned-myrinfo->nid == 0) {
+          if ((iii%2 == 0 && safetos[to] == 2) || 
+              pwgts[from] >= maxwgt[from] || 
+              itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from])
+            j = 1;
+        }
+        if (j == 0)
+          continue;
+      }
+      else { /* OMODE_BALANCE */
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          if (pwgts[to]+vwgt <= maxwgt[to] || 
+              itpwgts[from]*(pwgts[to]+vwgt) <= itpwgts[to]*pwgts[from])  
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+        /* among the remaining neighbors prefer a lower pwgts-to-itpwgts ratio */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          if (itpwgts[mynbrs[k].pid]*pwgts[to] < itpwgts[to]*pwgts[mynbrs[k].pid])
+            k = j;
+        }
+        to = mynbrs[k].pid;
+
+        if (pwgts[from] < maxwgt[from] && pwgts[to] > minwgt[to] && 
+            (xgain+mynbrs[k].gv < 0 || 
+             (xgain+mynbrs[k].gv == 0 &&  mynbrs[k].ned-myrinfo->nid < 0))
+           )
+          continue;
+      }
+          
+          
+      /*=====================================================================
+      * If we got here, we can now move the vertex from 'from' to 'to' 
+      *======================================================================*/
+      INC_DEC(pwgts[to], pwgts[from], vwgt);
+      graph->mincut -= mynbrs[k].ned-myrinfo->nid;
+      graph->minvol -= (xgain+mynbrs[k].gv);
+      where[i] = to;
+      nmoved++;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+          printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". "
+                 "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", 
+              i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, 
+              graph->mincut, graph->minvol));
+
+      /* Update the subdomain connectivity information */
+      if (ctrl->minconn) {
+        /* take care of i's move itself */
+        UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms);
+
+        /* take care of the adjacent vertices */
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          me = where[adjncy[j]];
+          if (me != from && me != to) {
+            UpdateEdgeSubDomainGraph(ctrl, from, me, -1, &maxndoms);
+            UpdateEdgeSubDomainGraph(ctrl, to, me, 1, &maxndoms);
+          }
+        }
+      }
+
+      /* Update the id/ed/gains/bnd/queue of potentially affected nodes */
+      KWayVolUpdate(ctrl, graph, i, from, to, queue, vstatus, &nupd, updptr, 
+          updind, bndtype, vmarker, pmarker, modind);
+
+      /*CheckKWayVolPartitionParams(ctrl, graph); */
+    }
+
+
+    /* Reset the vstatus and associated data structures */
+    for (i=0; i<nupd; i++) {
+      ASSERT(updptr[updind[i]] != -1);
+      ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT);
+      vstatus[updind[i]] = VPQSTATUS_NOTPRESENT;
+      updptr[updind[i]]  = -1;
+    }
+
+    if (ctrl->dbglvl&METIS_DBG_REFINE) {
+       printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"."
+              " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX,
+              pwgts[iargmin(nparts, pwgts)], imax(nparts, pwgts),
+              ComputeLoadImbalance(graph, nparts, ctrl->pijbm), 
+              graph->nbnd, nmoved, graph->mincut, graph->minvol);
+       if (ctrl->minconn) 
+         printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+       printf("\n");
+    }
+    /* stop when nothing moved or, in refine mode, volume and cut are unchanged */
+    if (nmoved == 0 || 
+        (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut))
+      break;
+  }
+
+  ipqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! K-way partitioning optimization in which the vertices are visited in 
+    decreasing ed/sqrt(nnbrs)-id order. Note this is just an 
+    approximation, as the ed is often split across different subdomains 
+    and the sqrt(nnbrs) is just a crude approximation.
+
+  \param ctrl is the control structure.
+  \param graph is the graph that is being refined.
+  \param niter is the maximum number of refinement passes.
+  \param ffactor is the \em fudge-factor for allowing positive gain moves 
+         to violate the max-pwgt constraint (not referenced in this body).
+  \param omode is the type of optimization that will be performed, among
+         OMODE_REFINE (stops once a pass leaves graph->mincut unchanged) and
+         OMODE_BALANCE (stops as soon as IsBalanced() reports balance).
+*/
+/**************************************************************************/
+void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode)
+{
+  /* Common variables to all types of kway-refinement/balancing routines */
+  idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; 
+  idx_t from, me, to, cto, oldcut;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt;
+  idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt;
+  idx_t nmoved, nupd, *vstatus, *updptr, *updind;
+  idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL;
+  idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL;
+  idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE);
+  real_t *ubfactors, *pijbm;
+  real_t origbal;
+
+  /* Edgecut-specific/different variables */
+  idx_t nbnd, oldnnbrs;
+  rpq_t *queue;
+  real_t rgain;
+  ckrinfo_t *myrinfo;
+  cnbr_t *mynbrs;
+
+  WCOREPUSH;  /* presumably opens a workspace frame closed by WCOREPOP below -- confirm */
+
+  /* Link the graph fields */
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+
+  where = graph->where;
+  pwgts = graph->pwgts;
+  
+  nparts = ctrl->nparts;
+  pijbm  = ctrl->pijbm;
+
+
+  /* Determine the ubfactors. The method used is different based on omode. 
+     When OMODE_BALANCE, the ubfactors are those supplied by the user. 
+     When OMODE_REFINE, the ubfactors are the max of the current partition
+     and the user-specified ones. */
+  ubfactors = rwspacemalloc(ctrl, ncon);
+  ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors);
+  origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors);  /* only used in the debug output below */
+  if (omode == OMODE_BALANCE) {
+    rcopy(ncon, ctrl->ubfactors, ubfactors);
+  }
+  else {
+    for (i=0; i<ncon; i++)
+      ubfactors[i] = (ubfactors[i] > ctrl->ubfactors[i] ? ubfactors[i] : ctrl->ubfactors[i]);
+  }
+
+
+  /* Setup the weight intervals of the various subdomains */
+  minwgt  = iwspacemalloc(ctrl, nparts*ncon);
+  maxwgt  = iwspacemalloc(ctrl, nparts*ncon);
+
+  for (i=0; i<nparts; i++) {
+    for (j=0; j<ncon; j++) {
+      maxwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j];
+      /*minwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]);*/
+      minwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2;  /* lower bound: 0.2 x tpwgts x tvwgt */
+    }
+  }
+
+  perm = iwspacemalloc(ctrl, nvtxs);  /* indexes bndind when the boundary is queued below */
+
+
+  /* This stores the valid target subdomains. It is used when ctrl->minconn to
+     control the subdomains to which moves are allowed to be made. 
+     When ctrl->minconn is false, the default values of 2 allow all moves to
+     go through and it does not interfere with the zero-gain move selection. */
+  safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->minconn) {
+    ComputeSubDomainGraph(ctrl, graph);
+
+    nads    = ctrl->nads;
+    adids   = ctrl->adids;
+    adwgts  = ctrl->adwgts;
+    doms    = iset(nparts, 0, ctrl->pvec1);  /* reuses ctrl->pvec1 instead of workspace memory */
+  }
+
+
+  /* Setup updptr, updind like boundary info to keep track of the vertices whose
+     vstatus's need to be reset at the end of the inner iteration */
+  vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs));
+  updptr  = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
+  updind  = iwspacemalloc(ctrl, nvtxs);
+
+  if (ctrl->contig) {
+    /* The arrays that will be used for limited check of articulation points */
+    bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+    bfsind = iwspacemalloc(ctrl, nvtxs);
+    bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  }
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+     printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL")," 
+            " Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %6"PRIDX", (%"PRIDX")",
+            (omode == OMODE_REFINE ? "GRC" : "GBC"),
+            imin(nparts*ncon, pwgts), imax(nparts*ncon, pwgts), imax(nparts*ncon, maxwgt),
+            ComputeLoadImbalance(graph, nparts, pijbm), origbal,
+            graph->nvtxs, graph->nbnd, graph->mincut, niter);
+     if (ctrl->minconn) 
+       printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+     printf("\n");
+  }
+
+  queue = rpqCreate(nvtxs);
+
+
+  /*=====================================================================
+  * The top-level refinement loop 
+  *======================================================================*/
+  for (pass=0; pass<niter; pass++) {
+    ASSERT(ComputeCut(graph, where) == graph->mincut);
+
+    /* In balancing mode, exit as soon as balance is reached */
+    if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0)) 
+      break;
+    
+    oldcut = graph->mincut;
+    nbnd   = graph->nbnd;  /* local copy; stored back into graph->nbnd after the pass */
+    nupd   = 0;
+
+    if (ctrl->minconn)
+      maxndoms = imax(nparts, nads);
+
+    /* Insert the boundary vertices in the priority queue */
+    irandArrayPermute(nbnd, perm, nbnd/4, 1);  /* presumably fills perm with a shuffled 0..nbnd-1 -- confirm */
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[perm[ii]];
+      rgain = (graph->ckrinfo[i].nnbrs > 0 ? 
+               1.0*graph->ckrinfo[i].ed/sqrt(graph->ckrinfo[i].nnbrs) : 0.0) 
+               - graph->ckrinfo[i].id;
+      rpqInsert(queue, i, rgain);
+      vstatus[i] = VPQSTATUS_PRESENT;
+      ListInsert(nupd, updind, updptr, i);
+    }
+
+    /* Start extracting vertices from the queue and try to move them */
+    for (nmoved=0, iii=0;;iii++) {  /* iii counts extractions; its parity gates some zero-gain moves */
+      if ((i = rpqGetTop(queue)) == -1) 
+        break;
+      vstatus[i] = VPQSTATUS_EXTRACTED;
+
+      myrinfo = graph->ckrinfo+i;
+      mynbrs  = ctrl->cnbrpool + myrinfo->inbr;
+
+      from = where[i];
+
+      /* Prevent moves that make 'from' domain underbalanced */
+      if (omode == OMODE_REFINE) {  /* in this mode the check applies only when myrinfo->id > 0 */
+        if (myrinfo->id > 0 && 
+            !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minwgt+from*ncon))
+          continue;   
+      }
+      else { /* OMODE_BALANCE */
+        if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minwgt+from*ncon)) 
+          continue;   
+      }
+
+      if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk))
+        continue;
+
+      if (ctrl->minconn)
+        SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms);
+
+      /* Find the most promising subdomain to move to */
+      if (omode == OMODE_REFINE) {
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {  /* first safe, non-negative-gain target that fits maxwgt */
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          gain = mynbrs[k].ed-myrinfo->id; 
+          if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon))
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+
+        cto = to;  /* best target so far; 'to' is overwritten by the scan below */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          if ((mynbrs[j].ed > mynbrs[k].ed && 
+               ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon))
+              ||
+              (mynbrs[j].ed == mynbrs[k].ed && 
+               BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, 
+                   1, pwgts+cto*ncon, pijbm+cto*ncon,
+                   1, pwgts+to*ncon, pijbm+to*ncon))) {
+            k   = j;
+            cto = to;
+          }
+        }
+        to = cto;
+
+        gain = mynbrs[k].ed-myrinfo->id;
+        if (!(gain > 0 
+              || (gain == 0  
+                  && (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                             -1, pwgts+from*ncon, pijbm+from*ncon,
+                             +1, pwgts+to*ncon, pijbm+to*ncon)
+                      || (iii%2 == 0 && safetos[to] == 2)  /* 2 is the initial safetos value set above */
+                     )
+                 )
+             )
+           )
+          continue;
+      }
+      else {  /* OMODE_BALANCE */
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon) || 
+              BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                  -1, pwgts+from*ncon, pijbm+from*ncon,
+                  +1, pwgts+to*ncon, pijbm+to*ncon))
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+
+        cto = to;  /* best target so far; 'to' is overwritten by the scan below */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          if (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors, 
+                   1, pwgts+cto*ncon, pijbm+cto*ncon,
+                   1, pwgts+to*ncon, pijbm+to*ncon)) {
+            k   = j;
+            cto = to;
+          }
+        }
+        to = cto;
+
+        if (mynbrs[k].ed-myrinfo->id < 0 &&
+            !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                  -1, pwgts+from*ncon, pijbm+from*ncon,
+                  +1, pwgts+to*ncon, pijbm+to*ncon))
+          continue;  /* negative gain and BetterBalanceKWay() did not accept the move */
+      }
+
+
+
+      /*=====================================================================
+      * If we got here, we can now move the vertex from 'from' to 'to' 
+      *======================================================================*/
+      graph->mincut -= mynbrs[k].ed-myrinfo->id;
+      nmoved++;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+          printf("\t\tMoving %6"PRIDX" to %3"PRIDX". Gain: %4"PRIDX". Cut: %6"PRIDX"\n", 
+              i, to, mynbrs[k].ed-myrinfo->id, graph->mincut));
+
+      /* Update the subdomain connectivity information */
+      if (ctrl->minconn) {
+        /* take care of i's move itself */
+        UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, &maxndoms);
+
+        /* take care of the adjacent vertices */
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          me = where[adjncy[j]];
+          if (me != from && me != to) {
+            UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], &maxndoms);
+            UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], &maxndoms);
+          }
+        }
+      }
+
+      /* Update ID/ED and BND related information for the moved vertex */
+      iaxpy(ncon,  1, vwgt+i*ncon, 1, pwgts+to*ncon,   1);
+      iaxpy(ncon, -1, vwgt+i*ncon, 1, pwgts+from*ncon, 1);
+      UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, 
+          nbnd, bndptr, bndind, bndtype);
+      
+      /* Update the degrees of adjacent vertices */
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii = adjncy[j];
+        me = where[ii];
+        myrinfo = graph->ckrinfo+ii;  /* from here on myrinfo refers to the neighbor ii, not to i */
+
+        oldnnbrs = myrinfo->nnbrs;
+
+        UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me, 
+            from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, bndtype);
+
+        UpdateQueueInfo(queue, vstatus, ii, me, from, to, myrinfo, oldnnbrs, 
+            nupd, updptr, updind, bndtype);
+
+        ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]);
+      }
+    }
+
+    graph->nbnd = nbnd;
+
+    /* Reset the vstatus and associated data structures */
+    for (i=0; i<nupd; i++) {
+      ASSERT(updptr[updind[i]] != -1);
+      ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT);
+      vstatus[updind[i]] = VPQSTATUS_NOTPRESENT;
+      updptr[updind[i]]  = -1;
+    }
+
+    if (ctrl->dbglvl&METIS_DBG_REFINE) {
+       printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"."
+              " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX,
+              imin(nparts*ncon, pwgts), imax(nparts*ncon, pwgts), 
+              ComputeLoadImbalance(graph, nparts, pijbm), 
+              graph->nbnd, nmoved, graph->mincut, ComputeVolume(graph, where));
+       if (ctrl->minconn) 
+         printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+       printf("\n");
+    }
+
+    if (nmoved == 0 || (omode == OMODE_REFINE && graph->mincut == oldcut))
+      break;  /* nothing moved, or a refinement pass left the cut unchanged */
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! K-way refinement that minimizes the communication volume. This is a 
+    greedy routine and the vertices are visited in decreasing gv order.
+  \param ctrl is the control structure.
+  \param graph is the graph that is being refined.
+  \param niter is the number of refinement iterations.
+  \param ffactor is the \em fudge-factor for allowing positive gain moves 
+         to violate the max-pwgt constraint (not referenced in this body).
+  \param omode is the type of optimization: OMODE_REFINE or OMODE_BALANCE.
+*/
+/**************************************************************************/
+void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode)
+{
+  /* Common variables to all types of kway-refinement/balancing routines */
+  idx_t i, ii, iii, j, k, l, pass, nvtxs, ncon, nparts, gain; 
+  idx_t from, me, to, cto, oldcut;
+  idx_t *xadj, *vwgt, *adjncy;
+  idx_t *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt;
+  idx_t nmoved, nupd, *vstatus, *updptr, *updind;
+  idx_t maxndoms, *safetos=NULL, *nads=NULL, *doms=NULL, **adids=NULL, **adwgts=NULL;
+  idx_t *bfslvl=NULL, *bfsind=NULL, *bfsmrk=NULL;
+  idx_t bndtype = (omode == OMODE_REFINE ? BNDTYPE_REFINE : BNDTYPE_BALANCE);
+  real_t *ubfactors, *pijbm;
+  real_t origbal;
+
+  /* Volume-specific/different variables */
+  ipq_t *queue;
+  idx_t oldvol, xgain;
+  idx_t *vmarker, *pmarker, *modind;
+  vkrinfo_t *myrinfo;
+  vnbr_t *mynbrs;
+
+  WCOREPUSH;  /* presumably opens a workspace frame closed by WCOREPOP below -- confirm */
+
+  /* Link the graph fields */
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  
+  nparts = ctrl->nparts;
+  pijbm  = ctrl->pijbm;
+
+
+  /* Determine the ubfactors. The method used is different based on omode. 
+     When OMODE_BALANCE, the ubfactors are those supplied by the user. 
+     When OMODE_REFINE, the ubfactors are the max of the current partition
+     and the user-specified ones. */
+  ubfactors = rwspacemalloc(ctrl, ncon);
+  ComputeLoadImbalanceVec(graph, nparts, pijbm, ubfactors);
+  origbal = rvecmaxdiff(ncon, ubfactors, ctrl->ubfactors);  /* only used in the debug output below */
+  if (omode == OMODE_BALANCE) {
+    rcopy(ncon, ctrl->ubfactors, ubfactors);
+  }
+  else {
+    for (i=0; i<ncon; i++)
+      ubfactors[i] = (ubfactors[i] > ctrl->ubfactors[i] ? ubfactors[i] : ctrl->ubfactors[i]);
+  }
+
+
+  /* Setup the weight intervals of the various subdomains */
+  minwgt  = iwspacemalloc(ctrl, nparts*ncon);
+  maxwgt  = iwspacemalloc(ctrl, nparts*ncon);
+
+  for (i=0; i<nparts; i++) {
+    for (j=0; j<ncon; j++) {
+      maxwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*ubfactors[j];
+      /*minwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*(.9/ubfactors[j]); */
+      minwgt[i*ncon+j]  = ctrl->tpwgts[i*ncon+j]*graph->tvwgt[j]*.2;  /* lower bound: 0.2 x tpwgts x tvwgt */
+    }
+  }
+
+  perm = iwspacemalloc(ctrl, nvtxs);  /* indexes bndind when the boundary is queued below */
+
+
+  /* This stores the valid target subdomains. It is used when ctrl->minconn to
+     control the subdomains to which moves are allowed to be made. 
+     When ctrl->minconn is false, the default values of 2 allow all moves to
+     go through and it does not interfere with the zero-gain move selection. */
+  safetos = iset(nparts, 2, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->minconn) {
+    ComputeSubDomainGraph(ctrl, graph);
+
+    nads    = ctrl->nads;
+    adids   = ctrl->adids;
+    adwgts  = ctrl->adwgts;
+    doms    = iset(nparts, 0, ctrl->pvec1);  /* reuses ctrl->pvec1 instead of workspace memory */
+  }
+
+
+  /* Setup updptr, updind like boundary info to keep track of the vertices whose
+     vstatus's need to be reset at the end of the inner iteration */
+  vstatus = iset(nvtxs, VPQSTATUS_NOTPRESENT, iwspacemalloc(ctrl, nvtxs));
+  updptr  = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
+  updind  = iwspacemalloc(ctrl, nvtxs);
+
+  if (ctrl->contig) {
+    /* The arrays that will be used for limited check of articulation points */
+    bfslvl = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+    bfsind = iwspacemalloc(ctrl, nvtxs);
+    bfsmrk = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  }
+
+  /* Vol-refinement specific working arrays (handed to KWayVolUpdate below) */
+  modind  = iwspacemalloc(ctrl, nvtxs);
+  vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+  pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
+
+  if (ctrl->dbglvl&METIS_DBG_REFINE) {
+     printf("%s: [%6"PRIDX" %6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL"(%.3"PRREAL"),"
+         ", Nv-Nb[%6"PRIDX" %6"PRIDX"], Cut: %5"PRIDX", Vol: %5"PRIDX", (%"PRIDX")",
+         (omode == OMODE_REFINE ? "GRV" : "GBV"),
+         imin(nparts*ncon, pwgts), imax(nparts*ncon, pwgts), imax(nparts*ncon, maxwgt),
+         ComputeLoadImbalance(graph, nparts, pijbm), origbal,
+         graph->nvtxs, graph->nbnd, graph->mincut, graph->minvol, niter);
+     if (ctrl->minconn) 
+       printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+     printf("\n");
+  }
+
+  queue = ipqCreate(nvtxs);
+
+
+  /*=====================================================================
+  * The top-level refinement loop 
+  *======================================================================*/
+  for (pass=0; pass<niter; pass++) {
+    ASSERT(ComputeVolume(graph, where) == graph->minvol);
+
+    /* In balancing mode, exit as soon as balance is reached */
+    if (omode == OMODE_BALANCE && IsBalanced(ctrl, graph, 0))
+      break;
+
+    oldcut = graph->mincut;
+    oldvol = graph->minvol;
+    nupd   = 0;
+
+    if (ctrl->minconn)
+      maxndoms = imax(nparts, nads);
+
+    /* Insert the boundary vertices in the priority queue */
+    irandArrayPermute(graph->nbnd, perm, graph->nbnd/4, 1);  /* presumably fills perm with a shuffled 0..nbnd-1 -- confirm */
+    for (ii=0; ii<graph->nbnd; ii++) {
+      i = bndind[perm[ii]];
+      ipqInsert(queue, i, graph->vkrinfo[i].gv);
+      vstatus[i] = VPQSTATUS_PRESENT;
+      ListInsert(nupd, updind, updptr, i);
+    }
+
+    /* Start extracting vertices from the queue and try to move them */
+    for (nmoved=0, iii=0;;iii++) {  /* iii counts extractions; its parity gates some zero-gain moves */
+      if ((i = ipqGetTop(queue)) == -1) 
+        break;
+      vstatus[i] = VPQSTATUS_EXTRACTED;
+
+      myrinfo = graph->vkrinfo+i;
+      mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+      from = where[i];
+
+      /* Prevent moves that make 'from' domain underbalanced */
+      if (omode == OMODE_REFINE) {  /* in this mode the check applies only when myrinfo->nid > 0 */
+        if (myrinfo->nid > 0 &&
+            !ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minwgt+from*ncon))
+          continue;
+      }
+      else { /* OMODE_BALANCE */
+        if (!ivecaxpygez(ncon, -1, vwgt+i*ncon, pwgts+from*ncon, minwgt+from*ncon))
+          continue;
+      }
+
+      if (ctrl->contig && IsArticulationNode(i, xadj, adjncy, where, bfslvl, bfsind, bfsmrk))
+        continue;
+
+      if (ctrl->minconn)
+        SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, doms);
+
+      xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? graph->vsize[i] : 0);  /* added to each candidate's gv below */
+
+      /* Find the most promising subdomain to move to */
+      if (omode == OMODE_REFINE) {
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {  /* first safe, non-negative-gain target that fits maxwgt */
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          gain = mynbrs[k].gv + xgain;
+          if (gain >= 0 && ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon))
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+
+        cto = to;  /* best target so far; 'to' is overwritten by the scan below */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          gain = mynbrs[j].gv + xgain;  /* NOTE(review): this value is never read afterwards */
+          if ((mynbrs[j].gv > mynbrs[k].gv && 
+               ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon))
+              ||
+              (mynbrs[j].gv == mynbrs[k].gv && 
+               mynbrs[j].ned > mynbrs[k].ned &&
+               ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon))
+              ||
+              (mynbrs[j].gv == mynbrs[k].gv && 
+               mynbrs[j].ned == mynbrs[k].ned &&
+               BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                   1, pwgts+cto*ncon, pijbm+cto*ncon,
+                   1, pwgts+to*ncon, pijbm+to*ncon))) {
+            k   = j;
+            cto = to;
+          }
+        }
+        to = cto;
+
+        j = 0;  /* j is reused here as an accept/reject flag for the chosen move */
+        if (xgain+mynbrs[k].gv > 0 || mynbrs[k].ned-myrinfo->nid > 0)
+          j = 1;
+        else if (mynbrs[k].ned-myrinfo->nid == 0) {
+          if ((iii%2 == 0 && safetos[to] == 2) ||
+              BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                  -1, pwgts+from*ncon, pijbm+from*ncon,
+                  +1, pwgts+to*ncon, pijbm+to*ncon))
+            j = 1;
+        }
+        if (j == 0)
+          continue;
+      }
+      else { /* OMODE_BALANCE */
+        for (k=myrinfo->nnbrs-1; k>=0; k--) {
+          if (!safetos[to=mynbrs[k].pid])
+            continue;
+          if (ivecaxpylez(ncon, 1, vwgt+i*ncon, pwgts+to*ncon, maxwgt+to*ncon) ||
+              BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                  -1, pwgts+from*ncon, pijbm+from*ncon,
+                  +1, pwgts+to*ncon, pijbm+to*ncon))
+            break;
+        }
+        if (k < 0)
+          continue;  /* skip this vertex if you did not find a candidate */
+
+        cto = to;  /* best target so far; 'to' is overwritten by the scan below */
+        for (j=k-1; j>=0; j--) {
+          if (!safetos[to=mynbrs[j].pid])
+            continue;
+          if (BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                  1, pwgts+cto*ncon, pijbm+cto*ncon,
+                  1, pwgts+to*ncon, pijbm+to*ncon)) {
+            k   = j;
+            cto = to;
+          }
+        }
+        to = cto;
+
+        if ((xgain+mynbrs[k].gv < 0 || 
+             (xgain+mynbrs[k].gv == 0 && mynbrs[k].ned-myrinfo->nid < 0))
+            &&
+            !BetterBalanceKWay(ncon, vwgt+i*ncon, ubfactors,
+                 -1, pwgts+from*ncon, pijbm+from*ncon,
+                 +1, pwgts+to*ncon, pijbm+to*ncon))
+          continue;  /* losing move and BetterBalanceKWay() did not accept it */
+      }
+          
+          
+      /*=====================================================================
+      * If we got here, we can now move the vertex from 'from' to 'to' 
+      *======================================================================*/
+      graph->mincut -= mynbrs[k].ned-myrinfo->nid;
+      graph->minvol -= (xgain+mynbrs[k].gv);
+      where[i] = to;
+      nmoved++;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO, 
+          printf("\t\tMoving %6"PRIDX" from %3"PRIDX" to %3"PRIDX". "
+                 "Gain: [%4"PRIDX" %4"PRIDX"]. Cut: %6"PRIDX", Vol: %6"PRIDX"\n", 
+              i, from, to, xgain+mynbrs[k].gv, mynbrs[k].ned-myrinfo->nid, 
+              graph->mincut, graph->minvol));
+
+      /* Update the subdomain connectivity information */
+      if (ctrl->minconn) {
+        /* take care of i's move itself */
+        UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->nid-mynbrs[k].ned, &maxndoms);
+
+        /* take care of the adjacent vertices */
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          me = where[adjncy[j]];
+          if (me != from && me != to) {
+            UpdateEdgeSubDomainGraph(ctrl, from, me, -1, &maxndoms);  /* each edge is counted as 1 here */
+            UpdateEdgeSubDomainGraph(ctrl, to, me, 1, &maxndoms);
+          }
+        }
+      }
+
+      /* Update pwgts */
+      iaxpy(ncon,  1, vwgt+i*ncon, 1, pwgts+to*ncon,   1);
+      iaxpy(ncon, -1, vwgt+i*ncon, 1, pwgts+from*ncon, 1);
+
+      /* Update the id/ed/gains/bnd/queue of potentially affected nodes */
+      KWayVolUpdate(ctrl, graph, i, from, to, queue, vstatus, &nupd, updptr, 
+          updind, bndtype, vmarker, pmarker, modind);
+
+      /*CheckKWayVolPartitionParams(ctrl, graph); */
+    }
+
+
+    /* Reset the vstatus and associated data structures */
+    for (i=0; i<nupd; i++) {
+      ASSERT(updptr[updind[i]] != -1);
+      ASSERT(vstatus[updind[i]] != VPQSTATUS_NOTPRESENT);
+      vstatus[updind[i]] = VPQSTATUS_NOTPRESENT;
+      updptr[updind[i]]  = -1;
+    }
+
+    if (ctrl->dbglvl&METIS_DBG_REFINE) {
+       printf("\t[%6"PRIDX" %6"PRIDX"], Bal: %5.3"PRREAL", Nb: %6"PRIDX"."
+              " Nmoves: %5"PRIDX", Cut: %6"PRIDX", Vol: %6"PRIDX,
+              imin(nparts*ncon, pwgts), imax(nparts*ncon, pwgts), 
+              ComputeLoadImbalance(graph, nparts, pijbm), 
+              graph->nbnd, nmoved, graph->mincut, graph->minvol);
+       if (ctrl->minconn) 
+         printf(", Doms: [%3"PRIDX" %4"PRIDX"]", imax(nparts, nads), isum(nparts, nads,1));
+       printf("\n");
+    }
+
+    if (nmoved == 0 || 
+        (omode == OMODE_REFINE && graph->minvol == oldvol && graph->mincut == oldcut))
+      break;  /* nothing moved, or a refinement pass changed neither volume nor cut */
+  }
+
+  ipqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Approximate articulation-vertex test for vertex i, using a depth-limited
+    BFS inside i's partition. bfslvl and bfsmrk must be all zeros on entry and
+    are zero again on return; bfsind is scratch space for the BFS queue.
+    Returns 0 if i has at most one same-partition neighbor or if the BFS
+    reaches all of them without passing through i, and 1 otherwise. */
+/*************************************************************************/
+idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, idx_t *where,
+          idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk)
+{
+  idx_t j, cur, nbr, seed=0, qhead, qtail, nfound, ntargets, home, BFSDEPTH=5;
+
+  home = where[i];
+  /* Mark the neighbors of i that share its partition and count them */
+  ntargets = 0;
+  for (j=xadj[i]; j<xadj[i+1]; j++) {
+    if (where[adjncy[j]] != home)
+      continue;
+    ASSERT(bfsmrk[adjncy[j]] == 0);
+    ASSERT(bfslvl[adjncy[j]] == 0);
+    seed = adjncy[j];
+    bfsmrk[seed] = 1;
+    ntargets++;
+  }
+
+  /* With zero or one such neighbor there is nothing to disconnect */
+  if (ntargets == 1)
+    bfsmrk[seed] = 0;
+  if (ntargets <= 1)
+    return 0;
+
+  /* Give i a non-zero level so that the traversal never enqueues it */
+  ASSERT(bfslvl[i] == 0);
+  bfslvl[i] = 1;
+
+  /* Start the queue from the last marked neighbor found above */
+  qhead = qtail = 0;
+  bfsind[qtail++] = seed;
+  bfslvl[seed] = 1;
+  bfsmrk[seed] = 0;
+  nfound = 1;
+
+  /* Limited BFS: stop when the queue drains or every marked node was hit */
+  while (qhead < qtail && nfound < ntargets) {
+    cur = bfsind[qhead++];
+    for (j=xadj[cur]; j<xadj[cur+1]; j++) {
+      nbr = adjncy[j];
+      if (where[nbr] != home)
+        continue;
+      if (bfsmrk[nbr]) {
+        bfsmrk[nbr] = 0;
+        if (++nfound == ntargets)
+          break;
+      }
+      if (bfslvl[nbr] == 0 && bfslvl[cur] < BFSDEPTH) {
+        bfsind[qtail++] = nbr;
+        bfslvl[nbr] = bfslvl[cur]+1;
+      }
+    }
+  }
+
+  /* Clear the levels of i and of everything that entered the queue */
+  bfslvl[i] = 0;
+  for (j=qtail-1; j>=0; j--)
+    bfslvl[bfsind[j]] = 0;
+
+  /* Marks can only be left behind when the search fell short */
+  if (nfound == ntargets)
+    return 0;
+  for (j=xadj[i]; j<xadj[i+1]; j++)
+    if (where[adjncy[j]] == home)
+      bfsmrk[adjncy[j]] = 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! 
+ This function updates the edge and volume gains due to the movement of
+ vertex v from 'from' to 'to'.
+
+ \param ctrl is the control structure.
+ \param graph is the graph being partitioned.
+ \param v is the vertex that is moving.
+ \param from is the original partition of v.
+ \param to is the new partition of v.
+ \param queue is the priority queue. If the queue is NULL, no priority-queue
+        related updates are performed. 
+ \param vstatus is an array that marks the status of the vertex in terms
+        of the priority queue. If queue is NULL, this parameter is ignored.
+ \param r_nupd is the number of vertices that have been inserted/removed
+        from the queue. If queue is NULL, this parameter is ignored.
+ \param updptr stores the index of each vertex in updind. If queue is NULL, 
+        this parameter is ignored.
+ \param updind is the list of vertices that have been inserted/removed from 
+        the queue. If queue is NULL, this parameter is ignored.
+ \param vmarker is of size nvtxs and is used internally as a temporary array. 
+        On entry and return all of its entries are 0.
+ \param pmarker is of size nparts and is used internally as a temporary marking
+        array. On entry and return all of its entries are -1.
+ \param modind is an array of size nvtxs and is used to keep track of the 
+        list of vertices whose gains need to be updated.
+*/
+/*************************************************************************/
+void KWayVolUpdate(ctrl_t *ctrl, graph_t *graph, idx_t v, idx_t from, 
+         idx_t to, ipq_t *queue, idx_t *vstatus, idx_t *r_nupd, idx_t *updptr, 
+         idx_t *updind, idx_t bndtype, idx_t *vmarker, idx_t *pmarker, 
+         idx_t *modind)
+{
+  idx_t i, ii, iii, j, jj, k, kk, l, u, nmod, other, me, myidx; 
+  idx_t *xadj, *vsize, *adjncy, *where;
+  vkrinfo_t *myrinfo, *orinfo;
+  vnbr_t *mynbrs, *onbrs;
+
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vsize  = graph->vsize;
+  where  = graph->where;
+
+  myrinfo = graph->vkrinfo+v;
+  mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+
+  /*======================================================================
+   * Remove the contributions on the gain made by 'v'. 
+   *=====================================================================*/
+  for (k=0; k<myrinfo->nnbrs; k++)
+    pmarker[mynbrs[k].pid] = k;
+  pmarker[from] = k;
+
+  myidx = pmarker[to];  /* Keep track of the index in mynbrs of the 'to' domain */
+
+  for (j=xadj[v]; j<xadj[v+1]; j++) {
+    ii     = adjncy[j];
+    other  = where[ii];
+    orinfo = graph->vkrinfo+ii;
+    onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+    if (other == from) {
+      for (k=0; k<orinfo->nnbrs; k++) {
+        if (pmarker[onbrs[k].pid] == -1) 
+          onbrs[k].gv += vsize[v];
+      }
+    }
+    else {
+      ASSERT(pmarker[other] != -1);
+
+      if (mynbrs[pmarker[other]].ned > 1) {
+        for (k=0; k<orinfo->nnbrs; k++) {
+          if (pmarker[onbrs[k].pid] == -1) 
+            onbrs[k].gv += vsize[v];
+        }
+      }
+      else { /* There is only one connection */
+        for (k=0; k<orinfo->nnbrs; k++) {
+          if (pmarker[onbrs[k].pid] != -1) 
+            onbrs[k].gv -= vsize[v];
+        }
+      }
+    }
+  }
+
+  for (k=0; k<myrinfo->nnbrs; k++)
+    pmarker[mynbrs[k].pid] = -1;
+  pmarker[from] = -1;
+
+
+  /*======================================================================
+   * Update the id/ed of vertex 'v'
+   *=====================================================================*/
+  if (myidx == -1) {
+    myidx = myrinfo->nnbrs++;
+    ASSERT(myidx < xadj[v+1]-xadj[v]);
+    mynbrs[myidx].ned = 0;
+  }
+  myrinfo->ned += myrinfo->nid-mynbrs[myidx].ned;
+  SWAP(myrinfo->nid, mynbrs[myidx].ned, j);
+  if (mynbrs[myidx].ned == 0) 
+    mynbrs[myidx] = mynbrs[--myrinfo->nnbrs];
+  else
+    mynbrs[myidx].pid = from;
+
+
+  /*======================================================================
+   * Update the degrees of adjacent vertices and their volume gains
+   *=====================================================================*/
+  vmarker[v] = 1;
+  modind[0]  = v;
+  nmod       = 1;
+  for (j=xadj[v]; j<xadj[v+1]; j++) {
+    ii = adjncy[j];
+    me = where[ii];
+
+    if (!vmarker[ii]) {  /* The marking is done for boundary and max gv calculations */
+      vmarker[ii] = 2;
+      modind[nmod++] = ii;
+    }
+
+    myrinfo = graph->vkrinfo+ii;
+    if (myrinfo->inbr == -1) 
+      myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[ii+1]-xadj[ii]+1);
+    mynbrs = ctrl->vnbrpool + myrinfo->inbr;
+
+    if (me == from) {
+      INC_DEC(myrinfo->ned, myrinfo->nid, 1);
+    } 
+    else if (me == to) {
+      INC_DEC(myrinfo->nid, myrinfo->ned, 1);
+    }
+
+    /* Remove the edgeweight from the 'pid == from' entry of the vertex */
+    if (me != from) {
+      for (k=0; k<myrinfo->nnbrs; k++) {
+        if (mynbrs[k].pid == from) {
+          if (mynbrs[k].ned == 1) {
+            mynbrs[k] = mynbrs[--myrinfo->nnbrs];
+            vmarker[ii] = 1;  /* You do a complete .gv calculation */
+
+            /* All vertices adjacent to 'ii' need to be updated */
+            for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+              u      = adjncy[jj];
+              other  = where[u];
+              orinfo = graph->vkrinfo+u;
+              onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+              for (kk=0; kk<orinfo->nnbrs; kk++) {
+                if (onbrs[kk].pid == from) {
+                  onbrs[kk].gv -= vsize[ii];
+                  if (!vmarker[u]) { /* Need to update boundary etc */
+                    vmarker[u]      = 2;
+                    modind[nmod++] = u;
+                  }
+                  break;
+                }
+              }
+            }
+          }
+          else {
+            mynbrs[k].ned--;
+
+            /* Update the gv due to single 'ii' connection to 'from' */
+            if (mynbrs[k].ned == 1) {
+              /* find the vertex 'u' that 'ii' was connected into 'from' */
+              for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+                u     = adjncy[jj];
+                other = where[u];
+
+                if (other == from) {
+                  orinfo = graph->vkrinfo+u;
+                  onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+                  /* The following is correct because domains in common
+                     between ii and u will lead to a reduction over the
+                     previous gain, whereas domains only in u but not in
+                     ii, will lead to no change as opposed to the earlier
+                     increase */
+                  for (kk=0; kk<orinfo->nnbrs; kk++) 
+                    onbrs[kk].gv += vsize[ii];
+
+                  if (!vmarker[u]) { /* Need to update boundary etc */
+                    vmarker[u]     = 2;
+                    modind[nmod++] = u;
+                  }
+                  break;  
+                }
+              }
+            }
+          }
+          break; 
+        }
+      }
+    }
+
+
+    /* Add the edgeweight to the 'pid == to' entry of the vertex */
+    if (me != to) {
+      for (k=0; k<myrinfo->nnbrs; k++) {
+        if (mynbrs[k].pid == to) {
+          mynbrs[k].ned++;
+
+          /* Update the gv due to non-single 'ii' connection to 'to' */
+          if (mynbrs[k].ned == 2) {
+            /* find the vertex 'u' that 'ii' was connected into 'to' */
+            for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+              u     = adjncy[jj];
+              other = where[u];
+
+              if (u != v && other == to) {
+                orinfo = graph->vkrinfo+u;
+                onbrs  = ctrl->vnbrpool + orinfo->inbr;
+                for (kk=0; kk<orinfo->nnbrs; kk++) 
+                  onbrs[kk].gv -= vsize[ii];
+
+                if (!vmarker[u]) { /* Need to update boundary etc */
+                  vmarker[u]      = 2;
+                  modind[nmod++] = u;
+                }
+                break;  
+              }
+            }
+          }
+          break;
+        }
+      }
+
+      if (k == myrinfo->nnbrs) {
+        mynbrs[myrinfo->nnbrs].pid   = to;
+        mynbrs[myrinfo->nnbrs++].ned = 1;
+        vmarker[ii] = 1;  /* You do a complete .gv calculation */
+
+        /* All vertices adjacent to 'ii' need to be updated */
+        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
+          u      = adjncy[jj];
+          other  = where[u];
+          orinfo = graph->vkrinfo+u;
+          onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+          for (kk=0; kk<orinfo->nnbrs; kk++) {
+            if (onbrs[kk].pid == to) {
+              onbrs[kk].gv += vsize[ii];
+              if (!vmarker[u]) { /* Need to update boundary etc */
+                vmarker[u] = 2;
+                modind[nmod++] = u;
+              }
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    ASSERT(myrinfo->nnbrs <= xadj[ii+1]-xadj[ii]);
+  }
+
+
+  /*======================================================================
+   * Add the contributions on the volume gain due to 'v'
+   *=====================================================================*/
+  myrinfo = graph->vkrinfo+v;
+  mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+  for (k=0; k<myrinfo->nnbrs; k++)
+    pmarker[mynbrs[k].pid] = k;
+  pmarker[to] = k;
+
+  for (j=xadj[v]; j<xadj[v+1]; j++) {
+    ii     = adjncy[j];
+    other  = where[ii];
+    orinfo = graph->vkrinfo+ii;
+    onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+    if (other == to) {
+      for (k=0; k<orinfo->nnbrs; k++) {
+        if (pmarker[onbrs[k].pid] == -1) 
+          onbrs[k].gv -= vsize[v];
+      }
+    }
+    else {
+      ASSERT(pmarker[other] != -1);
+
+      if (mynbrs[pmarker[other]].ned > 1) {
+        for (k=0; k<orinfo->nnbrs; k++) {
+          if (pmarker[onbrs[k].pid] == -1) 
+            onbrs[k].gv -= vsize[v];
+        }
+      }
+      else { /* There is only one connection */
+        for (k=0; k<orinfo->nnbrs; k++) {
+          if (pmarker[onbrs[k].pid] != -1) 
+            onbrs[k].gv += vsize[v];
+        }
+      }
+    }
+  }
+  for (k=0; k<myrinfo->nnbrs; k++)
+    pmarker[mynbrs[k].pid] = -1;
+  pmarker[to] = -1;
+
+
+  /*======================================================================
+   * Recompute the volume information of the 'hard' nodes, and update the
+   * max volume gain for all the modified vertices and the priority queue
+   *=====================================================================*/
+  for (iii=0; iii<nmod; iii++) {
+    i  = modind[iii];
+    me = where[i];
+
+    myrinfo = graph->vkrinfo+i;
+    mynbrs  = ctrl->vnbrpool + myrinfo->inbr;
+
+    if (vmarker[i] == 1) {  /* Only complete gain updates go through */
+      for (k=0; k<myrinfo->nnbrs; k++) 
+        mynbrs[k].gv = 0;
+
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii     = adjncy[j];
+        other  = where[ii];
+        orinfo = graph->vkrinfo+ii;
+        onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+        for (kk=0; kk<orinfo->nnbrs; kk++) 
+          pmarker[onbrs[kk].pid] = kk;
+        pmarker[other] = 1;
+
+        if (me == other) {
+          /* Find which domains 'i' is connected and 'ii' is not and update their gain */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if (pmarker[mynbrs[k].pid] == -1)
+              mynbrs[k].gv -= vsize[ii];
+          }
+        }
+        else {
+          ASSERT(pmarker[me] != -1);
+
+          /* I'm the only connection of 'ii' in 'me' */
+          if (onbrs[pmarker[me]].ned == 1) { 
+            /* Increase the gains for all the common domains between 'i' and 'ii' */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (pmarker[mynbrs[k].pid] != -1) 
+                mynbrs[k].gv += vsize[ii];
+            }
+          }
+          else {
+            /* Find which domains 'i' is connected and 'ii' is not and update their gain */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (pmarker[mynbrs[k].pid] == -1) 
+                mynbrs[k].gv -= vsize[ii];
+            }
+          }
+        }
+
+        for (kk=0; kk<orinfo->nnbrs; kk++) 
+          pmarker[onbrs[kk].pid] = -1;
+        pmarker[other] = -1;
+  
+      }
+    }
+
+    /* Compute the overall gv for that node */
+    myrinfo->gv = IDX_MIN;
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].gv > myrinfo->gv)
+        myrinfo->gv = mynbrs[k].gv;
+    }
+
+    /* Add the xtra gain due to id == 0 */
+    if (myrinfo->ned > 0 && myrinfo->nid == 0)
+      myrinfo->gv += vsize[i];
+
+
+    /*======================================================================
+     * Maintain a consistent boundary
+     *=====================================================================*/
+    if (bndtype == BNDTYPE_REFINE) {
+      if (myrinfo->gv >= 0 && graph->bndptr[i] == -1)
+        BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i);
+
+      if (myrinfo->gv < 0 && graph->bndptr[i] != -1)
+        BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i);
+    }
+    else {
+      if (myrinfo->ned > 0 && graph->bndptr[i] == -1)
+        BNDInsert(graph->nbnd, graph->bndind, graph->bndptr, i);
+
+      if (myrinfo->ned == 0 && graph->bndptr[i] != -1)
+        BNDDelete(graph->nbnd, graph->bndind, graph->bndptr, i);
+    }
+
+
+    /*======================================================================
+     * Update the priority queue appropriately (if allowed)
+     *=====================================================================*/
+    if (queue != NULL) {
+      if (vstatus[i] != VPQSTATUS_EXTRACTED) {
+        if (graph->bndptr[i] != -1) { /* In-boundary vertex */
+          if (vstatus[i] == VPQSTATUS_PRESENT) {
+            ipqUpdate(queue, i, myrinfo->gv);
+          }
+          else {
+            ipqInsert(queue, i, myrinfo->gv);
+            vstatus[i] = VPQSTATUS_PRESENT;
+            ListInsert(*r_nupd, updind, updptr, i);
+          }
+        }
+        else { /* Off-boundary vertex */
+          if (vstatus[i] == VPQSTATUS_PRESENT) {
+            ipqDelete(queue, i);
+            vstatus[i] = VPQSTATUS_NOTPRESENT;
+            ListDelete(*r_nupd, updind, updptr, i);
+          }
+        }
+      }
+    }
+  
+    vmarker[i] = 0;
+  }
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/kwayrefine.c b/3rdParty/metis/metis-5.1.0/libmetis/kwayrefine.c
new file mode 100644
index 000000000..0e3c6dbd2
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/kwayrefine.c
@@ -0,0 +1,672 @@
+/*!
+\file
+\brief Driving routines for multilevel k-way refinement
+
+\date   Started 7/28/1997
+\author George 
+\author  Copyright 1997-2009, Regents of the University of Minnesota 
+\version $Id: kwayrefine.c 10737 2011-09-13 13:37:25Z karypis $ 
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Entry point of k-way refinement: refines the coarsest 'graph', then projects and refines each finer graph up to 'orggraph' */
+/*************************************************************************/
+void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph)
+{
+  idx_t i, nlevels, contig=ctrl->contig;  /* contig: saved setting; ctrl->contig is toggled below */
+  graph_t *ptr;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr));
+
+  /* Count the levels between 'graph' and 'orggraph' by following the finer links */
+  for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); 
+
+  /* Compute the parameters of the coarsest graph */
+  ComputeKWayPartitionParams(ctrl, graph);
+
+  /* Try to minimize the sub-domain connectivity */
+  if (ctrl->minconn) 
+    EliminateSubDomainEdges(ctrl, graph);
+  
+  /* Deal with contiguity constraints at the beginning */
+  if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) { 
+    EliminateComponents(ctrl, graph);
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+    Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE); 
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); 
+
+    ctrl->contig = 0;  /* restored from 'contig' once the refinement loop is done */
+  }
+
+  /* Refine each successively finer graph */
+  for (i=0; ;i++) {
+    if (ctrl->minconn && i == nlevels/2) 
+      EliminateSubDomainEdges(ctrl, graph);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr));
+
+    if (2*i >= nlevels && !IsBalanced(ctrl, graph, .02)) {  /* only from the halfway level onwards */
+      ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+      Greedy_KWayOptimize(ctrl, graph, 1, 0, OMODE_BALANCE); 
+      ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    }
+
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 5.0, OMODE_REFINE); 
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr));
+
+    /* Deal with contiguity constraints in the middle */
+    if (contig && i == nlevels/2) {
+      if (FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) {
+        EliminateComponents(ctrl, graph);
+
+        if (!IsBalanced(ctrl, graph, .02)) {
+          ctrl->contig = 1;  /* switched on only for the two optimization passes below */
+          ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+          Greedy_KWayOptimize(ctrl, graph, 5, 0, OMODE_BALANCE); 
+  
+          ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+          Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); 
+          ctrl->contig = 0;
+        }
+      }
+    }
+
+    if (graph == orggraph)  /* the original (finest) graph has been refined */
+      break;
+
+    graph = graph->finer;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr));
+    ASSERT(graph->vwgt != NULL);
+
+    ProjectKWayPartition(ctrl, graph);
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr));
+  }
+
+  /* Deal with contiguity requirement at the end */
+  ctrl->contig = contig;
+  if (contig && FindPartitionInducedComponents(graph, graph->where, NULL, NULL) > ctrl->nparts) 
+    EliminateComponents(ctrl, graph);
+
+  if (!IsBalanced(ctrl, graph, 0.0)) {
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_BALANCE);
+    Greedy_KWayOptimize(ctrl, graph, 10, 0, OMODE_BALANCE); 
+
+    ComputeKWayBoundary(ctrl, graph, BNDTYPE_REFINE);
+    Greedy_KWayOptimize(ctrl, graph, ctrl->niter, 0, OMODE_REFINE); 
+  }
+
+  if (ctrl->contig) 
+    ASSERT(FindPartitionInducedComponents(graph, graph->where, NULL, NULL) == ctrl->nparts);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr));
+}
+
+
+/*************************************************************************/
+/*! Allocates pwgts/where/bndptr/bndind and the objtype-specific per-vertex refinement info (ckrinfo or vkrinfo) of 'graph' */
+/*************************************************************************/
+void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph)
+{
+
+  graph->pwgts  = imalloc(ctrl->nparts*graph->ncon, "AllocateKWayPartitionMemory: pwgts");
+  graph->where  = imalloc(graph->nvtxs,  "AllocateKWayPartitionMemory: where");
+  graph->bndptr = imalloc(graph->nvtxs,  "AllocateKWayPartitionMemory: bndptr");
+  graph->bndind = imalloc(graph->nvtxs,  "AllocateKWayPartitionMemory: bndind");
+
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      graph->ckrinfo  = (ckrinfo_t *)gk_malloc(graph->nvtxs*sizeof(ckrinfo_t), 
+                          "AllocateKWayPartitionMemory: ckrinfo");
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      graph->vkrinfo = (vkrinfo_t *)gk_malloc(graph->nvtxs*sizeof(vkrinfo_t), 
+                          "AllocateKWayVolPartitionMemory: vkrinfo");
+
+      /* Alias ckrinfo onto the vkrinfo storage so that the cut-based -minconn and -contig
+         large-scale graph changes can go through (NOTE(review): confirm layout compatibility) */
+      graph->ckrinfo = (ckrinfo_t *)graph->vkrinfo;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Computes pwgts and the initial per-vertex refinement info: id/ed (cut) or nid/ned (volume), plus the neighbor-partition lists */
+/**************************************************************************/
+void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, k, l, nvtxs, ncon, nparts, nbnd, mincut, me, other;  /* NOTE: 'l' is never used */
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where, *bndind, *bndptr;
+
+  nparts = ctrl->nparts;
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where  = graph->where;
+  pwgts  = iset(nparts*ncon, 0, graph->pwgts);
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  nbnd = mincut = 0;
+
+  /* Compute pwgts: one weight per (partition, constraint) pair */
+  if (ncon == 1) {
+    for (i=0; i<nvtxs; i++) {
+      ASSERT(where[i] >= 0 && where[i] < nparts);
+      pwgts[where[i]] += vwgt[i];
+    }
+  }
+  else {
+    for (i=0; i<nvtxs; i++) {
+      me = where[i];
+      for (j=0; j<ncon; j++)
+        pwgts[me*ncon+j] += vwgt[i*ncon+j];
+    }
+  }
+
+  /* Compute the required info for refinement */
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      {
+        ckrinfo_t *myrinfo;
+        cnbr_t *mynbrs;
+
+        memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs);  /* id/ed/nnbrs are accumulated from zero below */
+        cnbrpoolReset(ctrl);
+
+        for (i=0; i<nvtxs; i++) {
+          me      = where[i];
+          myrinfo = graph->ckrinfo+i;
+
+          for (j=xadj[i]; j<xadj[i+1]; j++) {
+            if (me == where[adjncy[j]])
+              myrinfo->id += adjwgt[j];
+            else
+              myrinfo->ed += adjwgt[j];
+          }
+
+          /* Time to compute the particular external degrees */
+          if (myrinfo->ed > 0) {
+            mincut += myrinfo->ed;
+
+            myrinfo->inbr = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);  /* requests degree+1 entries */
+            mynbrs        = ctrl->cnbrpool + myrinfo->inbr;
+
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              other = where[adjncy[j]];
+              if (me != other) {
+                for (k=0; k<myrinfo->nnbrs; k++) {  /* linear search for an existing entry of 'other' */
+                  if (mynbrs[k].pid == other) {
+                    mynbrs[k].ed += adjwgt[j];
+                    break;
+                  }
+                }
+                if (k == myrinfo->nnbrs) {  /* not found: append a new entry */
+                  mynbrs[k].pid = other;
+                  mynbrs[k].ed  = adjwgt[j];
+                  myrinfo->nnbrs++;
+                }
+              }
+            }
+
+            ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]);
+
+            /* Only ed-id>=0 nodes are considered to be in the boundary */
+            if (myrinfo->ed-myrinfo->id >= 0)
+              BNDInsert(nbnd, bndind, bndptr, i);
+          }
+          else {
+            myrinfo->inbr = -1;
+          }
+        }
+
+        graph->mincut = mincut/2;  /* each cut edge was added from both of its endpoints */
+        graph->nbnd   = nbnd;
+
+      }
+      ASSERT(CheckBnd2(graph));
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      {
+        vkrinfo_t *myrinfo;
+        vnbr_t *mynbrs;
+
+        memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs);  /* nid/ned/nnbrs are accumulated from zero below */
+        vnbrpoolReset(ctrl);
+
+        /* Compute now the id/ed degrees (edge counts, adjwgt is not used here) */
+        for (i=0; i<nvtxs; i++) {
+          me      = where[i];
+          myrinfo = graph->vkrinfo+i;
+      
+          for (j=xadj[i]; j<xadj[i+1]; j++) {
+            if (me == where[adjncy[j]]) 
+              myrinfo->nid++;
+            else 
+              myrinfo->ned++;
+          }
+      
+          /* Time to compute the particular external degrees */
+          if (myrinfo->ned > 0) { 
+            mincut += myrinfo->ned;
+
+            myrinfo->inbr = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);  /* requests degree+1 entries */
+            mynbrs        = ctrl->vnbrpool + myrinfo->inbr;
+
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              other = where[adjncy[j]];
+              if (me != other) {
+                for (k=0; k<myrinfo->nnbrs; k++) {  /* linear search for an existing entry of 'other' */
+                  if (mynbrs[k].pid == other) {
+                    mynbrs[k].ned++;
+                    break;
+                  }
+                }
+                if (k == myrinfo->nnbrs) {  /* not found: append a new entry */
+                  mynbrs[k].gv  = 0;
+                  mynbrs[k].pid = other;
+                  mynbrs[k].ned = 1;
+                  myrinfo->nnbrs++;
+                }
+              }
+            }
+            ASSERT(myrinfo->nnbrs <= xadj[i+1]-xadj[i]);
+          }
+          else {
+            myrinfo->inbr = -1;
+          }
+        }
+        graph->mincut = mincut/2;  /* each cut edge was counted from both of its endpoints */
+      
+        ComputeKWayVolGains(ctrl, graph);
+      }
+      ASSERT(graph->minvol == ComputeVolume(graph, graph->where));
+      break;
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Projects the partition of graph->coarser onto 'graph', computes the
+ refinement parameters of 'graph', and then frees the coarser graph. */
+/*************************************************************************/
+void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, k, nvtxs, nbnd, nparts, me, other, istart, iend, tid, ted;
+  idx_t *xadj, *adjncy, *adjwgt;
+  idx_t *cmap, *where, *bndptr, *bndind, *cwhere, *htable;
+  graph_t *cgraph;
+
+  WCOREPUSH;
+
+  nparts = ctrl->nparts;
+
+  cgraph = graph->coarser;
+  cwhere = cgraph->where;
+
+  nvtxs   = graph->nvtxs;
+  cmap    = graph->cmap;
+  xadj    = graph->xadj;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+
+  AllocateKWayPartitionMemory(ctrl, graph);
+
+  where  = graph->where;
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  htable = iset(nparts, -1, iwspacemalloc(ctrl, nparts));  /* htable[p]: slot of partition p in the current vertex's nbr list, -1 if none */
+
+  /* Compute the required info for refinement */
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      ASSERT(CheckBnd2(cgraph));
+      {
+        ckrinfo_t *myrinfo;
+        cnbr_t *mynbrs;
+
+        /* go through and project partition and compute id/ed for the nodes */
+        for (i=0; i<nvtxs; i++) {
+          k        = cmap[i];
+          where[i] = cwhere[k];
+          cmap[i]  = cgraph->ckrinfo[k].ed;  /* cmap[i] now holds the coarse vertex's ed (0 => interior) */
+        }
+
+        memset(graph->ckrinfo, 0, sizeof(ckrinfo_t)*nvtxs);
+        cnbrpoolReset(ctrl);
+
+        for (nbnd=0, i=0; i<nvtxs; i++) {
+          istart = xadj[i];
+          iend   = xadj[i+1];
+
+          myrinfo = graph->ckrinfo+i;
+
+          if (cmap[i] == 0) { /* Interior node: cmap[i] holds the coarse vertex's ed */
+            for (tid=0, j=istart; j<iend; j++) 
+              tid += adjwgt[j];
+
+            myrinfo->id   = tid;
+            myrinfo->inbr = -1;
+          }
+          else { /* Potentially an interface node */
+            myrinfo->inbr = cnbrpoolGetNext(ctrl, iend-istart+1);
+            mynbrs        = ctrl->cnbrpool + myrinfo->inbr;
+
+            me = where[i];
+            for (tid=0, ted=0, j=istart; j<iend; j++) {
+              other = where[adjncy[j]];
+              if (me == other) {
+                tid += adjwgt[j];
+              }
+              else {
+                ted += adjwgt[j];
+                if ((k = htable[other]) == -1) {  /* first edge into 'other': append a new entry */
+                  htable[other]               = myrinfo->nnbrs;
+                  mynbrs[myrinfo->nnbrs].pid  = other;
+                  mynbrs[myrinfo->nnbrs++].ed = adjwgt[j];
+                }
+                else {
+                  mynbrs[k].ed += adjwgt[j];
+                }
+              }
+            }
+            myrinfo->id = tid;
+            myrinfo->ed = ted;
+      
+            /* Remove space for edegrees if it was interior */
+            if (ted == 0) { 
+              ctrl->nbrpoolcpos -= iend-istart+1;  /* undo the iend-istart+1 entries requested above */
+              myrinfo->inbr      = -1;
+            }
+            else {
+              if (ted-tid >= 0) 
+                BNDInsert(nbnd, bndind, bndptr, i); 
+      
+              for (j=0; j<myrinfo->nnbrs; j++)  /* reset only the htable slots that were touched */
+                htable[mynbrs[j].pid] = -1;
+            }
+          }
+        }
+      
+        graph->nbnd = nbnd;
+
+      }
+      ASSERT(CheckBnd2(graph));
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      {
+        vkrinfo_t *myrinfo;
+        vnbr_t *mynbrs;
+
+        ASSERT(cgraph->minvol == ComputeVolume(cgraph, cgraph->where));
+
+        /* go through and project partition and compute id/ed for the nodes */
+        for (i=0; i<nvtxs; i++) {
+          k        = cmap[i];
+          where[i] = cwhere[k];
+          cmap[i]  = cgraph->vkrinfo[k].ned;  /* cmap[i] now holds the coarse vertex's ned (0 => interior) */
+        }
+
+        memset(graph->vkrinfo, 0, sizeof(vkrinfo_t)*nvtxs);
+        vnbrpoolReset(ctrl);
+
+        for (i=0; i<nvtxs; i++) {
+          istart = xadj[i];
+          iend   = xadj[i+1];
+          myrinfo = graph->vkrinfo+i;
+
+          if (cmap[i] == 0) { /* Interior node: cmap[i] holds the coarse vertex's ned */
+            myrinfo->nid  = iend-istart;
+            myrinfo->inbr = -1;
+          }
+          else { /* Potentially an interface node */
+            myrinfo->inbr = vnbrpoolGetNext(ctrl, iend-istart+1);
+            mynbrs        = ctrl->vnbrpool + myrinfo->inbr;
+
+            me = where[i];
+            for (tid=0, ted=0, j=istart; j<iend; j++) {
+              other = where[adjncy[j]];
+              if (me == other) {
+                tid++;
+              }
+              else {
+                ted++;
+                if ((k = htable[other]) == -1) {  /* first edge into 'other': append a new entry */
+                  htable[other]                = myrinfo->nnbrs;
+                  mynbrs[myrinfo->nnbrs].gv    = 0;
+                  mynbrs[myrinfo->nnbrs].pid   = other;
+                  mynbrs[myrinfo->nnbrs++].ned = 1;
+                }
+                else {
+                  mynbrs[k].ned++;
+                }
+              }
+            }
+            myrinfo->nid = tid;
+            myrinfo->ned = ted;
+      
+            /* Remove space for edegrees if it was interior */
+            if (ted == 0) { 
+              ctrl->nbrpoolcpos -= iend-istart+1;  /* undo the iend-istart+1 entries requested above */
+              myrinfo->inbr = -1;
+            }
+            else {
+              for (j=0; j<myrinfo->nnbrs; j++)  /* reset only the htable slots that were touched */
+                htable[mynbrs[j].pid] = -1;
+            }
+          }
+        }
+      
+        ComputeKWayVolGains(ctrl, graph);
+
+        ASSERT(graph->minvol == ComputeVolume(graph, graph->where));
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  graph->mincut = cgraph->mincut;
+  icopy(nparts*graph->ncon, cgraph->pwgts, graph->pwgts);
+
+  FreeGraph(&graph->coarser);
+  graph->coarser = NULL;
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Rebuilds the boundary list (bndind/bndptr/nbnd) of 'graph' for the given 'bndtype' and the current objtype. */
+/*************************************************************************/
+void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype)
+{
+  idx_t i, nvtxs, nbnd;
+  idx_t *bndind, *bndptr;
+
+  nvtxs  = graph->nvtxs;
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);  /* start from an empty boundary */
+
+  nbnd = 0;
+
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      /* Compute the boundary */
+      if (bndtype == BNDTYPE_REFINE) {  /* vertices with ed-id >= 0 */
+        for (i=0; i<nvtxs; i++) {
+          if (graph->ckrinfo[i].ed-graph->ckrinfo[i].id >= 0) 
+            BNDInsert(nbnd, bndind, bndptr, i);
+        }
+      }
+      else { /* BNDTYPE_BALANCE: every vertex with ed > 0 */
+        for (i=0; i<nvtxs; i++) {
+          if (graph->ckrinfo[i].ed > 0) 
+            BNDInsert(nbnd, bndind, bndptr, i);
+        }
+      }
+      break;
+
+    case METIS_OBJTYPE_VOL:
+      /* Compute the boundary */
+      if (bndtype == BNDTYPE_REFINE) {  /* vertices with gv >= 0 */
+        for (i=0; i<nvtxs; i++) {
+          if (graph->vkrinfo[i].gv >= 0)
+            BNDInsert(nbnd, bndind, bndptr, i);
+        }
+      }
+      else { /* BNDTYPE_BALANCE: every vertex with ned > 0 */
+        for (i=0; i<nvtxs; i++) {
+          if (graph->vkrinfo[i].ned > 0) 
+            BNDInsert(nbnd, bndind, bndptr, i);
+        }
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  graph->nbnd = nbnd;
+}
+
+
+/*************************************************************************/
+/*! Computes the initial volume gains (gv) of every vertex, graph->minvol, and the boundary of vertices with gv >= 0 */
+/*************************************************************************/
+void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, l, nvtxs, nparts, me, other, pid;  /* NOTE: 'l' and 'pid' are never used */
+  idx_t *xadj, *vsize, *adjncy, *adjwgt, *where, 
+        *bndind, *bndptr, *ophtable;
+  vkrinfo_t *myrinfo, *orinfo;
+  vnbr_t *mynbrs, *onbrs;
+
+  WCOREPUSH;
+
+  nparts = ctrl->nparts;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;  /* NOTE: assigned but never read in this function */
+
+  where  = graph->where;
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  ophtable = iset(nparts, -1, iwspacemalloc(ctrl, nparts));  /* ophtable[p] != -1 <=> neighbor 'ii' is connected to (or in) partition p */
+
+  /* Compute the volume gains */
+  graph->minvol = graph->nbnd = 0;
+  for (i=0; i<nvtxs; i++) {
+    myrinfo     = graph->vkrinfo+i;
+    myrinfo->gv = IDX_MIN;
+
+    if (myrinfo->nnbrs > 0) {
+      me     = where[i];
+      mynbrs = ctrl->vnbrpool + myrinfo->inbr;
+
+      graph->minvol += myrinfo->nnbrs*vsize[i];  /* vsize[i] is counted once per neighboring partition */
+
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii     = adjncy[j];
+        other  = where[ii];
+        orinfo = graph->vkrinfo+ii;
+        onbrs  = ctrl->vnbrpool + orinfo->inbr;
+
+        for (k=0; k<orinfo->nnbrs; k++) 
+          ophtable[onbrs[k].pid] = k;
+        ophtable[other] = 1;  /* any value != -1: marks the partition that 'ii' itself is in */
+
+        if (me == other) {
+          /* Find which domains 'i' is connected to but 'ii' is not 
+             and update their gain */
+          for (k=0; k<myrinfo->nnbrs; k++) {
+            if (ophtable[mynbrs[k].pid] == -1)
+              mynbrs[k].gv -= vsize[ii];
+          }
+        }
+        else {
+          ASSERT(ophtable[me] != -1);
+
+          if (onbrs[ophtable[me]].ned == 1) { 
+            /* I'm the only connection of 'ii' in 'me' */
+            /* Increase the gains for all the common domains between 'i' and 'ii' */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (ophtable[mynbrs[k].pid] != -1) 
+                mynbrs[k].gv += vsize[ii];
+            }
+          }
+          else {
+            /* Find which domains 'i' is connected to and 'ii' is not 
+               and update their gain */
+            for (k=0; k<myrinfo->nnbrs; k++) {
+              if (ophtable[mynbrs[k].pid] == -1) 
+                mynbrs[k].gv -= vsize[ii];
+            }
+          }
+        }
+
+        /* Reset the marker vector */
+        for (k=0; k<orinfo->nnbrs; k++) 
+          ophtable[onbrs[k].pid] = -1;
+        ophtable[other] = -1;
+      }
+
+      /* Compute the max vgain */
+      for (k=0; k<myrinfo->nnbrs; k++) {
+        if (mynbrs[k].gv > myrinfo->gv)
+          myrinfo->gv = mynbrs[k].gv;
+      }
+
+      /* Add the extra gain due to id == 0 */
+      if (myrinfo->ned > 0 && myrinfo->nid == 0)
+        myrinfo->gv += vsize[i];
+    }
+
+    if (myrinfo->gv >= 0)  /* vertices without neighbor partitions keep gv == IDX_MIN and stay out */
+      BNDInsert(graph->nbnd, bndind, bndptr, i);
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Returns non-zero when ComputeLoadImbalanceDiff() of the current partition is
+at most 'ffactor', i.e., the balance constraints are met within that slack */
+/*************************************************************************/
+int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor)
+{
+  return 
+    (ComputeLoadImbalanceDiff(graph, ctrl->nparts, ctrl->pijbm, ctrl->ubfactors) 
+         <= ffactor);
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/macros.h b/3rdParty/metis/metis-5.1.0/libmetis/macros.h
new file mode 100644
index 000000000..3f6f7d9ed
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/macros.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * macros.h
+ *
+ * This file contains macros used in multilevel
+ *
+ * Started 9/25/94
+ * George
+ *
+ * $Id: macros.h 10060 2011-06-02 18:56:30Z karypis $
+ *
+ */
+
+#ifndef _LIBMETIS_MACROS_H_
+#define _LIBMETIS_MACROS_H_
+
+/*************************************************************************
+* Bitwise helpers that operate on -a instead of a when a < 0; OR and XOR also negate the result in that case, AND does not
+**************************************************************************/
+#define AND(a, b) ((a) < 0 ? ((-(a))&(b)) : ((a)&(b)))
+#define OR(a, b) ((a) < 0 ? -((-(a))|(b)) : ((a)|(b)))
+#define XOR(a, b) ((a) < 0 ? -((-(a))^(b)) : ((a)^(b)))
+
+//#define icopy(n, a, b) (idx_t *)memcpy((void *)(b), (void *)(a), sizeof(idx_t)*(n)) 
+
+#define HASHFCT(key, size) ((key)%(size))  /* remainder of key modulo size */
+#define SWAP gk_SWAP                       /* local alias for gk_SWAP */
+
+/* yields options[idx], or defval when options is NULL or options[idx] is -1 */
+#define GETOPTION(options, idx, defval) \
+            ((options) == NULL || (options)[idx] == -1 ? defval : (options)[idx]) 
+
+/* converts a user provided ufactor into a real ubfactor: 1.0 + 0.001*ufactor */
+#define I2RUBFACTOR(ufactor) (1.0+0.001*(ufactor))
+
+/* set/reset the current workspace core; both expand to a call on a variable named 'ctrl' at the use site */
+#define WCOREPUSH    wspacepush(ctrl)
+#define WCOREPOP     wspacepop(ctrl)
+
+
+
+/*************************************************************************
+* These macros insert and remove nodes from a Direct Access list (lind[] holds the members, lptr[i] is i's position in lind[] or -1)
+**************************************************************************/
+#define ListInsert(n, lind, lptr, i) \
+   do { \
+     ASSERT(lptr[i] == -1); \
+     lind[n] = i; \
+     lptr[i] = (n)++; /* i sits in the former last+1 slot; n grows by one */ \
+   } while(0) 
+
+#define ListDelete(n, lind, lptr, i) \
+   do { \
+     ASSERT(lptr[i] != -1); \
+     lind[lptr[i]] = lind[--(n)]; /* move the last member into i's slot */ \
+     lptr[lind[n]] = lptr[i]; /* and record that member's new position */ \
+     lptr[i] = -1; /* i is no longer in the list */ \
+   } while(0) 
+
+
+/*************************************************************************
+* These macros insert and remove nodes from the boundary list; both simply forward to ListInsert/ListDelete
+**************************************************************************/
+#define BNDInsert(nbnd, bndind, bndptr, vtx) \
+  ListInsert(nbnd, bndind, bndptr, vtx)
+
+#define BNDDelete(nbnd, bndind, bndptr, vtx) \
+  ListDelete(nbnd, bndind, bndptr, vtx)
+
+
+/*************************************************************************
+* These macros deal with id/ed updating during k-way refinement
+**************************************************************************/
+#define UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, \
+            nbnd, bndptr, bndind, bndtype) \
+   do { \
+     where[i] = to; /* vertex i is now assigned to 'to' */ \
+     myrinfo->ed += myrinfo->id-mynbrs[k].ed; \
+     SWAP(myrinfo->id, mynbrs[k].ed, j); /* 'j' is not a macro parameter: it is taken from the expansion site */ \
+     if (mynbrs[k].ed == 0) \
+       mynbrs[k] = mynbrs[--myrinfo->nnbrs]; /* drop entry k by overwriting it with the last entry */ \
+     else \
+       mynbrs[k].pid = from; /* entry k now stands for the partition the vertex left */ \
+     \
+     /* Update the boundary information. Both deletion and addition is \
+        allowed as this routine can be used for moving arbitrary nodes. */ \
+     if (bndtype == BNDTYPE_REFINE) { /* on the boundary iff ed-id >= 0 */ \
+       if (bndptr[i] != -1 && myrinfo->ed - myrinfo->id < 0) \
+         BNDDelete(nbnd, bndind, bndptr, i); \
+       if (bndptr[i] == -1 && myrinfo->ed - myrinfo->id >= 0) \
+         BNDInsert(nbnd, bndind, bndptr, i); \
+     } \
+     else { /* any other bndtype: on the boundary iff ed > 0 */ \
+       if (bndptr[i] != -1 && myrinfo->ed <= 0) \
+         BNDDelete(nbnd, bndind, bndptr, i); \
+       if (bndptr[i] == -1 && myrinfo->ed > 0) \
+         BNDInsert(nbnd, bndind, bndptr, i); \
+     } \
+   } while(0) 
+
+/* Adjusts myrinfo of vertex 'vid' (located in 'me') by 'ewgt': id/ed, boundary membership and the mynbrs entries for 'from' and 'to' */
+#define UpdateAdjacentVertexInfoAndBND(ctrl, vid, adjlen, me, from, to, \
+            myrinfo, ewgt, nbnd, bndptr, bndind, bndtype) \
+   do { \
+     idx_t k; \
+     cnbr_t *mynbrs; \
+     \
+     if (myrinfo->inbr == -1) { /* no neighbor storage yet: request adjlen+1 entries from the pool */ \
+       myrinfo->inbr  = cnbrpoolGetNext(ctrl, adjlen+1); \
+       myrinfo->nnbrs = 0; \
+     } \
+     ASSERT(CheckRInfo(ctrl, myrinfo)); \
+     \
+     mynbrs = ctrl->cnbrpool + myrinfo->inbr; \
+     \
+     /* Update global ID/ED and boundary */ \
+     if (me == from) { \
+       INC_DEC(myrinfo->ed, myrinfo->id, (ewgt)); \
+       if (bndtype == BNDTYPE_REFINE) { \
+         if (myrinfo->ed-myrinfo->id >= 0 && bndptr[(vid)] == -1) \
+           BNDInsert(nbnd, bndind, bndptr, (vid)); \
+       } \
+       else { \
+         if (myrinfo->ed > 0 && bndptr[(vid)] == -1) \
+           BNDInsert(nbnd, bndind, bndptr, (vid)); \
+       } \
+     } \
+     else if (me == to) { \
+       INC_DEC(myrinfo->id, myrinfo->ed, (ewgt)); \
+       if (bndtype == BNDTYPE_REFINE) { \
+         if (myrinfo->ed-myrinfo->id < 0 && bndptr[(vid)] != -1) \
+           BNDDelete(nbnd, bndind, bndptr, (vid)); \
+       } \
+       else { \
+         if (myrinfo->ed <= 0 && bndptr[(vid)] != -1) \
+           BNDDelete(nbnd, bndind, bndptr, (vid)); \
+       } \
+     } \
+     \
+     /* Remove contribution from the .ed of 'from' */ \
+     if (me != from) { \
+       for (k=0; k<myrinfo->nnbrs; k++) { \
+         if (mynbrs[k].pid == from) { \
+           if (mynbrs[k].ed == (ewgt)) \
+             mynbrs[k] = mynbrs[--myrinfo->nnbrs]; /* entry would drop to 0: remove it */ \
+           else \
+             mynbrs[k].ed -= (ewgt); \
+           break; \
+         } \
+       } \
+     } \
+     \
+     /* Add contribution to the .ed of 'to' */ \
+     if (me != to) { \
+       for (k=0; k<myrinfo->nnbrs; k++) { \
+         if (mynbrs[k].pid == to) { \
+           mynbrs[k].ed += (ewgt); \
+           break; \
+         } \
+       } \
+       if (k == myrinfo->nnbrs) { /* 'to' had no entry yet: append one */ \
+         mynbrs[k].pid  = to; \
+         mynbrs[k].ed   = (ewgt); \
+         myrinfo->nnbrs++; \
+       } \
+     } \
+     \
+     ASSERT(CheckRInfo(ctrl, myrinfo));\
+   } while(0) 
+
+/* Recomputes rgain for 'vid' and inserts/updates/deletes it in 'queue' and the upd list; a no-op unless me is 'to' or 'from' or nnbrs differs from oldnnbrs */
+#define UpdateQueueInfo(queue, vstatus, vid, me, from, to, myrinfo, oldnnbrs, \
+            nupd, updptr, updind, bndtype) \
+   do { \
+     real_t rgain; \
+     \
+     if (me == to || me == from || oldnnbrs != myrinfo->nnbrs) {  \
+       rgain = (myrinfo->nnbrs > 0 ?  \
+                1.0*myrinfo->ed/sqrt(myrinfo->nnbrs) : 0.0) - myrinfo->id; \
+   \
+       if (bndtype == BNDTYPE_REFINE) { /* queue membership requires ed-id >= 0 */ \
+         if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \
+           if (myrinfo->ed-myrinfo->id >= 0) \
+             rpqUpdate(queue, (vid), rgain); \
+           else { \
+             rpqDelete(queue, (vid)); \
+             vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \
+             ListDelete(nupd, updind, updptr, (vid)); \
+           } \
+         } \
+         else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed-myrinfo->id >= 0) { \
+           rpqInsert(queue, (vid), rgain); \
+           vstatus[(vid)] = VPQSTATUS_PRESENT; \
+           ListInsert(nupd, updind, updptr, (vid)); \
+         } \
+       } \
+       else { /* any other bndtype: queue membership requires ed > 0 */ \
+         if (vstatus[(vid)] == VPQSTATUS_PRESENT) { \
+           if (myrinfo->ed > 0) \
+             rpqUpdate(queue, (vid), rgain); \
+           else { \
+             rpqDelete(queue, (vid)); \
+             vstatus[(vid)] = VPQSTATUS_NOTPRESENT; \
+             ListDelete(nupd, updind, updptr, (vid)); \
+           } \
+         } \
+         else if (vstatus[(vid)] == VPQSTATUS_NOTPRESENT && myrinfo->ed > 0) { \
+           rpqInsert(queue, (vid), rgain); \
+           vstatus[(vid)] = VPQSTATUS_PRESENT; \
+           ListInsert(nupd, updind, updptr, (vid)); \
+         } \
+       } \
+     } \
+   } while(0) 
+
+
+
+/*************************************************************************/
+/*! This macro determines the set of subdomains that a vertex can move to
+    without increasing the maxndoms. For every neighbor pid, safetos[pid] ends up 0, 1 (nads[pid]+nadd <= maxndoms) or 2 (nadd == 0). */
+/*************************************************************************/
+#define SelectSafeTargetSubdomains(myrinfo, mynbrs, nads, adids, maxndoms, safetos, vtmp) \
+  do { \
+    idx_t j, k, l, nadd, to; \
+    for (j=0; j<myrinfo->nnbrs; j++) { \
+      safetos[to = mynbrs[j].pid] = 0; \
+      \
+      /* uncompress the connectivity info for the 'to' subdomain */ \
+      for (k=0; k<nads[to]; k++) \
+        vtmp[adids[to][k]] = 1; \
+      \
+      for (nadd=0, k=0; k<myrinfo->nnbrs; k++) { \
+        if (k == j) \
+          continue; \
+        \
+        l = mynbrs[k].pid; \
+        if (vtmp[l] == 0) { /* 'l' is not flagged as adjacent to 'to' */ \
+          if (nads[l] > maxndoms-1) { \
+            nadd = maxndoms; /* forces the <= maxndoms test below to fail when nads[to] > 0 */ \
+            break; \
+          } \
+          nadd++; \
+        } \
+      } \
+      if (nads[to]+nadd <= maxndoms) \
+        safetos[to] = 1; \
+      if (nadd == 0) \
+        safetos[to] = 2; \
+      \
+      /* cleanup the connectivity info due to the 'to' subdomain */ \
+      for (k=0; k<nads[to]; k++) \
+        vtmp[adids[to][k]] = 0; \
+    } \
+  } while (0)
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/mcutil.c b/3rdParty/metis/metis-5.1.0/libmetis/mcutil.c
new file mode 100644
index 000000000..6e20f556a
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/mcutil.c
@@ -0,0 +1,330 @@
+/*
+ * mcutil.c
+ *
+ * This file contains various utility functions for the MOC portion of the
+ * code
+ *
+ * Started 2/15/98
+ * George
+ *
+ * $Id: mcutil.c 13901 2013-03-24 16:17:03Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Scans two real vectors of length n from the last entry to the first.
+    \returns 0 as soon as some x[i] > y[i] is found, 1 otherwise.
+*/
+/**************************************************************************/
+int rvecle(idx_t n, real_t *x, real_t *y)
+{
+  idx_t i;
+
+  for (i=n-1; i>=0; i--)
+    if (x[i] > y[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Scans two real vectors of length n from the last entry to the first.
+    \returns 0 as soon as some x[i] < y[i] is found, 1 otherwise.
+*/
+/**************************************************************************/
+int rvecge(idx_t n, real_t *x, real_t *y)
+{
+  idx_t i;
+
+  for (i=n-1; i>=0; i--)
+    if (x[i] < y[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Scans the real vectors x1, x2 and y of length n from last to first.
+    \returns 0 as soon as some x1[i]+x2[i] > y[i] is found, 1 otherwise.
+*/
+/**************************************************************************/
+int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y)
+{
+  idx_t i;
+
+  for (i=n-1; i>=0; i--)
+    if (x1[i]+x2[i] > y[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Returns max_i(x[i]-y[i]); x[0] and y[0] are read unconditionally */
+/**************************************************************************/
+real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y)
+{
+  idx_t i;
+  real_t diff, best;
+
+  best = x[0]-y[0];
+  for (i=n-1; i>0; i--) {
+    diff = x[i]-y[i];
+    if (best < diff)
+      best = diff;
+  }
+
+  return best;
+}
+
+
+/*************************************************************************/
+/*! Returns 0 as soon as some x[i] > z[i] is found (i = n-1 .. 0), else 1. */
+/**************************************************************************/
+int ivecle(idx_t n, idx_t *x, idx_t *z)
+{
+  idx_t i=n;
+
+  while (--i >= 0)
+    if (x[i] > z[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Returns 0 as soon as some x[i] < z[i] is found (i = n-1 .. 0), else 1. */
+/**************************************************************************/
+int ivecge(idx_t n, idx_t *x, idx_t *z)
+{
+  idx_t i=n;
+
+  while (--i >= 0)
+    if (x[i] < z[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Returns 0 as soon as some a*x[i]+y[i] > z[i] is found, else 1. */
+/**************************************************************************/
+int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z)
+{
+  idx_t i=n;
+
+  while (--i >= 0)
+    if (a*x[i]+y[i] > z[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Returns 0 as soon as some a*x[i]+y[i] < z[i] is found, else 1. */
+/**************************************************************************/
+int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z)
+{
+  idx_t i=n;
+
+  while (--i >= 0)
+    if (a*x[i]+y[i] < z[i])
+      return 0;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Returns true if v+u2 is at least as well balanced across the ncon
+    weights as v+u1, judged by the summed rabs() deviation from the mean */
+/*************************************************************************/
+int BetterVBalance(idx_t ncon, real_t *invtvwgt, idx_t *v_vwgt, idx_t *u1_vwgt, 
+        idx_t *u2_vwgt)
+{
+  idx_t c;
+  real_t avg1=0.0, avg2=0.0, dev1=0.0, dev2=0.0;
+  /* mean scaled weight of v+u1 and of v+u2 over the ncon constraints */
+  for (c=0; c<ncon; c++) {
+    avg1 += (v_vwgt[c]+u1_vwgt[c])*invtvwgt[c];
+    avg2 += (v_vwgt[c]+u2_vwgt[c])*invtvwgt[c];
+  }
+  avg1 /= ncon;
+  avg2 /= ncon;
+  /* accumulate rabs() of each scaled weight's distance from its mean */
+  for (c=0; c<ncon; c++) {
+    dev1 += rabs(avg1 - (v_vwgt[c]+u1_vwgt[c])*invtvwgt[c]);
+    dev2 += rabs(avg2 - (v_vwgt[c]+u2_vwgt[c])*invtvwgt[c]);
+  }
+
+  return (dev1 - dev2 >= 0);
+}
+
+
+/*************************************************************************/
+/*! Sums the squares of the positive entries of x and of y (both of length
+    n) and returns true if the sum for y is strictly smaller than for x. */
+/*************************************************************************/
+int BetterBalance2Way(idx_t n, real_t *x, real_t *y)
+{
+  real_t xsq=0.0, ysq=0.0;
+
+  while (--n >= 0) {
+    if (x[n] > 0) xsq += x[n]*x[n];
+    if (y[n] > 0) ysq += y[n]*y[n];
+  }
+  return (ysq < xsq);
+}
+
+
+/*************************************************************************/
+/*! Compares two candidate weight updates involving a vertex of weight vwgt.
+    For candidate k (k=1,2) and every constraint i it evaluates
+    bmk[i]*(ptk[i]+ak*vwgt[i]) - ubvec[i], keeping the largest such value
+    (never below 0) and the sum of the squares of all of them.
+    \returns 1 if candidate 2 has the smaller maximum, or an equal maximum
+    and the smaller sum of squares; 0 otherwise. */
+/*************************************************************************/
+int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *ubvec, 
+        idx_t a1, idx_t *pt1, real_t *bm1, 
+        idx_t a2, idx_t *pt2, real_t *bm2)
+{
+  idx_t c;
+  real_t over, sq1=0.0, sq2=0.0, worst1=0.0, worst2=0.0;
+
+  for (c=0; c<ncon; c++) {
+    /* candidate 1: a1*vwgt added to pt1, measured against ubvec */
+    over = bm1[c]*(pt1[c]+a1*vwgt[c]) - ubvec[c];
+    sq1 += over*over;
+    if (over > worst1)
+      worst1 = over;
+
+    /* candidate 2: a2*vwgt added to pt2, measured against ubvec */
+    over = bm2[c]*(pt2[c]+a2*vwgt[c]) - ubvec[c];
+    sq2 += over*over;
+    if (over > worst2)
+      worst2 = over;
+  }
+
+  /* worst1/worst2 started at 0.0, so only positive values raised them;
+     candidate 2 wins outright on a strictly smaller maximum ... */
+  if (worst2 < worst1)
+    return 1;
+
+  /* ... and on an exactly equal maximum it wins on a strictly smaller
+     sum of squares */
+  if (worst2 == worst1 && sq2 < sq1)
+    return 1;
+
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! Returns the largest pwgts[j*ncon+i]*pijbm[j*ncon+i] over all partitions
+    j and constraints i, but never less than 1.0. */
+/**************************************************************************/ 
+real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm)
+{
+  idx_t con, part, pos;
+  idx_t ncon = graph->ncon, *pwgts = graph->pwgts;
+  real_t worst = 1.0, load;
+
+  /* pwgts and pijbm are both indexed as [partition*ncon + constraint];
+     visit constraint by constraint, partition by partition */
+  for (con=0; con<ncon; con++) {
+    for (part=0; part<nparts; part++) {
+      pos  = part*ncon+con;
+      load = pwgts[pos]*pijbm[pos];
+      if (load > worst)
+        worst = load;
+    }
+  }
+
+  return worst;
+}
+
+
+/*************************************************************************/
+/*! Returns the largest value of
+    pwgts[j*ncon+i]*pijbm[j*ncon+i] - ubvec[i]
+    over all partitions j and constraints i, i.e. the load measured against
+    the allowed unbalance ubvec[i]. The result is never less than -1.0.
+ */
+/**************************************************************************/ 
+real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm,
+           real_t *ubvec)
+{
+  idx_t con, part, pos;
+  idx_t ncon = graph->ncon, *pwgts = graph->pwgts;
+  real_t worst = -1.0, excess;
+
+  /* pwgts and pijbm are both indexed as [partition*ncon + constraint];
+     visit constraint by constraint, partition by partition */
+  for (con=0; con<ncon; con++) {
+    for (part=0; part<nparts; part++) {
+      pos    = part*ncon+con;
+      excess = pwgts[pos]*pijbm[pos] - ubvec[con];
+      if (excess > worst)
+        worst = excess;
+    }
+  }
+
+  return worst;
+}
+
+
+/*************************************************************************/
+/*! For each constraint i, stores in diffvec[i] the largest value over the
+    partitions j of pwgts[j*ncon+i]*pijbm[j*ncon+i] - ubfactors[i].
+    Partition 0 is always read. Returns the largest diffvec[i], but never
+    less than -1.0. */
+/**************************************************************************/ 
+real_t ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm, 
+         real_t *ubfactors, real_t *diffvec)
+{
+  idx_t con, part, pos;
+  idx_t ncon = graph->ncon, *pwgts = graph->pwgts;
+  real_t excess, worst = -1.0;
+
+  for (con=0; con<ncon; con++) {
+    /* partition 0 seeds the per-constraint maximum */
+    diffvec[con] = pwgts[con]*pijbm[con] - ubfactors[con];
+    for (part=1; part<nparts; part++) {
+      pos    = part*ncon+con;
+      excess = pwgts[pos]*pijbm[pos] - ubfactors[con];
+      if (excess > diffvec[con])
+        diffvec[con] = excess;
+    }
+    if (worst < diffvec[con])
+      worst = diffvec[con];
+  }
+
+  return worst;
+}
+
+
+/*************************************************************************/
+/*! Stores in lbvec[i] the largest pwgts[j*ncon+i]*pijbm[j*ncon+i] over j. */
+/**************************************************************************/ 
+void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm, 
+         real_t *lbvec)
+{
+  idx_t con, part, pos;
+  idx_t ncon = graph->ncon, *pwgts = graph->pwgts;
+  real_t load;
+
+  for (con=0; con<ncon; con++) {
+    /* partition 0 seeds the maximum for this constraint */
+    lbvec[con] = pwgts[con]*pijbm[con];
+    for (part=1; part<nparts; part++) {
+      pos  = part*ncon+con;
+      load = pwgts[pos]*pijbm[pos];
+      if (load > lbvec[con])
+        lbvec[con] = load;
+    }
+  }
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/mesh.c b/3rdParty/metis/metis-5.1.0/libmetis/mesh.c
new file mode 100644
index 000000000..3c5261211
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/mesh.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * mesh.c
+ *
+ * This file contains routines for converting 3D and 4D finite element
+ * meshes into dual or nodal graphs
+ *
+ * Started 8/18/97
+ * George
+ *
+ * $Id: mesh.c 13804 2013-03-04 23:49:08Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*****************************************************************************/
+/*! This function creates a graph corresponding to the dual of a finite element
+    mesh. 
+
+    \param ne is the number of elements in the mesh.
+    \param nn is the number of nodes in the mesh.
+    \param eptr is an array of size ne+1 used to mark the start and end 
+           locations in the eind array.
+    \param eind is an array that stores for each element the set of node IDs 
+           (indices) that it is made of. The length of this array is equal
+           to the total number of nodes over all the mesh elements.
+    \param ncommon is the minimum number of nodes that two elements must share
+           in order to be connected via an edge in the dual graph.
+    \param numflag is either 0 or 1 indicating if the numbering of the nodes
+           starts from 0 or 1, respectively. The same numbering is used for the
+           returned graph as well.
+    \param r_xadj indicates where the adjacency list of each vertex is stored 
+           in r_adjncy. The memory for this array is allocated by this routine. 
+           It can be freed by calling METIS_free().
+    \param r_adjncy stores the adjacency list of each vertex in the generated 
+           dual graph. The memory for this array is allocated by this routine. 
+           It can be freed by calling METIS_free().
+    \returns metis_rcode(sigrval), or METIS_ERROR_MEMORY if gk_malloc_init() fails.
+*/
+/*****************************************************************************/
+int METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+          idx_t *ncommon, idx_t *numflag,  idx_t **r_xadj, idx_t **r_adjncy)
+{
+  int sigrval=0, renumber=0;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  /* NOTE(review): 'renumber' is changed below and read at SIGTHROW; if gk_sigcatch() is setjmp-based it would need to be volatile -- confirm */
+  /* renumber the mesh */
+  if (*numflag == 1) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    renumber = 1;  /* so that the F numbering is re-applied at SIGTHROW */
+  }
+
+  /* create dual graph */
+  *r_xadj = *r_adjncy = NULL;
+  CreateGraphDual(*ne, *nn, eptr, eind, *ncommon, r_xadj, r_adjncy);
+
+  /* common exit path: also reached through the goto above when gk_sigcatch() returns non-zero */
+SIGTHROW:
+  if (renumber)
+    ChangeMesh2FNumbering(*ne, eptr, eind, *ne, *r_xadj, *r_adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  if (sigrval != 0) {  /* failure: release whichever graph arrays exist and hand back NULLs */
+    if (*r_xadj != NULL)
+      free(*r_xadj);
+    if (*r_adjncy != NULL)
+      free(*r_adjncy);
+    *r_xadj = *r_adjncy = NULL;
+  }
+
+  return metis_rcode(sigrval);
+}
+
+
+/*****************************************************************************/
+/*! This function creates a graph corresponding to (almost) the nodal of a 
+    finite element mesh. In the nodal graph, each node is connected to the
+    nodes corresponding to the union of nodes present in all the elements
+    in which that node belongs. 
+
+    \param ne is the number of elements in the mesh.
+    \param nn is the number of nodes in the mesh.
+    \param eptr is an array of size ne+1 used to mark the start and end 
+           locations in the eind array.
+    \param eind is an array that stores for each element the set of node IDs 
+           (indices) that it is made of. The length of this array is equal
+           to the total number of nodes over all the mesh elements.
+    \param numflag is either 0 or 1 indicating if the numbering of the nodes
+           starts from 0 or 1, respectively. The same numbering is used for the
+           returned graph as well.
+    \param r_xadj indicates where the adjacency list of each vertex is stored 
+           in r_adjncy. The memory for this array is allocated by this routine. 
+           It can be freed by calling METIS_free().
+    \param r_adjncy stores the adjacency list of each vertex in the generated 
+           nodal graph. The memory for this array is allocated by this routine. 
+           It can be freed by calling METIS_free().
+    \returns metis_rcode(sigrval), or METIS_ERROR_MEMORY if gk_malloc_init() fails.
+*/
+/*****************************************************************************/
+int METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+          idx_t *numflag,  idx_t **r_xadj, idx_t **r_adjncy)
+{
+  int sigrval=0, renumber=0;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  /* NOTE(review): 'renumber' is changed below and read at SIGTHROW; if gk_sigcatch() is setjmp-based it would need to be volatile -- confirm */
+  /* renumber the mesh */
+  if (*numflag == 1) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    renumber = 1;  /* so that the F numbering is re-applied at SIGTHROW */
+  }
+
+  /* create nodal graph */
+  *r_xadj = *r_adjncy = NULL;
+  CreateGraphNodal(*ne, *nn, eptr, eind, r_xadj, r_adjncy);
+
+  /* common exit path: also reached through the goto above when gk_sigcatch() returns non-zero */
+SIGTHROW:
+  if (renumber)
+    ChangeMesh2FNumbering(*ne, eptr, eind, *nn, *r_xadj, *r_adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  if (sigrval != 0) {  /* failure: release whichever graph arrays exist and hand back NULLs */
+    if (*r_xadj != NULL)
+      free(*r_xadj);
+    if (*r_adjncy != NULL)
+      free(*r_adjncy);
+    *r_xadj = *r_adjncy = NULL;
+  }
+
+  return metis_rcode(sigrval);
+}
+
+
+/*****************************************************************************/
+/*! This function creates the dual of a finite element mesh; *r_xadj and *r_adjncy receive arrays obtained with plain malloc() */
+/*****************************************************************************/
+void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, 
+          idx_t **r_xadj, idx_t **r_adjncy)
+{
+  idx_t i, j, nnbrs;
+  idx_t *nptr, *nind;
+  idx_t *xadj, *adjncy;
+  idx_t *marker, *nbrs;
+
+  if (ncommon < 1) {
+    printf("  Increased ncommon to 1, as it was initially %"PRIDX"\n", ncommon);
+    ncommon = 1;
+  }
+
+  /* construct the node-element list first: nind lists, node by node, the elements containing that node */
+  nptr = ismalloc(nn+1, 0, "CreateGraphDual: nptr");
+  nind = imalloc(eptr[ne], "CreateGraphDual: nind");
+
+  for (i=0; i<ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nptr[eind[j]]++;
+  }
+  MAKECSR(i, nn, nptr);
+
+  for (i=0; i<ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nind[nptr[eind[j]]++] = i;
+  }
+  SHIFTCSR(i, nn, nptr);
+
+
+  /* Allocate memory for xadj, since you know its size.
+     These are done using standard malloc as they are returned
+     to the calling function */
+  if ((xadj = (idx_t *)malloc((ne+1)*sizeof(idx_t))) == NULL) 
+    gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n");
+  *r_xadj = xadj;
+  iset(ne+1, 0, xadj);
+
+  /* allocate memory for working arrays used by FindCommonElements */
+  marker = ismalloc(ne, 0, "CreateGraphDual: marker");
+  nbrs   = imalloc(ne, "CreateGraphDual: nbrs");
+
+  for (i=0; i<ne; i++) {
+    xadj[i] = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, 
+                  nind, eptr, ncommon, marker, nbrs);
+  }
+  MAKECSR(i, ne, xadj);
+
+  /* Allocate memory for adjncy, since you now know its size (standard malloc,
+     as it is returned to the caller). At least one entry is requested because
+     malloc(0) may return NULL, which must not be mistaken for out-of-memory */
+  if ((adjncy = (idx_t *)malloc((xadj[ne] > 0 ? xadj[ne] : 1)*sizeof(idx_t))) == NULL) {
+    free(xadj);
+    *r_xadj = NULL;
+    gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n");
+  }
+  *r_adjncy = adjncy;
+  /* second pass: recompute every neighbor list and copy it into adjncy */
+  for (i=0; i<ne; i++) {
+    nnbrs = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, 
+                nind, eptr, ncommon, marker, nbrs);
+    for (j=0; j<nnbrs; j++)
+      adjncy[xadj[i]++] = nbrs[j];
+  }
+  SHIFTCSR(i, ne, xadj);
+  
+  gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM);
+}
+
+
+/*****************************************************************************/
+/*! Collects in nbrs the elements that share nodes with the ``query'' element
+    qid and pass one of the overlap thresholds below; returns how many were
+    kept. marker is used as per-element scratch and is reset before returning.
+*/
+/*****************************************************************************/
+idx_t FindCommonElements(idx_t qid, idx_t elen, idx_t *eind, idx_t *nptr, 
+          idx_t *nind, idx_t *eptr, idx_t ncommon, idx_t *marker, idx_t *nbrs)
+{
+  idx_t n, p, node, elem, shared, ncand=0, nkept=0;
+
+  /* count, per element, how many of qid's nodes it contains; an element is
+     appended to nbrs the first time it is encountered */
+  for (n=0; n<elen; n++) {
+    node = eind[n];
+    for (p=nptr[node]; p<nptr[node+1]; p++) {
+      elem = nind[p];
+      if (marker[elem]++ == 0)
+        nbrs[ncand++] = elem;
+    }
+  }
+
+  /* zero qid's own count (appending qid first if it was never seen), so
+     the thresholds below see an overlap of 0 for it */
+  if (marker[qid] == 0)
+    nbrs[ncand++] = qid;
+  marker[qid] = 0;
+
+  /* compact nbrs in place, clearing each candidate's count on the way */
+  for (n=0; n<ncand; n++) {
+    elem   = nbrs[n];
+    shared = marker[elem];
+    if (shared >= ncommon || 
+        shared >= elen-1 || 
+        shared >= eptr[elem+1]-eptr[elem]-1)
+      nbrs[nkept++] = elem;
+    marker[elem] = 0;
+  }
+  return nkept;
+}
+
+
+/*****************************************************************************/
+/*! This function creates the (almost) nodal of a finite element mesh; *r_xadj and *r_adjncy receive arrays obtained with plain malloc() */
+/*****************************************************************************/
+void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, 
+          idx_t **r_xadj, idx_t **r_adjncy)
+{
+  idx_t i, j, nnbrs;
+  idx_t *nptr, *nind;
+  idx_t *xadj, *adjncy;
+  idx_t *marker, *nbrs;
+
+
+  /* construct the node-element list first: nind lists, node by node, the elements containing that node */
+  nptr = ismalloc(nn+1, 0, "CreateGraphNodal: nptr");
+  nind = imalloc(eptr[ne], "CreateGraphNodal: nind");
+
+  for (i=0; i<ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nptr[eind[j]]++;
+  }
+  MAKECSR(i, nn, nptr);
+
+  for (i=0; i<ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nind[nptr[eind[j]]++] = i;
+  }
+  SHIFTCSR(i, nn, nptr);
+
+
+  /* Allocate memory for xadj, since you know its size.
+     These are done using standard malloc as they are returned
+     to the calling function */
+  if ((xadj = (idx_t *)malloc((nn+1)*sizeof(idx_t))) == NULL)
+    gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n");
+  *r_xadj = xadj;
+  iset(nn+1, 0, xadj);
+
+  /* allocate memory for working arrays used by FindCommonNodes */
+  marker = ismalloc(nn, 0, "CreateGraphNodal: marker");
+  nbrs   = imalloc(nn, "CreateGraphNodal: nbrs");
+
+  for (i=0; i<nn; i++) {
+    xadj[i] = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, 
+                  eind, marker, nbrs);
+  }
+  MAKECSR(i, nn, xadj);
+
+  /* Allocate memory for adjncy, since you now know its size (standard malloc,
+     as it is returned to the caller). At least one entry is requested because
+     malloc(0) may return NULL, which must not be mistaken for out-of-memory */
+  if ((adjncy = (idx_t *)malloc((xadj[nn] > 0 ? xadj[nn] : 1)*sizeof(idx_t))) == NULL) {
+    free(xadj);
+    *r_xadj = NULL;
+    gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n");
+  }
+  *r_adjncy = adjncy;
+  /* second pass: recompute every neighbor list and copy it into adjncy */
+  for (i=0; i<nn; i++) {
+    nnbrs = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, 
+                eind, marker, nbrs);
+    for (j=0; j<nnbrs; j++)
+      adjncy[xadj[i]++] = nbrs[j];
+  }
+  SHIFTCSR(i, nn, xadj);
+  
+  gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM);
+}
+
+
+/*****************************************************************************/
+/*! Collects in nbrs every node, other than qid itself, that appears in one
+    of the nelmnts elements listed in elmntids; returns how many were found.
+*/
+/*****************************************************************************/
+idx_t FindCommonNodes(idx_t qid, idx_t nelmnts, idx_t *elmntids, idx_t *eptr, 
+          idx_t *eind, idx_t *marker, idx_t *nbrs)
+{
+  idx_t e, p, elem, node, nfound=0;
+
+  /* pre-mark qid so that it is never reported as its own neighbor */
+  marker[qid] = 1;
+
+  /* record every not-yet-marked node of the listed elements exactly once */
+  for (e=0; e<nelmnts; e++) {
+    elem = elmntids[e];
+    for (p=eptr[elem]; p<eptr[elem+1]; p++) {
+      node = eind[p];
+      if (marker[node] != 0)
+        continue;
+      nbrs[nfound++] = node;
+      marker[node]   = 1;
+    }
+  }
+
+  /* clear the marks that were set above */
+  for (marker[qid]=0, e=0; e<nfound; e++)
+    marker[nbrs[e]] = 0;
+
+  return nfound;
+}
+
+
+
+/*************************************************************************/
+/*! Allocates a mesh_t with gk_malloc() and passes it through InitMesh() */
+/*************************************************************************/
+mesh_t *CreateMesh(void)
+{
+  mesh_t *fresh;
+
+  /* gk_malloc() is given a label naming this allocation; InitMesh() then
+     puts the new structure into its initial state */
+  fresh = (mesh_t *)gk_malloc(sizeof(mesh_t), "CreateMesh: mesh");
+  InitMesh(fresh);
+  return fresh;
+}
+
+
+/*************************************************************************/
+/*! Zero-fills every byte of the given mesh_t structure */
+/*************************************************************************/
+void InitMesh(mesh_t *mesh) 
+{
+  memset(mesh, 0, sizeof(*mesh));
+}
+
+
+/*************************************************************************/
+/*! Releases a mesh's eptr, eind and ewgt arrays and the mesh structure,
+    then sets the caller's pointer to NULL */
+/*************************************************************************/
+void FreeMesh(mesh_t **r_mesh) 
+{
+  mesh_t *victim = *r_mesh;
+
+  gk_free((void **)&victim->eptr, &victim->eind, &victim->ewgt, &victim, LTERM);
+  *r_mesh = NULL;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/meshpart.c b/3rdParty/metis/metis-5.1.0/libmetis/meshpart.c
new file mode 100644
index 000000000..a66d10610
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/meshpart.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * meshpart.c
+ *
+ * This file contains routines for partitioning finite element meshes.
+ *
+ * Started 9/29/97
+ * George
+ *
+ * $Id: meshpart.c 13931 2013-03-29 16:48:48Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+* Partitions a finite element mesh by partitioning its nodal graph (k-way or
+* recursive, per METIS_OPTION_PTYPE) and then inducing epart from npart.
+**************************************************************************/
+int METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+          idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, 
+          idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart)
+{
+  int sigrval=0, renumber=0, ptype;
+  idx_t *xadj=NULL, *adjncy=NULL;
+  idx_t ncon=1, pnumflag=0;
+  int rstatus=METIS_OK;
+  /* returns METIS_ERROR_MEMORY if gk_malloc_init() fails, else metis_rcode(sigrval) */
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+  /* NOTE(review): gk_sigcatch() looks like a setjmp-style catch point that raise(SIGERR) below returns to -- confirm */
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+  ptype    = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY);
+
+  /* renumber the mesh; the numbering option is forced to 0 here and set back to 1 on exit */
+  if (renumber) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    options[METIS_OPTION_NUMBERING] = 0;
+  }
+
+  /* get the nodal graph */
+  rstatus = METIS_MeshToNodal(ne, nn, eptr, eind, &pnumflag, &xadj, &adjncy);
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* partition the nodal graph (nn vertices); the result is written to npart */
+  if (ptype == METIS_PTYPE_KWAY) 
+    rstatus = METIS_PartGraphKway(nn, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
+                  nparts, tpwgts, NULL, options, objval, npart);
+  else 
+    rstatus = METIS_PartGraphRecursive(nn, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
+                  nparts, tpwgts, NULL, options, objval, npart);
+
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* partition the other side of the mesh: elements are the rows, nodes the columns */
+  InduceRowPartFromColumnPart(*ne, eptr, eind, epart, npart, *nparts, tpwgts);
+
+  /* common exit; also reached by the goto above when gk_sigcatch() returned non-zero */
+SIGTHROW:
+  if (renumber) {
+    ChangeMesh2FNumbering2(*ne, *nn, eptr, eind, epart, npart);
+    options[METIS_OPTION_NUMBERING] = 1;
+  }
+
+  METIS_Free(xadj);
+  METIS_Free(adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+
+/*************************************************************************
+* Partitions a finite element mesh by partitioning its dual graph (k-way or
+* recursive, per METIS_OPTION_PTYPE) and then inducing npart from epart.
+**************************************************************************/
+int METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
+          idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, 
+          real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, 
+          idx_t *npart) 
+{
+  int sigrval=0, renumber=0, ptype;
+  idx_t i, j;
+  idx_t *xadj=NULL, *adjncy=NULL, *nptr=NULL, *nind=NULL;
+  idx_t ncon=1, pnumflag=0;
+  int rstatus = METIS_OK;
+  /* returns METIS_ERROR_MEMORY if gk_malloc_init() fails, else metis_rcode(sigrval) */
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+  /* NOTE(review): gk_sigcatch() looks like a setjmp-style catch point that raise(SIGERR) below returns to -- confirm */
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+  ptype    = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY);
+
+  /* renumber the mesh; the numbering option is forced to 0 here and set back to 1 on exit */
+  if (renumber) {
+    ChangeMesh2CNumbering(*ne, eptr, eind);
+    options[METIS_OPTION_NUMBERING] = 0;
+  }
+
+  /* get the dual graph */
+  rstatus = METIS_MeshToDual(ne, nn, eptr, eind, ncommon, &pnumflag, &xadj, &adjncy);
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+  /* partition the dual graph (ne vertices); the result is written to epart */
+  if (ptype == METIS_PTYPE_KWAY) 
+    rstatus = METIS_PartGraphKway(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
+                  nparts, tpwgts, NULL, options, objval, epart);
+  else 
+    rstatus = METIS_PartGraphRecursive(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
+                  nparts, tpwgts, NULL, options, objval, epart);
+
+  if (rstatus != METIS_OK)
+    raise(SIGERR);
+
+
+  /* construct the node-element list (nptr/nind): for each node, the elements that contain it */
+  nptr = ismalloc(*nn+1, 0, "METIS_PartMeshDual: nptr");
+  nind = imalloc(eptr[*ne], "METIS_PartMeshDual: nind");
+  /* first pass: count how many element entries reference each node */
+  for (i=0; i<*ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nptr[eind[j]]++;
+  }
+  MAKECSR(i, *nn, nptr);
+  /* second pass: store the element ids, advancing nptr[node] as a write cursor (SHIFTCSR presumably undoes the advance) */
+  for (i=0; i<*ne; i++) {
+    for (j=eptr[i]; j<eptr[i+1]; j++)
+      nind[nptr[eind[j]]++] = i;
+  }
+  SHIFTCSR(i, *nn, nptr);
+
+  /* partition the other side of the mesh: nodes are the rows, elements the columns */
+  InduceRowPartFromColumnPart(*nn, nptr, nind, npart, epart, *nparts, tpwgts);
+
+  gk_free((void **)&nptr, &nind, LTERM);
+
+  /* common exit; also reached by the goto above when gk_sigcatch() returned non-zero */
+SIGTHROW:
+  if (renumber) {
+    ChangeMesh2FNumbering2(*ne, *nn, eptr, eind, epart, npart);
+    options[METIS_OPTION_NUMBERING] = 1;
+  }
+
+  METIS_Free(xadj);
+  METIS_Free(adjncy);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+
+/*************************************************************************/
+/*! Induces a partitioning of the rows (rpart) from a partitioning of the
+    columns (cpart); empty rows end up as -2. Used by the Nodal and Dual routines. */
+/*************************************************************************/
+void InduceRowPartFromColumnPart(idx_t nrows, idx_t *rowptr, idx_t *rowind,
+         idx_t *rpart, idx_t *cpart, idx_t nparts, real_t *tpwgts)
+{
+  idx_t i, j, k, me;
+  idx_t nnbrs, *pwgts, *nbrdom, *nbrwgt, *nbrmrk;
+  idx_t *itpwgts;
+  /* pwgts counts the rows assigned to each partition; nbrdom/nbrwgt/nbrmrk are per-row scratch */
+  pwgts  = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: pwgts");
+  nbrdom = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: nbrdom");
+  nbrwgt = ismalloc(nparts, 0, "InduceRowPartFromColumnPart: nbrwgt");
+  nbrmrk = ismalloc(nparts, -1, "InduceRowPartFromColumnPart: nbrmrk");
+  /* -1 marks a row that still has to be assigned */
+  iset(nrows, -1, rpart);
+
+  /* setup the integer target partition weights (uniform when tpwgts is NULL) */
+  itpwgts = imalloc(nparts, "InduceRowPartFromColumnPart: itpwgts");
+  if (tpwgts == NULL) {
+    iset(nparts, 1+nrows/nparts, itpwgts);
+  }
+  else {
+    for (i=0; i<nparts; i++)
+      itpwgts[i] = 1+nrows*tpwgts[i];
+  }
+
+  /* first assign the rows consisting only of columns that belong to 
+     a single partition. Assign rows that are empty to -2 (un-assigned) */
+  for (i=0; i<nrows; i++) {
+    if (rowptr[i+1]-rowptr[i] == 0) {
+      rpart[i] = -2;
+      continue;
+    }
+
+    me = cpart[rowind[rowptr[i]]];
+    for (j=rowptr[i]+1; j<rowptr[i+1]; j++) {
+      if (cpart[rowind[j]] != me)
+        break;
+    }
+    if (j == rowptr[i+1]) {
+      rpart[i] = me;
+      pwgts[me]++;
+    }
+  }
+
+  /* next assign the rows consisting of columns belonging to multiple
+     partitions in a  balanced way */
+  for (i=0; i<nrows; i++) {
+    if (rpart[i] == -1) { 
+      for (nnbrs=0, j=rowptr[i]; j<rowptr[i+1]; j++) {
+        me = cpart[rowind[j]];
+        if (nbrmrk[me] == -1) {
+          nbrdom[nnbrs] = me; 
+          nbrwgt[nnbrs] = 1; 
+          nbrmrk[me] = nnbrs++;
+        }
+        else {
+          nbrwgt[nbrmrk[me]]++;
+        }
+      }
+      ASSERT(nnbrs > 0);
+
+      /* assign it first to the domain with most things in common */
+      rpart[i] = nbrdom[iargmax(nnbrs, nbrwgt)];
+
+      /* if overweight, assign it to the first listed domain that is under its target or less overweight */
+      if (pwgts[rpart[i]] > itpwgts[rpart[i]]) {
+        for (j=0; j<nnbrs; j++) {
+          if (pwgts[nbrdom[j]] < itpwgts[nbrdom[j]] ||
+              pwgts[nbrdom[j]]-itpwgts[nbrdom[j]] < pwgts[rpart[i]]-itpwgts[rpart[i]]) {
+            rpart[i] = nbrdom[j];
+            break;
+          }
+        }
+      }
+      pwgts[rpart[i]]++;
+
+      /* reset nbrmrk array */
+      for (j=0; j<nnbrs; j++) 
+        nbrmrk[nbrdom[j]] = -1;
+    }
+  }
+
+  gk_free((void **)&pwgts, &nbrdom, &nbrwgt, &nbrmrk, &itpwgts, LTERM);
+
+}
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/metislib.h b/3rdParty/metis/metis-5.1.0/libmetis/metislib.h
new file mode 100644
index 000000000..93d48f011
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/metislib.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * metislib.h
+ *
+ * This file includes all necessary header files
+ *
+ * Started 8/27/94
+ * George
+ *
+ * $Id: metislib.h 10655 2011-08-02 17:38:11Z benjamin $
+ */
+
+#ifndef _LIBMETIS_METISLIB_H_
+#define _LIBMETIS_METISLIB_H_
+
+#include <GKlib.h>
+
+#if defined(ENABLE_OPENMP)
+  #include <omp.h>
+#endif
+
+
+#include <metis.h>
+#include <rename.h>
+#include <gklib_defs.h>
+
+#include <defs.h>
+#include <struct.h>
+#include <macros.h>
+#include <proto.h>
+
+
+#if defined(COMPILER_MSC)
+#if defined(rint)
+  #undef rint
+#endif
+#define rint(x) ((idx_t)((x)+0.5))  /* MSC substitute for rint(): casts x+0.5 to idx_t, i.e. rounds to nearest only for x >= 0 */
+#endif
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/minconn.c b/3rdParty/metis/metis-5.1.0/libmetis/minconn.c
new file mode 100644
index 000000000..86dc90fbe
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/minconn.c
@@ -0,0 +1,729 @@
+/*!
+\file 
+\brief Functions that deal with pruning the number of adjacent subdomains in kmetis
+
+\date Started 7/15/98
+\author George
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version $Id: minconn.c 10513 2011-07-07 22:06:03Z karypis $
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Computes the subdomain graph, storing each partition's adjacent partitions
+    and connection weights in ctrl->nads/adids/adwgts (grown here as needed) */
+/*************************************************************************/
+void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, pid, other, nparts, nvtxs, nnbrs;
+  idx_t *xadj, *adjncy, *adjwgt, *where;
+  idx_t *pptr, *pind;
+  idx_t nads=0, *vadids, *vadwgts;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  where  = graph->where;
+
+  nparts = ctrl->nparts; 
+  /* vadids lists the partitions adjacent to the current pid; vadwgts is a dense, all-zero weight accumulator */
+  vadids  = ctrl->pvec1;
+  vadwgts = iset(nparts, 0, ctrl->pvec2);
+  /* bucket the vertices by partition: pind[pptr[p]] .. pind[pptr[p+1]-1] are the vertices of p */
+  pptr = iwspacemalloc(ctrl, nparts+1);
+  pind = iwspacemalloc(ctrl, nvtxs);
+  iarray2csr(nvtxs, nparts, where, pptr, pind);
+
+  for (pid=0; pid<nparts; pid++) {
+    switch (ctrl->objtype) {
+      case METIS_OBJTYPE_CUT:
+        {
+          ckrinfo_t *rinfo;
+          cnbr_t *nbrs;
+
+          rinfo = graph->ckrinfo;
+          for (nads=0, ii=pptr[pid]; ii<pptr[pid+1]; ii++) {
+            i = pind[ii];
+            ASSERT(pid == where[i]);
+      
+            if (rinfo[i].ed > 0) {
+              nnbrs = rinfo[i].nnbrs;
+              nbrs  = ctrl->cnbrpool + rinfo[i].inbr;
+      
+              for (j=0; j<nnbrs; j++) {
+                other = nbrs[j].pid;
+                if (vadwgts[other] == 0)
+                  vadids[nads++] = other;
+                vadwgts[other] += nbrs[j].ed;
+              }
+            }
+          }
+        }
+        break;
+
+      case METIS_OBJTYPE_VOL:
+        {
+          vkrinfo_t *rinfo;
+          vnbr_t *nbrs;
+
+          rinfo = graph->vkrinfo;
+          for (nads=0, ii=pptr[pid]; ii<pptr[pid+1]; ii++) {
+            i = pind[ii];
+            ASSERT(pid == where[i]);
+      
+            if (rinfo[i].ned > 0) {
+              nnbrs = rinfo[i].nnbrs;
+              nbrs  = ctrl->vnbrpool + rinfo[i].inbr;
+      
+              for (j=0; j<nnbrs; j++) {
+                other = nbrs[j].pid;
+                if (vadwgts[other] == 0)
+                  vadids[nads++] = other;
+                vadwgts[other] += nbrs[j].ned;
+              }
+            }
+          }
+        }
+        break;
+
+      default:
+        gk_errexit(SIGERR, "Unknown objtype: %d\n", ctrl->objtype);
+    }
+
+    /* See if you have enough memory to store the adjacent info for that subdomain */
+    if (ctrl->maxnads[pid] < nads) {
+      ctrl->maxnads[pid] = 2*nads;
+      ctrl->adids[pid]   = irealloc(ctrl->adids[pid], ctrl->maxnads[pid], 
+                               "ComputeSubDomainGraph: adids[pid]");
+      ctrl->adwgts[pid]  = irealloc(ctrl->adwgts[pid], ctrl->maxnads[pid], 
+                               "ComputeSubDomainGraph: adwgts[pid]");
+    }
+
+    ctrl->nads[pid] = nads;
+    for (j=0; j<nads; j++) {
+      ctrl->adids[pid][j]  = vadids[j];
+      ctrl->adwgts[pid][j] = vadwgts[vadids[j]];
+      /* put the accumulator back to all zeros for the next pid */
+      vadwgts[vadids[j]] = 0;
+    }
+  }
+      
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function updates the weight of an edge in the subdomain graph by
+    adding to it the value of ewgt. The update can either increase or
+    decrease the weight of the subdomain edge based on the value of ewgt.
+
+    \param ctrl holds the subdomain graph (nads, adids, adwgts, maxnads)
+    \param u is the ID of one of the incident subdomains to the edge
+    \param v is the ID of the other incident subdomains to the edge
+    \param ewgt is the weight to be added to the subdomain edge
+    \param r_maxndoms is the maximum number of adjacent subdomains and is
+           updated as necessary. The update is skipped if a NULL value is
+           supplied.
+*/
+/*************************************************************************/
+void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, 
+         idx_t *r_maxndoms)
+{
+  idx_t i, j, nads;
+
+  if (ewgt == 0)
+    return;
+  /* two passes: the first updates u's adjacency list, the second (after the SWAP) updates v's */
+  for (i=0; i<2; i++) {
+    nads = ctrl->nads[u];
+    /* Find the edge */
+    for (j=0; j<nads; j++) {
+      if (ctrl->adids[u][j] == v) {
+        ctrl->adwgts[u][j] += ewgt;
+        break;
+      }
+    }
+
+    if (j == nads) {
+      /* Deal with the case in which the edge was not found */
+      ASSERT(ewgt > 0);
+      if (ctrl->maxnads[u] == nads) {
+        ctrl->maxnads[u] = 2*(nads+1);
+        ctrl->adids[u]   = irealloc(ctrl->adids[u], ctrl->maxnads[u], 
+                               "UpdateEdgeSubDomainGraph: adids[u]");
+        ctrl->adwgts[u]  = irealloc(ctrl->adwgts[u], ctrl->maxnads[u], 
+                               "UpdateEdgeSubDomainGraph: adwgts[u]");
+      }
+      ctrl->adids[u][nads]  = v;
+      ctrl->adwgts[u][nads] = ewgt;
+      nads++;
+      if (r_maxndoms != NULL && nads > *r_maxndoms) {
+        printf("You just increased the maxndoms: %"PRIDX" %"PRIDX"\n", 
+            nads, *r_maxndoms);
+        *r_maxndoms = nads;
+      }
+    }
+    else {
+      /* See if the updated edge becomes 0; if so, remove it by moving the last entry into its slot */
+      ASSERT(ctrl->adwgts[u][j] >= 0);
+      if (ctrl->adwgts[u][j] == 0) {
+        ctrl->adids[u][j]  = ctrl->adids[u][nads-1];
+        ctrl->adwgts[u][j] = ctrl->adwgts[u][nads-1];
+        nads--;
+        if (r_maxndoms != NULL && nads+1 == *r_maxndoms)
+          *r_maxndoms = ctrl->nads[iargmax(ctrl->nparts, ctrl->nads)];
+      }
+    }
+    ctrl->nads[u] = nads;
+    /* NOTE(review): nads[u] is stored only here, i.e. after the iargmax() recomputation above has read it -- confirm intended */
+    SWAP(u, v, j);
+  }
+}
+
+
+/*************************************************************************/
+/*! Tries to reduce the number of adjacent subdomains of the most connected subdomains by moving groups of vertices */
+/*************************************************************************/
+void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, ncon, nparts, scheme, pid_from, pid_to, me, other, nvtxs, 
+        total, max, avg, totalout, nind=0, ncand=0, ncand2, target, target2, 
+        nadd, bestnadd=0;
+  idx_t min, move, *cpwgt;
+  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, 
+        *mypmat, *otherpmat, *kpmat, *ind;
+  idx_t *nads, **adids, **adwgts;
+  ikv_t *cand, *cand2;
+  ipq_t queue;
+  real_t *tpwgts, badfactor=1.4;
+  idx_t *pptr, *pind;
+  idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL;  /* volume specific work arrays */
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  ncon   = graph->ncon;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+  adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt);
+
+  where = graph->where;
+  pwgts = graph->pwgts;  /* We assume that this is properly initialized */
+
+  nparts = ctrl->nparts;
+  tpwgts = ctrl->tpwgts;
+
+  cpwgt     = iwspacemalloc(ctrl, ncon);
+  maxpwgt   = iwspacemalloc(ctrl, nparts*ncon);
+  ind       = iwspacemalloc(ctrl, nvtxs);
+  otherpmat = iset(nparts, 0, iwspacemalloc(ctrl, nparts));
+
+  cand  = ikvwspacemalloc(ctrl, nparts);
+  cand2 = ikvwspacemalloc(ctrl, nparts);
+
+  pptr = iwspacemalloc(ctrl, nparts+1);
+  pind = iwspacemalloc(ctrl, nvtxs);
+  iarray2csr(nvtxs, nparts, where, pptr, pind);
+
+  if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+    /* Vol-refinement specific working arrays */
+    modind  = iwspacemalloc(ctrl, nvtxs);
+    vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
+    pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
+  }
+
+
+  /* Compute the pmat matrix and ndoms */
+  ComputeSubDomainGraph(ctrl, graph);
+
+  nads   = ctrl->nads;
+  adids  = ctrl->adids;
+  adwgts = ctrl->adwgts;
+  /* pvec1/pvec2 are reused here as dense, all-zero per-partition vectors */
+  mypmat = iset(nparts, 0, ctrl->pvec1);
+  kpmat  = iset(nparts, 0, ctrl->pvec2);
+
+  /* Compute the maximum allowed weight for each domain */
+  for (i=0; i<nparts; i++) {
+    for (j=0; j<ncon; j++)
+      maxpwgt[i*ncon+j] = 
+          (ncon == 1 ? 1.25 : 1.025)*tpwgts[i]*graph->tvwgt[j]*ctrl->ubfactors[j];
+  }
+
+  ipqInit(&queue, nparts);
+
+  /* Get into the loop eliminating subdomain connections */
+  while (1) {
+    total = isum(nparts, nads, 1);
+    avg   = total/nparts;
+    max   = nads[iargmax(nparts, nads)];
+
+    IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
+          printf("Adjacent Subdomain Stats: Total: %3"PRIDX", "
+                 "Max: %3"PRIDX"[%zu], Avg: %3"PRIDX"\n", 
+                 total, max, iargmax(nparts, nads), avg)); 
+    /* stop once the largest degree is below badfactor (1.4) times the average degree */
+    if (max < badfactor*avg)
+      break;
+
+    /* Add the subdomains that you will try to reduce their connectivity */
+    ipqReset(&queue);
+    for (i=0; i<nparts; i++) {
+      if (nads[i] >= avg + (max-avg)/2)
+        ipqInsert(&queue, i, nads[i]);
+    }
+    /* move becomes 1 as soon as any queued subdomain finds a target in this round */
+    move = 0;
+    while ((me = ipqGetTop(&queue)) != -1) {
+      totalout = isum(nads[me], adwgts[me], 1);
+
+      for (ncand2=0, i=0; i<nads[me]; i++) {
+        mypmat[adids[me][i]] = adwgts[me][i];
+
+        /* keep track of the weakly connected adjacent subdomains */
+        if (2*nads[me]*adwgts[me][i] < totalout) {
+          cand2[ncand2].val   = adids[me][i];
+          cand2[ncand2++].key = adwgts[me][i];
+        }
+      }
+
+      IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
+            printf("Me: %"PRIDX", Degree: %4"PRIDX", TotalOut: %"PRIDX",\n", 
+                me, nads[me], totalout));
+
+      /* Sort the connections according to their cut */
+      ikvsorti(ncand2, cand2);
+
+      /* Two schemes are used for eliminating subdomain edges.
+         The first, tries to eliminate subdomain edges by moving remote groups 
+         of vertices to subdomains that 'me' is already connected to.
+         The second, tries to eliminate subdomain edges by moving entire sets of 
+         my vertices that connect to the 'other' subdomain to a subdomain that 
+         I'm already connected to.
+         These two schemes are applied in sequence. */
+      target = target2 = -1;
+      for (scheme=0; scheme<2; scheme++) {
+        for (min=0; min<ncand2; min++) {
+          other = cand2[min].val;
+
+          /* pid_from is the subdomain from where the vertices will be removed.
+             pid_to is the adjacent subdomain to pid_from that defines the 
+             (me, other) subdomain edge that needs to be removed */
+          if (scheme == 0) {
+            pid_from = other;
+            pid_to   = me;
+          }
+          else {
+            pid_from  = me;
+            pid_to    = other;
+          }
+  
+          /* Go and find the vertices in pid_from that have a neighbor in pid_to */
+          for (nind=0, ii=pptr[pid_from]; ii<pptr[pid_from+1]; ii++) {
+            i = pind[ii];
+            ASSERT(where[i] == pid_from);
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              if (where[adjncy[j]] == pid_to) {
+                ind[nind++] = i;
+                break;
+              }
+            }
+          }
+  
+          /* Go and construct the otherpmat to see where these nind vertices are 
+             connected to */
+          iset(ncon, 0, cpwgt);
+          for (ncand=0, ii=0; ii<nind; ii++) {
+            i = ind[ii];
+            iaxpy(ncon, 1, vwgt+i*ncon, 1, cpwgt, 1);
+    
+            for (j=xadj[i]; j<xadj[i+1]; j++) {
+              if ((k = where[adjncy[j]]) == pid_from)
+                continue;
+              if (otherpmat[k] == 0)
+                cand[ncand++].val = k;
+              otherpmat[k] += (adjwgt ? adjwgt[j] : 1);
+            }
+          }
+    
+          for (i=0; i<ncand; i++) {
+            cand[i].key = otherpmat[cand[i].val];
+            ASSERT(cand[i].key > 0);
+          }
+
+          ikvsortd(ncand, cand);
+    
+          IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
+                printf("\tMinOut: %4"PRIDX", to: %3"PRIDX", TtlWgt: %5"PRIDX"[#:%"PRIDX"]\n", 
+                    mypmat[other], other, isum(ncon, cpwgt, 1), nind));
+
+          /* Go through and select the first domain that is common with 'me', and does
+             not increase the nads[target] higher than nads[me], subject to the maxpwgt
+             constraint. Traversal is done from the mostly connected to the least. */
+          for (i=0; i<ncand; i++) {
+            k = cand[i].val;
+    
+            if (mypmat[k] > 0) {
+              /* Check if balance will go off */
+              if (!ivecaxpylez(ncon, 1, cpwgt, pwgts+k*ncon, maxpwgt+k*ncon))
+                continue;
+    
+              /* get a dense vector out of k's connectivity */
+              for (j=0; j<nads[k]; j++) 
+                kpmat[adids[k][j]] = adwgts[k][j];
+    
+              /* Check if the move to domain k will increase the nads of another
+                 subdomain j that the set of vertices being moved are connected
+                 to but domain k is not connected to. */
+              for (j=0; j<nparts; j++) {
+                if (otherpmat[j] > 0 && kpmat[j] == 0 && nads[j]+1 >= nads[me]) 
+                  break;
+              }
+  
+              /* There were no bad second level effects. See if you can find a
+                 subdomain to move to. */
+              if (j == nparts) { 
+                for (nadd=0, j=0; j<nparts; j++) {
+                  if (otherpmat[j] > 0 && kpmat[j] == 0)
+                    nadd++;
+                }
+    
+                IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
+                      printf("\t\tto=%"PRIDX", nadd=%"PRIDX", %"PRIDX"\n", k, nadd, nads[k]));
+    
+                if (nads[k]+nadd < nads[me]) {
+                  if (target2 == -1 || nads[target2]+bestnadd > nads[k]+nadd ||
+                      (nads[target2]+bestnadd == nads[k]+nadd && bestnadd > nadd)) {
+                    target2  = k;
+                    bestnadd = nadd;
+                  }
+                }
+  
+                if (nadd == 0) 
+                  target = k;
+              }
+
+              /* reset kpmat for the next iteration */
+              for (j=0; j<nads[k]; j++) 
+                kpmat[adids[k][j]] = 0;
+            }
+
+            if (target != -1)
+              break;
+          }
+
+          /* reset the otherpmat for the next iteration */
+          for (i=0; i<ncand; i++) 
+            otherpmat[cand[i].val] = 0;
+
+          if (target == -1 && target2 != -1)
+            target = target2;
+    
+          if (target != -1) {
+            IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
+                printf("\t\tScheme: %"PRIDX". Moving to %"PRIDX"\n", scheme, target));
+            move = 1;
+            break;
+          }
+        }
+
+        if (target != -1)
+          break;  /* A move was found. No need to try the other scheme */
+      }
+
+      /* reset the mypmat for next iteration */
+      for (i=0; i<nads[me]; i++) 
+        mypmat[adids[me][i]] = 0;
+
+      /* Note that once a target is found the above loops exit right away. So the
+         following variables are valid */
+      if (target != -1) {
+        switch (ctrl->objtype) {
+          case METIS_OBJTYPE_CUT:
+            MoveGroupMinConnForCut(ctrl, graph, target, nind, ind);
+            break;
+          case METIS_OBJTYPE_VOL:
+            MoveGroupMinConnForVol(ctrl, graph, target, nind, ind, vmarker, 
+                pmarker, modind);
+            break;
+          default:
+            gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+        }
+
+        /* Update the csr representation of the partitioning vector */
+        iarray2csr(nvtxs, nparts, where, pptr, pind);
+      }
+    }
+    /* stop when the whole round produced no move */
+    if (move == 0)
+      break;
+  }
+
+  ipqFree(&queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Moves the nind vertices listed in ind to partition 'to' and updates their rinfo, pwgts, mincut and the subdomain graph */
+/*************************************************************************/
+void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, 
+         idx_t *ind)
+{
+  idx_t i, ii, j, jj, k, l, nvtxs, nbnd, from, me;
+  idx_t *xadj, *adjncy, *adjwgt, *where, *bndptr, *bndind;
+  ckrinfo_t *myrinfo;
+  cnbr_t *mynbrs;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+
+  where  = graph->where;
+  bndptr = graph->bndptr;
+  bndind = graph->bndind;
+  /* NOTE(review): nbnd is passed by name to the Update* helpers below and written back at the end, so they are presumably macros -- confirm */
+  nbnd = graph->nbnd;
+  /* the vertices are processed from the end of ind towards its start */
+  while (--nind>=0) {
+    i    = ind[nind];
+    from = where[i];
+
+    myrinfo = graph->ckrinfo+i;
+    if (myrinfo->inbr == -1) {
+      myrinfo->inbr  = cnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+      myrinfo->nnbrs = 0;
+    }
+    mynbrs = ctrl->cnbrpool + myrinfo->inbr;
+
+    /* find the location of 'to' in myrinfo or create it if it is not there */
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].pid == to)
+        break;
+    }
+    if (k == myrinfo->nnbrs) {
+      ASSERT(k < xadj[i+1]-xadj[i]);
+      mynbrs[k].pid = to;
+      mynbrs[k].ed  = 0;
+      myrinfo->nnbrs++;
+    }
+
+    /* Update pwgts */
+    iaxpy(graph->ncon,  1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon,   1);
+    iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1);
+
+    /* Update mincut */
+    graph->mincut -= mynbrs[k].ed-myrinfo->id;
+
+    /* Update subdomain connectivity graph to reflect the move of 'i' */
+    UpdateEdgeSubDomainGraph(ctrl, from, to, myrinfo->id-mynbrs[k].ed, NULL);
+
+    /* Update ID/ED and BND related information for the moved vertex */
+    UpdateMovedVertexInfoAndBND(i, from, k, to, myrinfo, mynbrs, where, nbnd, 
+        bndptr, bndind, BNDTYPE_REFINE);
+
+    /* Update the degrees of adjacent vertices */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      ii = adjncy[j];
+      me = where[ii];
+      myrinfo = graph->ckrinfo+ii;
+
+      UpdateAdjacentVertexInfoAndBND(ctrl, ii, xadj[ii+1]-xadj[ii], me,
+          from, to, myrinfo, adjwgt[j], nbnd, bndptr, bndind, BNDTYPE_REFINE);
+
+      /* Update subdomain graph to reflect the move of 'i' for domains other 
+         than 'from' and 'to' */
+      if (me != from && me != to) {
+        UpdateEdgeSubDomainGraph(ctrl, from, me, -adjwgt[j], NULL);
+        UpdateEdgeSubDomainGraph(ctrl, to, me, adjwgt[j], NULL);
+      }
+    }
+  }
+
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+
+  graph->nbnd = nbnd;
+
+}
+
+
+/*************************************************************************/
+/*! Moves the nind vertices listed in ind to partition 'to' and updates minvol, mincut, pwgts and the subdomain graph */
+/*************************************************************************/
+void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, 
+         idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind)
+{
+  idx_t i, ii, j, jj, k, l, nvtxs, from, me, other, xgain, ewgt;
+  idx_t *xadj, *vsize, *adjncy, *where;
+  vkrinfo_t *myrinfo, *orinfo;
+  vnbr_t *mynbrs, *onbrs;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vsize  = graph->vsize;
+  adjncy = graph->adjncy;
+  where  = graph->where;
+  /* the vertices are processed from the end of ind towards its start */
+  while (--nind>=0) {
+    i    = ind[nind];
+    from = where[i];
+
+    myrinfo = graph->vkrinfo+i;
+    if (myrinfo->inbr == -1) {
+      myrinfo->inbr  = vnbrpoolGetNext(ctrl, xadj[i+1]-xadj[i]+1);
+      myrinfo->nnbrs = 0;
+    }
+    mynbrs = ctrl->vnbrpool + myrinfo->inbr;
+    /* xgain is subtracted from graph->minvol below */
+    xgain = (myrinfo->nid == 0 && myrinfo->ned > 0 ? vsize[i] : 0);
+
+    //printf("Moving %"PRIDX" from %"PRIDX" to %"PRIDX" [vsize: %"PRIDX"] [xgain: %"PRIDX"]\n", 
+    //    i, from, to, vsize[i], xgain);
+    
+    /* find the location of 'to' in myrinfo or create it if it is not there */
+    for (k=0; k<myrinfo->nnbrs; k++) {
+      if (mynbrs[k].pid == to)
+        break;
+    }
+
+    if (k == myrinfo->nnbrs) {
+      //printf("Missing neighbor\n");
+
+      if (myrinfo->nid > 0)
+        xgain -= vsize[i];
+
+      /* determine the volume gain resulting from that move */
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        ii     = adjncy[j];
+        other  = where[ii];
+        orinfo = graph->vkrinfo+ii;
+        onbrs  = ctrl->vnbrpool + orinfo->inbr;
+        ASSERT(other != to)
+
+        //printf("  %8d %8d %3d\n", (int)ii, (int)vsize[ii], (int)other);
+
+        if (from == other) {
+          /* Same subdomain vertex: Decrease the gain if 'to' is a new neighbor. */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == to)
+              break;
+          }
+          if (l == orinfo->nnbrs) 
+            xgain -= vsize[ii];
+        }
+        else {
+          /* Remote vertex: the volume increases (gain drops) if 'to' is a new subdomain */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == to)
+              break;
+          }
+          if (l == orinfo->nnbrs) 
+            xgain -= vsize[ii];
+
+          /* Remote vertex: the volume decreases (gain rises) if i is the only connection to 'from' */
+          for (l=0; l<orinfo->nnbrs; l++) {
+            if (onbrs[l].pid == from && onbrs[l].ned == 1) {
+              xgain += vsize[ii];
+              break;
+            }
+          }
+        }
+      }
+      graph->minvol -= xgain;
+      graph->mincut -= -myrinfo->nid;
+      ewgt = myrinfo->nid;
+    }
+    else {
+      graph->minvol -= (xgain + mynbrs[k].gv);
+      graph->mincut -= mynbrs[k].ned-myrinfo->nid;
+      ewgt = myrinfo->nid-mynbrs[k].ned;
+    }
+
+    /* Update where and pwgts */
+    where[i] = to;
+    iaxpy(graph->ncon,  1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+to*graph->ncon,   1);
+    iaxpy(graph->ncon, -1, graph->vwgt+i*graph->ncon, 1, graph->pwgts+from*graph->ncon, 1);
+
+    /* Update subdomain connectivity graph to reflect the move of 'i' */
+    UpdateEdgeSubDomainGraph(ctrl, from, to, ewgt, NULL);
+
+    /* Update the subdomain connectivity of the adjacent vertices (each edge counts as weight 1 here) */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      me = where[adjncy[j]];
+      if (me != from && me != to) {
+        UpdateEdgeSubDomainGraph(ctrl, from, me, -1, NULL);
+        UpdateEdgeSubDomainGraph(ctrl, to, me, 1, NULL);
+      }
+    }
+
+    /* Update the id/ed/gains/bnd of potentially affected nodes */
+    KWayVolUpdate(ctrl, graph, i, from, to, NULL, NULL, NULL, NULL,
+        NULL, BNDTYPE_REFINE, vmarker, pmarker, modind);
+
+    /*CheckKWayVolPartitionParams(ctrl, graph);*/
+  }
+  ASSERT(ComputeCut(graph, where) == graph->mincut);
+  ASSERTP(ComputeVolume(graph, where) == graph->minvol, 
+      ("%"PRIDX" %"PRIDX"\n", ComputeVolume(graph, where), graph->minvol));
+
+}
+
+
+/*************************************************************************/
+/*! Prints the total and maximum number of adjacent subdomains of a partitioning. For debugging purposes. */
+/*************************************************************************/
+void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where)
+{
+  idx_t i, j, k, me, nvtxs, total, max;
+  idx_t *xadj, *adjncy, *adjwgt, *pmat;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  adjwgt = graph->adjwgt;
+  /* pmat is a dense nparts x nparts matrix: pmat[a*nparts+b] sums the weights of the edges going from a to b */
+  pmat = ismalloc(nparts*nparts, 0, "PrintSubDomainGraph: pmat");
+
+  for (i=0; i<nvtxs; i++) {
+    me = where[i];
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (where[k] != me) 
+        pmat[me*nparts+where[k]] += adjwgt[j];
+    }
+  }
+
+  /* printf("Subdomain Info\n"); */
+  total = max = 0;
+  for (i=0; i<nparts; i++) {
+    for (k=0, j=0; j<nparts; j++) {
+      if (pmat[i*nparts+j] > 0)
+        k++;
+    }
+    total += k;
+
+    if (k > max)
+      max = k;
+/*
+    printf("%2"PRIDX" -> %2"PRIDX"  ", i, k);
+    for (j=0; j<nparts; j++) {
+      if (pmat[i*nparts+j] > 0)
+        printf("[%2"PRIDX" %4"PRIDX"] ", j, pmat[i*nparts+j]);
+    }
+    printf("\n");
+*/
+  }
+  printf("Total adjacent subdomains: %"PRIDX", Max: %"PRIDX"\n", total, max);
+
+  gk_free((void **)&pmat, LTERM);
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/mincover.c b/3rdParty/metis/metis-5.1.0/libmetis/mincover.c
new file mode 100644
index 000000000..ed437fff1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/mincover.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * mincover.c
+ *
+ * This file implements the minimum cover algorithm
+ *
+ * Started 8/1/97
+ * George
+ *
+ * $Id: mincover.c 9942 2011-05-17 22:09:52Z karypis $
+ */
+
+#include "metislib.h"
+
+/*************************************************************************
+* Constants used by mincover algorithm
+**************************************************************************/
+#define INCOL 10 /* 'flag' value for the DFS routines: root is handled as a col node */
+#define INROW 20 /* 'flag' value for the DFS routines: root is handled as a row node */
+#define VC 1 /* where[] label set by MinCover_RowDFS on its INCOL branch */
+#define SC 2 /* initial where[] label of nodes 0..asize-1 in MinCover_Decompose */
+#define HC 3 /* where[] label set by MinCover_ColDFS on its INCOL branch */
+#define VR 4 /* where[] label set by MinCover_RowDFS on its INROW branch */
+#define SR 5 /* initial where[] label of nodes asize..bsize-1 in MinCover_Decompose */
+#define HR 6 /* where[] label set by MinCover_ColDFS on its INROW branch */
+
+
+/*************************************************************************
+* This function returns the min-cover of a bipartite graph.
+* The algorithm used is due to Hopcroft and Karp as modified by Duff et al.
+* (xadj, adjncy): the adjacency structure of the bipartite graph
+*       asize: the number of vertices in the first part of the bipartite graph
+* bsize-asize: the number of vertices in the second part
+*        0..(asize-1)     > A vertices
+*        asize..(bsize-1) > B vertices
+*
+* Returns:
+*  cover : the actual cover (caller-provided array, at most bsize entries)
+*  csize : the size of the cover
+**************************************************************************/
+void MinCover(idx_t *xadj, idx_t *adjncy, idx_t asize, idx_t bsize, idx_t *cover, idx_t *csize)
+{
+  idx_t i, j;
+  idx_t *mate, *queue, *flag, *level, *lst;
+  idx_t fptr, rptr, lstptr;
+  idx_t row, maxlevel, col;
+
+  mate = ismalloc(bsize, -1, "MinCover: mate"); /* mate[v] = matched partner of v; -1 marks a free vertex */
+  flag = imalloc(bsize, "MinCover: flag");
+  level = imalloc(bsize, "MinCover: level");
+  queue = imalloc(bsize, "MinCover: queue");
+  lst = imalloc(bsize, "MinCover: lst");
+
+  /* Get a cheap matching */
+  for (i=0; i<asize; i++) {
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      if (mate[adjncy[j]] == -1) { /* first still-free neighbour wins */
+        mate[i] = adjncy[j];
+        mate[adjncy[j]] = i;
+        break;
+      }
+    }
+  }
+
+  /* Get into the main loop */
+  while (1) { /* left only through the 'lstptr == 0' break below */
+    /* Initialization */
+    fptr = rptr = 0;   /* Empty Queue */
+    lstptr = 0;        /* Empty List */
+    for (i=0; i<bsize; i++) {
+      level[i] = -1;
+      flag[i] = 0;
+    }
+    maxlevel = bsize; /* sentinel: no free column has been reached yet */
+
+    /* Insert free nodes into the queue */
+    for (i=0; i<asize; i++) 
+      if (mate[i] == -1) {
+        queue[rptr++] = i;
+        level[i] = 0; /* free A vertices are the BFS roots */
+      }
+
+    /* Perform the BFS */
+    while (fptr != rptr) {
+      row = queue[fptr++];
+      if (level[row] < maxlevel) { /* rows at or beyond the level of the first hit are not expanded */
+        flag[row] = 1;
+        for (j=xadj[row]; j<xadj[row+1]; j++) {
+          col = adjncy[j];
+          if (!flag[col]) {  /* If this column has not been accessed yet */
+            flag[col] = 1;
+            if (mate[col] == -1) { /* Free column node was found */
+              maxlevel = level[row];
+              lst[lstptr++] = col; /* remember it as a start point for MinCover_Augment */
+            }
+            else { /* This column node is matched */
+              if (flag[mate[col]]) 
+                printf("\nSomething wrong, flag[%"PRIDX"] is 1",mate[col]);
+              queue[rptr++] = mate[col];
+              level[mate[col]] = level[row] + 1;
+            }
+          }
+        }
+      } 
+    }
+
+    if (lstptr == 0)
+      break;   /* No free columns can be reached */
+
+    /* Perform restricted DFS from the free column nodes */
+    for (i=0; i<lstptr; i++)
+      MinCover_Augment(xadj, adjncy, lst[i], mate, flag, level, maxlevel); /* return value is ignored here */
+  }
+
+  MinCover_Decompose(xadj, adjncy, asize, bsize, mate, cover, csize); /* fills cover[] and *csize */
+
+  gk_free((void **)&mate, &flag, &level, &queue, &lst, LTERM);
+
+}
+
+
+/*************************************************************************
+* Restricted DFS from column node col; returns 1 if the matching was augmented, else 0
+**************************************************************************/
+idx_t MinCover_Augment(idx_t *xadj, idx_t *adjncy, idx_t col, idx_t *mate, idx_t *flag, idx_t *level, idx_t maxlevel)
+{
+  idx_t i;
+  idx_t row = -1;
+  idx_t status;
+
+  flag[col] = 2; /* same mark that is used for visited row nodes below */
+  for (i=xadj[col]; i<xadj[col+1]; i++) {
+    row = adjncy[i];
+
+    if (flag[row] == 1) { /* First time through this row node */
+      if (level[row] == maxlevel) {  /* (col, row) is an edge of the G^T */
+        flag[row] = 2;  /* Mark this node as being visited */
+        if (maxlevel != 0)
+          status = MinCover_Augment(xadj, adjncy, mate[row], mate, flag, level, maxlevel-1); /* continue from row's current mate, one level up */
+        else
+          status = 1; /* level 0 reached: the path ends at this row */
+
+        if (status) {
+          mate[col] = row; /* re-match along the path while unwinding the recursion */
+          mate[row] = col;
+          return 1;
+        }
+      }
+    }
+  }
+
+  return 0; /* no augmenting path through col */
+}
+
+
+
+/*************************************************************************
+* This function performs a coarse decomposition and determines the 
+* min-cover. Writes the chosen node indices to cover[] and their count to *csize.
+* REF: Pothen, ACM Trans. on Math. Software
+**************************************************************************/
+void MinCover_Decompose(idx_t *xadj, idx_t *adjncy, idx_t asize, idx_t bsize, idx_t *mate, idx_t *cover, idx_t *csize)
+{
+  idx_t i, k;
+  idx_t *where;
+  idx_t card[10]; /* card[label] = number of nodes carrying that where[] label */
+
+  where = imalloc(bsize, "MinCover_Decompose: where");
+  for (i=0; i<10; i++)
+    card[i] = 0;
+
+  for (i=0; i<asize; i++)
+    where[i] = SC; /* default label for the first part */
+  for (; i<bsize; i++)
+    where[i] = SR; /* default label for the second part */
+
+  for (i=0; i<asize; i++) 
+    if (mate[i] == -1)  
+      MinCover_ColDFS(xadj, adjncy, i, mate, where, INCOL); /* relabels reached nodes HC/HR */
+  for (; i<bsize; i++) 
+    if (mate[i] == -1)  
+      MinCover_RowDFS(xadj, adjncy, i, mate, where, INROW); /* relabels reached nodes VR/VC */
+
+  for (i=0; i<bsize; i++) 
+    card[where[i]]++;
+
+  k = 0;
+  if (iabs(card[VC]+card[SC]-card[HR]) < iabs(card[VC]-card[SR]-card[HR])) {  /* S = VC+SC+HR */
+    /* printf("%"PRIDX" %"PRIDX" ",vc+sc, hr); */
+    for (i=0; i<bsize; i++) 
+      if (where[i] == VC || where[i] == SC || where[i] == HR)
+        cover[k++] = i;
+  }
+  else {  /* S = VC+SR+HR */
+    /* printf("%"PRIDX" %"PRIDX" ",vc, hr+sr); */
+    for (i=0; i<bsize; i++) 
+      if (where[i] == VC || where[i] == SR || where[i] == HR)
+        cover[k++] = i;
+  }
+
+  *csize = k;
+  gk_free((void **)&where, LTERM);
+
+}
+
+
+/*************************************************************************
+* This function performs a DFS along alternating paths, starting from an
+* unmatched col node; visited nodes are labelled HC (INCOL) or HR (INROW)
+**************************************************************************/
+void MinCover_ColDFS(idx_t *xadj, idx_t *adjncy, idx_t root, idx_t *mate, idx_t *where, idx_t flag)
+{
+  idx_t i;
+
+  if (flag == INCOL) {
+    if (where[root] == HC) /* already visited */
+      return;
+    where[root] = HC;
+    for (i=xadj[root]; i<xadj[root+1]; i++) 
+      MinCover_ColDFS(xadj, adjncy, adjncy[i], mate, where, INROW); /* follow every edge */
+  }
+  else {
+    if (where[root] == HR) /* already visited */
+      return;
+    where[root] = HR;
+    if (mate[root] != -1)
+      MinCover_ColDFS(xadj, adjncy, mate[root], mate, where, INCOL); /* follow only the matched edge */
+  }
+
+}
+
+/*************************************************************************
+* This function performs a DFS along alternating paths, starting from an
+* unmatched row node; visited nodes are labelled VR (INROW) or VC (INCOL)
+**************************************************************************/
+void MinCover_RowDFS(idx_t *xadj, idx_t *adjncy, idx_t root, idx_t *mate, idx_t *where, idx_t flag)
+{
+  idx_t i;
+
+  if (flag == INROW) {
+    if (where[root] == VR) /* already visited */
+      return;
+    where[root] = VR;
+    for (i=xadj[root]; i<xadj[root+1]; i++) 
+      MinCover_RowDFS(xadj, adjncy, adjncy[i], mate, where, INCOL); /* follow every edge */
+  }
+  else {
+    if (where[root] == VC) /* already visited */
+      return;
+    where[root] = VC;
+    if (mate[root] != -1)
+      MinCover_RowDFS(xadj, adjncy, mate[root], mate, where, INROW); /* follow only the matched edge */
+  }
+
+}
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/mmd.c b/3rdParty/metis/metis-5.1.0/libmetis/mmd.c
new file mode 100644
index 000000000..778cc1548
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/mmd.c
@@ -0,0 +1,593 @@
+/*
+ * mmd.c
+ *
+ * **************************************************************
+ * The following C function was developed from a FORTRAN subroutine
+ * in SPARSPAK written by Eleanor Chu, Alan George, Joseph Liu
+ * and Esmond Ng.
+ * 
+ * The FORTRAN-to-C transformation and modifications such as dynamic
+ * memory allocation and deallocation were performed by Chunguang
+ * Sun.
+ * ************************************************************** 
+ *
+ * Taken from SMMS, George 12/13/94
+ *
+ * The meaning of invperm, and perm vectors is different from that
+ * in genqmd_ of SparsPak
+ *
+ * $Id: mmd.c 5993 2009-01-07 02:09:57Z karypis $
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+*  genmmd  -- multiple minimum external degree
+*  purpose -- this routine implements the minimum degree
+*     algorithm. it makes use of the implicit representation
+*     of elimination graphs by quotient graphs, and the notion
+*     of indistinguishable nodes. It also implements the modifications
+*     by multiple elimination and minimum external degree.
+*     Caution -- the adjacency vector adjncy will be destroyed.
+*  Input parameters --
+*     neqns -- number of equations (nothing is done if neqns <= 0).
+*     (xadj, adjncy) -- the adjacency structure.
+*     delta  -- tolerance value for multiple elimination.
+*     maxint -- maximum machine representable (short) integer
+*               (any smaller estimate will do) for marking nodes.
+*  Output parameters --
+*     perm -- the minimum degree ordering.
+*     invp -- the inverse of perm.
+*     *ncsub -- an upper bound on the number of nonzero subscripts
+*               for the compressed storage scheme.
+*  Working parameters --
+*     head -- vector for head of degree lists.
+*     invp  -- used temporarily for degree forward link.
+*     perm  -- used temporarily for degree backward link.
+*     qsize -- vector for size of supernodes.
+*     list -- vector for temporary linked lists.
+*     marker -- a temporary marker vector.
+*  Subroutines used -- mmdelm, mmdint, mmdnum, mmdupd.
+**************************************************************************/
+void genmmd(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *invp, idx_t *perm,
+     idx_t delta, idx_t *head, idx_t *qsize, idx_t *list, idx_t *marker,
+     idx_t maxint, idx_t *ncsub)
+{
+    idx_t  ehead, i, mdeg, mdlmt, mdeg_node, nextmd, num, tag;
+
+    if (neqns <= 0)  
+      return; /* *ncsub and all arrays are left untouched */
+
+    /* Adjust from C to Fortran */
+    xadj--; adjncy--; invp--; perm--; head--; qsize--; list--; marker--; /* arrays are indexed 1..neqns below; stored indices are presumably already 1-based -- confirm against caller */
+
+    /* initialization for the minimum degree algorithm. */
+    *ncsub = 0;
+    mmdint(neqns, xadj, adjncy, head, invp, perm, qsize, list, marker);
+
+    /*  'num' counts the number of ordered nodes plus 1. */
+    num = 1;
+
+    /* eliminate all isolated nodes. */
+    nextmd = head[1]; /* degree bucket 1 as built by mmdint */
+    while (nextmd > 0) {
+      mdeg_node = nextmd;
+      nextmd = invp[mdeg_node];
+      marker[mdeg_node] = maxint;
+      invp[mdeg_node] = -num; /* negative value = already numbered */
+      num = num + 1;
+    }
+
+    /* search for node of the minimum degree. 'mdeg' is the current */
+    /* minimum degree; 'tag' is used to facilitate marking nodes.   */
+    if (num > neqns) 
+      goto n1000;
+    tag = 1;
+    head[1] = 0;
+    mdeg = 2;
+
+    /* infinite loop here ! */
+    while (1) {
+      while (head[mdeg] <= 0) 
+        mdeg++;
+
+      /* use value of 'delta' to set up 'mdlmt', which governs */
+      /* when a degree update is to be performed.              */
+      mdlmt = mdeg + delta;
+      ehead = 0; /* head of the list of nodes eliminated in this round */
+
+n500:
+      mdeg_node = head[mdeg];
+      while (mdeg_node <= 0) {
+        mdeg++;
+
+        if (mdeg > mdlmt) 
+          goto n900;
+        mdeg_node = head[mdeg];
+      };
+
+      /*  remove 'mdeg_node' from the degree structure. */
+      nextmd = invp[mdeg_node];
+      head[mdeg] = nextmd;
+      if (nextmd > 0)  
+        perm[nextmd] = -mdeg;
+      invp[mdeg_node] = -num;
+      *ncsub += mdeg + qsize[mdeg_node] - 2;
+      if ((num+qsize[mdeg_node]) > neqns)  
+        goto n1000;
+
+      /*  eliminate 'mdeg_node' and perform quotient graph */
+      /*  transformation. reset 'tag' value if necessary.    */
+      tag++;
+      if (tag >= maxint) {
+        tag = 1;
+        for (i = 1; i <= neqns; i++)
+          if (marker[i] < maxint)  
+            marker[i] = 0;
+      };
+
+      mmdelm(mdeg_node, xadj, adjncy, head, invp, perm, qsize, list, marker, maxint, tag);
+
+      num += qsize[mdeg_node];
+      list[mdeg_node] = ehead; /* push onto the eliminated-node list consumed by mmdupd */
+      ehead = mdeg_node;
+      if (delta >= 0) 
+        goto n500; /* with a negative delta a degree update follows every single elimination */
+
+ n900:
+      /* update degrees of the nodes involved in the  */
+      /* minimum degree nodes elimination.            */
+      if (num > neqns)  
+        goto n1000;
+      mmdupd( ehead, neqns, xadj, adjncy, delta, &mdeg, head, invp, perm, qsize, list, marker, maxint, &tag);
+    }; /* end of -- while ( 1 ) -- */
+
+n1000:
+    mmdnum( neqns, perm, invp, qsize );
+
+    /* Adjust from Fortran back to C*/
+    xadj++; adjncy++; invp++; perm++; head++; qsize++; list++; marker++; /* only the local pointer copies change; no effect on the caller */
+}
+
+
+/**************************************************************************
+*           mmdelm ...... multiple minimum degree elimination
+* Purpose -- This routine eliminates the node mdeg_node of minimum degree
+*     from the adjacency structure, which is stored in the quotient
+*     graph format. It also transforms the quotient graph representation
+*     of the elimination graph.
+* Input parameters --
+*     mdeg_node -- node of minimum degree.
+*     maxint -- estimate of maximum representable (short) integer.
+*     tag    -- tag value.
+* Updated parameters --
+*     (xadj, adjncy) -- updated adjacency structure.
+*     (head, forward, backward) -- degree doubly linked structure.
+*     qsize -- size of supernode.
+*     marker -- marker vector.
+*     list -- temporary linked list of eliminated nabors.
+***************************************************************************/
+void mmdelm(idx_t mdeg_node, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward,
+     idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker, idx_t maxint, idx_t tag)
+{
+    idx_t   element, i,   istop, istart, j,
+          jstop, jstart, link,
+          nabor, node, npv, nqnbrs, nxnode,
+          pvnode, rlmt, rloc, rnode, xqnbr;
+
+    /* find the reachable set of 'mdeg_node' and */
+    /* place it in the data structure.           */
+    marker[mdeg_node] = tag;
+    istart = xadj[mdeg_node];
+    istop = xadj[mdeg_node+1] - 1;
+
+    /* 'element' points to the beginning of the list of  */
+    /* eliminated nabors of 'mdeg_node', and 'rloc' gives the */
+    /* storage location for the next reachable node.   */
+    element = 0;
+    rloc = istart;
+    rlmt = istop;
+    for ( i = istart; i <= istop; i++ ) {
+        nabor = adjncy[i];
+        if ( nabor == 0 ) break; /* a 0 entry terminates a shortened adjacency list */
+        if ( marker[nabor] < tag ) {
+           marker[nabor] = tag;
+           if ( forward[nabor] < 0 )  { /* negative forward[]: nabor was already eliminated */
+              list[nabor] = element;
+              element = nabor;
+           } else {
+              adjncy[rloc] = nabor; /* uneliminated nabor: keep it, compacting in place */
+              rloc++;
+           };
+        }; /* end of -- if -- */
+    }; /* end of -- for -- */
+
+  /* merge with reachable nodes from generalized elements. */
+  while ( element > 0 ) {
+      adjncy[rlmt] = -element; /* negative entry = link to the list that continues this one */
+      link = element;
+
+n400:
+      jstart = xadj[link];
+      jstop = xadj[link+1] - 1;
+      for ( j = jstart; j <= jstop; j++ ) {
+          node = adjncy[j];
+          link = -node;
+          if ( node < 0 )  goto n400; /* follow the continuation link */
+          if ( node == 0 ) break;
+          if ((marker[node]<tag)&&(forward[node]>=0)) {
+             marker[node] = tag;
+             /*use storage from eliminated nodes if necessary.*/
+             while ( rloc >= rlmt ) {
+                   link = -adjncy[rlmt];
+                   rloc = xadj[link];
+                   rlmt = xadj[link+1] - 1;
+             };
+             adjncy[rloc] = node;
+             rloc++;
+          };
+      }; /* end of -- for ( j = jstart; -- */
+      element = list[element];
+    };  /* end of -- while ( element > 0 ) -- */
+    if ( rloc <= rlmt ) adjncy[rloc] = 0; /* terminate the reachable-set list */
+    /* for each node in the reachable set, do the following. */
+    link = mdeg_node;
+
+n1100:
+    istart = xadj[link];
+    istop = xadj[link+1] - 1;
+    for ( i = istart; i <= istop; i++ ) {
+        rnode = adjncy[i];
+        link = -rnode;
+        if ( rnode < 0 ) goto n1100; /* follow the continuation link */
+        if ( rnode == 0 ) return; /* end of the reachable set */
+
+        /* 'rnode' is in the degree list structure. */
+        pvnode = backward[rnode];
+        if (( pvnode != 0 ) && ( pvnode != (-maxint) )) {
+           /* then remove 'rnode' from the structure. */
+           nxnode = forward[rnode];
+           if ( nxnode > 0 ) backward[nxnode] = pvnode;
+           if ( pvnode > 0 ) forward[pvnode] = nxnode;
+           npv = -pvnode;
+           if ( pvnode < 0 ) head[npv] = nxnode; /* rnode was first in its list: negative backward[] holds -degree */
+        };
+
+        /* purge inactive quotient nabors of 'rnode'. */
+        jstart = xadj[rnode];
+        jstop = xadj[rnode+1] - 1;
+        xqnbr = jstart;
+        for ( j = jstart; j <= jstop; j++ ) {
+            nabor = adjncy[j];
+            if ( nabor == 0 ) break;
+            if ( marker[nabor] < tag ) { /* keep only nabors not marked during this elimination */
+                adjncy[xqnbr] = nabor;
+                xqnbr++;
+            };
+        };
+
+        /* no active nabor after the purging. */
+        nqnbrs = xqnbr - jstart;
+        if ( nqnbrs <= 0 ) {
+           /* merge 'rnode' with 'mdeg_node'. */
+           qsize[mdeg_node] += qsize[rnode];
+           qsize[rnode] = 0;
+           marker[rnode] = maxint;
+           forward[rnode] = -mdeg_node;
+           backward[rnode] = -maxint;
+        } else {
+           /* flag 'rnode' for degree update, and  */
+           /* add 'mdeg_node' as a nabor of 'rnode'.      */
+           forward[rnode] = nqnbrs + 1;
+           backward[rnode] = 0; /* 0 = degree update pending (tested in mmdupd) */
+           adjncy[xqnbr] = mdeg_node;
+           xqnbr++;
+           if ( xqnbr <= jstop )  adjncy[xqnbr] = 0;
+        };
+      }; /* end of -- for ( i = istart; -- */
+      return;
+ }
+
+/***************************************************************************
+*    mmdint ---- mult minimum degree initialization
+*    purpose -- this routine performs initialization for the
+*       multiple elimination version of the minimum degree algorithm.
+*    input parameters --
+*       neqns  -- number of equations.
+*       (xadj, adjncy) -- adjacency structure.
+*    output parameters --
+*       (head, forward, backward) -- degree doubly linked structure.
+*       qsize -- size of supernode ( initialized to one).
+*       list -- linked list ( initialized to zero).
+*       marker -- marker vector ( initialized to zero). always returns 0.
+****************************************************************************/
+idx_t  mmdint(idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t *head, idx_t *forward,
+     idx_t *backward, idx_t *qsize, idx_t *list, idx_t *marker)
+{
+    idx_t  fnode, ndeg, node;
+
+    for ( node = 1; node <= neqns; node++ ) {
+        head[node] = 0;
+        qsize[node] = 1;
+        marker[node] = 0;
+        list[node] = 0;
+    };
+
+    /* initialize the degree doubly linked lists. */
+    for ( node = 1; node <= neqns; node++ ) {
+        ndeg = xadj[node+1] - xadj[node]/* + 1*/;   /* george */
+        if (ndeg == 0)
+          ndeg = 1; /* nodes without nabors share bucket 1 with the one-nabor nodes */
+        fnode = head[ndeg];
+        forward[node] = fnode; /* push 'node' at the front of list 'ndeg' */
+        head[ndeg] = node;
+        if ( fnode > 0 ) backward[fnode] = node;
+        backward[node] = -ndeg; /* the first node of a list stores -degree as its back link */
+    };
+    return 0;
+}
+
+/****************************************************************************
+* mmdnum --- multi minimum degree numbering
+* purpose -- this routine performs the final step in producing
+*    the permutation and inverse permutation vectors in the
+*    multiple elimination version of the minimum degree
+*    ordering algorithm.
+* input parameters --
+*     neqns -- number of equations.
+*     qsize -- size of supernodes at elimination.
+* updated parameters --
+*     invp -- inverse permutation vector. on input,
+*             if qsize[node] = 0, then node has been merged
+*             into the node -invp[node]; otherwise,
+*            -invp[node] is its inverse labelling.
+* output parameters --
+*     perm -- the permutation vector.
+****************************************************************************/
+void mmdnum(idx_t neqns, idx_t *perm, idx_t *invp, idx_t *qsize)
+{
+  idx_t father, nextf, node, nqsize, num, root;
+
+  for ( node = 1; node <= neqns; node++ ) {
+      nqsize = qsize[node];
+      if ( nqsize <= 0 ) perm[node] = invp[node]; /* merged node: keeps the negative parent link */
+      if ( nqsize > 0 )  perm[node] = -invp[node]; /* unmerged node: positive ordering number */
+  };
+
+  /* for each node which has been merged, do the following. */
+  for ( node = 1; node <= neqns; node++ ) {
+      if ( perm[node] <= 0 )  {
+
+	 /* trace the merged tree until one which has not */
+         /* been merged, call it root.                    */
+         father = node;
+         while ( perm[father] <= 0 )
+            father = - perm[father];
+
+         /* number node after root. */
+         root = father;
+         num = perm[root] + 1;
+         invp[node] = -num;
+         perm[root] = num; /* root now carries the last number handed out in its tree */
+
+         /* shorten the merged tree. */
+         father = node;
+         nextf = - perm[father];
+         while ( nextf > 0 ) {
+            perm[father] = -root; /* point every node on the path directly at root */
+            father = nextf;
+            nextf = -perm[father];
+         };
+      };  /* end of -- if ( perm[node] <= 0 ) -- */
+  }; /* end of -- for ( node = 1; -- */
+
+  /* ready to compute perm. */
+  for ( node = 1; node <= neqns; node++ ) {
+        num = -invp[node];
+        invp[node] = num; /* flip the sign: invp becomes the final positive numbering */
+        perm[num] = node;
+  };
+  return;
+}
+
+/****************************************************************************
+* mmdupd ---- multiple minimum degree update
+* purpose -- this routine updates the degrees of nodes after a
+*            multiple elimination step.
+* input parameters --
+*    ehead -- the beginning of the list of eliminated nodes
+*             (i.e., newly formed elements).
+*    neqns -- number of equations.
+*    (xadj, adjncy) -- adjacency structure.
+*    delta -- tolerance value for multiple elimination.
+*    maxint -- maximum machine representable (short) integer.
+* updated parameters --
+*    mdeg -- new minimum degree after degree update.
+*    (head, forward, backward) -- degree doubly linked structure.
+*    qsize -- size of supernode.
+*    list -- marker vector for degree update.
+*    *tag   -- tag value.
+****************************************************************************/
+void mmdupd(idx_t ehead, idx_t neqns, idx_t *xadj, idx_t *adjncy, idx_t delta, idx_t *mdeg,
+     idx_t *head, idx_t *forward, idx_t *backward, idx_t *qsize, idx_t *list,
+     idx_t *marker, idx_t maxint, idx_t *tag)
+{
+ idx_t  deg, deg0, element, enode, fnode, i, iq2, istop,
+      istart, j, jstop, jstart, link, mdeg0, mtag, nabor,
+      node, q2head, qxhead;
+
+      mdeg0 = *mdeg + delta;
+      element = ehead;
+
+n100:
+      if ( element <= 0 ) return; /* end of the eliminated-node list */
+
+      /* for each of the newly formed element, do the following. */
+      /* reset tag value if necessary.                           */
+      mtag = *tag + mdeg0;
+      if ( mtag >= maxint ) {
+         *tag = 1;
+         for ( i = 1; i <= neqns; i++ )
+             if ( marker[i] < maxint ) marker[i] = 0;
+         mtag = *tag + mdeg0;
+      };
+
+      /* create two linked lists from nodes associated with 'element': */
+      /* one with two nabors (q2head) in the adjacency structure, and the*/
+      /* other with more than two nabors (qxhead). also compute 'deg0',*/
+      /* number of nodes in this element.                              */
+      q2head = 0;
+      qxhead = 0;
+      deg0 = 0;
+      link =element;
+
+n400:
+      istart = xadj[link];
+      istop = xadj[link+1] - 1;
+      for ( i = istart; i <= istop; i++ ) {
+          enode = adjncy[i];
+          link = -enode;
+          if ( enode < 0 )  goto n400; /* negative entry: continue in the linked list */
+          if ( enode == 0 ) break;
+          if ( qsize[enode] != 0 ) {
+             deg0 += qsize[enode];
+             marker[enode] = mtag;
+
+             /*'enode' requires a degree update*/
+             if ( backward[enode] == 0 ) {
+                /* place either in qxhead or q2head list. */
+                if ( forward[enode] != 2 ) {
+                     list[enode] = qxhead;
+                     qxhead = enode;
+                } else {
+                     list[enode] = q2head;
+                     q2head = enode;
+                };
+             };
+          }; /* end of -- if ( qsize[enode] != 0 ) -- */
+      }; /* end of -- for ( i = istart; -- */
+
+      /* for each node in q2 list, do the following. */
+      enode = q2head;
+      iq2 = 1; /* 1 while walking the q2 list, 0 while walking the qx list */
+
+n900:
+      if ( enode <= 0 ) goto n1500;
+      if ( backward[enode] != 0 ) goto n2200; /* no longer flagged for update: skip it */
+      (*tag)++;
+      deg = deg0;
+
+      /* identify the other adjacent element nabor. */
+      istart = xadj[enode];
+      nabor = adjncy[istart];
+      if ( nabor == element ) nabor = adjncy[istart+1];
+      link = nabor;
+      if ( forward[nabor] >= 0 ) {
+           /* nabor is uneliminated, increase degree count. */
+           deg += qsize[nabor];
+           goto n2100;
+      };
+
+       /* the nabor is eliminated. for each node in the 2nd element */
+       /* do the following.                                         */
+n1000:
+       istart = xadj[link];
+       istop = xadj[link+1] - 1;
+       for ( i = istart; i <= istop; i++ ) {
+           node = adjncy[i];
+           link = -node;
+           if ( node != enode ) {
+                if ( node < 0 ) goto n1000;
+                if ( node == 0 )  goto n2100;
+                if ( qsize[node] != 0 ) {
+                     if ( marker[node] < *tag ) {
+                        /* 'node' is not yet considered. */
+                        marker[node] = *tag;
+                        deg += qsize[node];
+                     } else {
+                        if ( backward[node] == 0 ) {
+                             if ( forward[node] == 2 ) {
+                                /* 'node' is indistinguishable from 'enode'.*/
+                                /* merge them into a new supernode.         */
+                                qsize[enode] += qsize[node];
+                                qsize[node] = 0;
+                                marker[node] = maxint;
+                                forward[node] = -enode;
+                                backward[node] = -maxint;
+                             } else {
+                                /* 'node' is outmatched by 'enode' */
+				if (backward[node]==0) backward[node] = -maxint;
+                             };
+                        }; /* end of -- if ( backward[node] == 0 ) -- */
+                    }; /* end of -- if ( marker[node] < *tag ) -- */
+                }; /* end of -- if ( qsize[node] != 0 ) -- */
+              }; /* end of -- if ( node != enode ) -- */
+          }; /* end of -- for ( i = istart; -- */
+          goto n2100;
+
+n1500:
+          /* for each 'enode' in the 'qx' list, do the following. */
+          enode = qxhead;
+          iq2 = 0;
+
+n1600:    if ( enode <= 0 )  goto n2300;
+          if ( backward[enode] != 0 )  goto n2200; /* no longer flagged for update: skip it */
+          (*tag)++;
+          deg = deg0;
+
+          /*for each unmarked nabor of 'enode', do the following.*/
+          istart = xadj[enode];
+          istop = xadj[enode+1] - 1;
+          for ( i = istart; i <= istop; i++ ) {
+                nabor = adjncy[i];
+                if ( nabor == 0 ) break;
+                if ( marker[nabor] < *tag ) {
+                     marker[nabor] = *tag;
+                     link = nabor;
+                     if ( forward[nabor] >= 0 ) 
+                          /*if uneliminated, include it in deg count.*/
+                          deg += qsize[nabor];
+                     else {
+n1700:
+                          /* if eliminated, include unmarked nodes in this*/
+                          /* element into the degree count.             */
+                          jstart = xadj[link];
+                          jstop = xadj[link+1] - 1;
+                          for ( j = jstart; j <= jstop; j++ ) {
+                                node = adjncy[j];
+                                link = -node;
+                                if ( node < 0 ) goto n1700;
+                                if ( node == 0 ) break;
+                                if ( marker[node] < *tag ) {
+                                    marker[node] = *tag;
+                                    deg += qsize[node];
+                                };
+                          }; /* end of -- for ( j = jstart; -- */
+                     }; /* end of -- if ( forward[nabor] >= 0 ) -- */
+                  }; /* end of -- if ( marker[nabor] < *tag ) -- */
+          }; /* end of -- for ( i = istart; -- */
+
+n2100:
+          /* update external degree of 'enode' in degree structure, */
+          /* and '*mdeg' if necessary.                     */
+          deg = deg - qsize[enode] + 1;
+          fnode = head[deg];
+          forward[enode] = fnode; /* push 'enode' at the front of degree list 'deg' */
+          backward[enode] = -deg;
+          if ( fnode > 0 ) backward[fnode] = enode;
+          head[deg] = enode;
+          if ( deg < *mdeg ) *mdeg = deg;
+
+n2200:
+          /* get next enode in current element. */
+          enode = list[enode];
+          if ( iq2 == 1 ) goto n900;
+          goto n1600;
+
+n2300:
+          /* get next element in the list. */
+          *tag = mtag;
+          element = list[element];
+          goto n100;
+    }
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/ometis.c b/3rdParty/metis/metis-5.1.0/libmetis/ometis.c
new file mode 100644
index 000000000..51e39754c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/ometis.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * ometis.c
+ *
+ * This file contains the top level routines for the multilevel nested
+ * dissection ordering algorithm (OMETIS).
+ *
+ * Started 7/24/97
+ * George
+ *
+ * $Id: ometis.c 10513 2011-07-07 22:06:03Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function is the entry point for the multilevel nested dissection 
+    ordering code. At each bisection, a node-separator is computed using
+    a node-based refinement approach.
+
+    \param nvtxs is the number of vertices in the graph.
+    \param xadj is of length nvtxs+1 marking the start of the adjacency 
+           list of each vertex in adjncy.
+    \param adjncy stores the adjacency lists of the vertices. The adjacency
+           list of a vertex should not contain the vertex itself.
+    \param vwgt is an array of size nvtxs storing the weight of each 
+           vertex. If vwgt is NULL, then the vertices are considered 
+           to have unit weight.
+    \param options is an array of size METIS_NOPTIONS used to pass 
+           various options impacting the behavior of the algorithm. A NULL
+           value indicates use of default options. The numbering base is
+           read from it (METIS_OPTION_NUMBERING): 1-based input is shifted
+           to 0-based for the computation and shifted back before return.
+    \param perm is an array of size nvtxs such that if A and A' are
+           the original and permuted matrices, then A'[i] = A[perm[i]].
+    \param iperm is an array of size nvtxs such that if A and A' are
+           the original and permuted matrices, then A[i] = A'[iperm[i]].
+    \returns METIS_ERROR_MEMORY or METIS_ERROR_INPUT when the setup fails,
+           otherwise the caught-signal status mapped by metis_rcode().
+*/
+/*************************************************************************/
+int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
+          idx_t *options, idx_t *perm, idx_t *iperm) 
+{
+  int sigrval=0, renumber=0;
+  idx_t i, ii, j, l, nnvtxs=0;
+  graph_t *graph=NULL;
+  ctrl_t *ctrl;
+  idx_t *cptr, *cind, *piperm;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  /* set up the run time parameters */
+  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
+  if (!ctrl) {
+    gk_siguntrap();
+    gk_malloc_cleanup(0);  /* pair with gk_malloc_init(), as on normal exit */
+    return METIS_ERROR_INPUT;
+  }
+
+  /* if required, change the numbering to 0 */
+  if (ctrl->numflag == 1) {
+    Change2CNumbering(*nvtxs, xadj, adjncy);
+    renumber = 1;
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));
+
+  /* prune the dense columns */
+  if (ctrl->pfactor > 0.0) { 
+    piperm = imalloc(*nvtxs, "OMETIS: piperm");
+
+    graph = PruneGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor);
+    if (graph == NULL) {
+      /* if there was no pruning, cleanup the pfactor */
+      gk_free((void **)&piperm, LTERM);
+      ctrl->pfactor = 0.0;
+    }
+    else {
+      nnvtxs = graph->nvtxs;
+      ctrl->compress = 0;  /* disable compression if pruning took place */
+    }
+  }
+
+  /* compress the graph; note that compression only happens if no pruning 
+     has taken place. */
+  if (ctrl->compress) { 
+    cptr = imalloc(*nvtxs+1, "OMETIS: cptr");
+    cind = imalloc(*nvtxs, "OMETIS: cind");
+
+    graph = CompressGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, cptr, cind);
+    if (graph == NULL) {
+      /* if there was no compression, cleanup the compress flag */
+      gk_free((void **)&cptr, &cind, LTERM);
+      ctrl->compress = 0; 
+    }
+    else {
+      nnvtxs = graph->nvtxs;
+      ctrl->cfactor = 1.0*(*nvtxs)/nnvtxs;
+      if (ctrl->cfactor > 1.5 && ctrl->nseps == 1)
+        ctrl->nseps = 2;
+      //ctrl->nseps = (idx_t)(ctrl->cfactor*ctrl->nseps);
+    }
+  }
+
+  /* if no pruning and no compression, setup the graph in the normal way. */
+  if (ctrl->pfactor == 0.0 && ctrl->compress == 0) 
+    graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+  ASSERT(CheckGraph(graph, ctrl->numflag, 1));
+
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+  /* do the nested dissection ordering; the callee frees graph */
+  if (ctrl->ccorder) 
+    MlevelNestedDissectionCC(ctrl, graph, iperm, graph->nvtxs);
+  else
+    MlevelNestedDissection(ctrl, graph, iperm, graph->nvtxs);
+
+  if (ctrl->pfactor > 0.0) { /* Order any pruned vertices */
+    icopy(nnvtxs, iperm, perm);  /* Use perm as an auxiliary array */
+    for (i=0; i<nnvtxs; i++)
+      iperm[piperm[i]] = perm[i];
+    for (i=nnvtxs; i<*nvtxs; i++)
+      iperm[piperm[i]] = i;
+
+    gk_free((void **)&piperm, LTERM);
+  }
+  else if (ctrl->compress) { /* Uncompress the ordering */
+    /* construct perm from iperm */
+    for (i=0; i<nnvtxs; i++)
+      perm[iperm[i]] = i; 
+    for (l=ii=0; ii<nnvtxs; ii++) {
+      i = perm[ii];
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        iperm[cind[j]] = l++;
+    }
+
+    gk_free((void **)&cptr, &cind, LTERM);
+  }
+
+  for (i=0; i<*nvtxs; i++)
+    perm[iperm[i]] = i;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));
+
+  /* clean up */
+  FreeCtrl(&ctrl);
+
+SIGTHROW:
+  /* if required, change the numbering back to 1 */
+  if (renumber)
+    Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+/*************************************************************************/
+/*! Recursive driver of the nested dissection ordering. The graph is
+    tri-sected into a left part, a right part and a separator. Separator
+    vertices receive the highest positions below lastvtx; each part is then
+    ordered recursively, or by MMDOrder once it has at most MMDSWITCH 
+    vertices or no edges. The graph passed in is freed by this routine.
+ */
+/*************************************************************************/
+void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order, 
+         idx_t lastvtx)
+{
+  idx_t s, nsep, leftlast;
+  idx_t *sepind, *vlabel;
+  graph_t *left, *right;
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, 
+      printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", 
+        graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]));
+
+  /* Number the separator vertices, counting down from lastvtx */
+  vlabel = graph->label;
+  sepind = graph->bndind;
+  nsep   = graph->nbnd;
+  for (s=0; s<nsep; s++) {
+    lastvtx--;
+    order[vlabel[sepind[s]]] = lastvtx;
+  }
+
+  SplitGraphOrder(ctrl, graph, &left, &right);
+
+  /* The parent graph is not needed once it has been split */
+  FreeGraph(&graph);
+
+  /* Read right->nvtxs up front: a recursive call frees the graph it gets */
+  leftlast = lastvtx - right->nvtxs;
+
+  if (left->nvtxs <= MMDSWITCH || left->nedges <= 0) {
+    MMDOrder(ctrl, left, order, leftlast); 
+    FreeGraph(&left);
+  }
+  else 
+    MlevelNestedDissection(ctrl, left, order, leftlast);
+  if (right->nvtxs <= MMDSWITCH || right->nedges <= 0) {
+    MMDOrder(ctrl, right, order, lastvtx); 
+    FreeGraph(&right);
+  }
+  else 
+    MlevelNestedDissection(ctrl, right, order, lastvtx);
+}
+
+
+/*************************************************************************/
+/*! Variant of MlevelNestedDissection that, after each tri-section, orders
+    every connected component left behind by the removal of the separator
+    vertices independently. This may therefore recurse on more than the two
+    (left and right) subgraphs of the non-CC routine. The graph passed in 
+    is freed by this routine.
+*/
+/*************************************************************************/
+void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order, 
+         idx_t lastvtx)
+{
+  idx_t c, s, nvtxs, nsep, ncomp, ndone, csize;
+  idx_t *vlabel, *sepind;
+  idx_t *compptr, *compind;
+  graph_t **parts;
+
+  nvtxs = graph->nvtxs;
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, 
+      printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", 
+        graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]));
+
+  /* Number the separator vertices, counting down from lastvtx */
+  vlabel = graph->label;
+  sepind = graph->bndind;
+  nsep   = graph->nbnd;
+  for (s=0; s<nsep; s++) {
+    lastvtx--;
+    order[vlabel[sepind[s]]] = lastvtx;
+  }
+
+  /* compptr/compind live in the workspace only until the split is done */
+  WCOREPUSH;
+  compptr = iwspacemalloc(ctrl, nvtxs+1);
+  compind = iwspacemalloc(ctrl, nvtxs);
+  ncomp   = FindSepInducedComponents(ctrl, graph, compptr, compind);
+
+  if ((ctrl->dbglvl&METIS_DBG_INFO) && ncomp > 2)
+    printf("  Bisection resulted in %"PRIDX" connected components\n", ncomp);
+
+  parts = SplitGraphOrderCC(ctrl, graph, ncomp, compptr, compind);
+  WCOREPOP;
+
+  /* The parent graph is not needed once it has been split */
+  FreeGraph(&graph);
+
+  /* Order the components one after the other; ndone counts the vertices of
+     the components handled so far, which sit right below lastvtx. */
+  ndone = 0;
+  for (c=0; c<ncomp; c++) {
+    /* Remember the size now: the recursive call frees parts[c] */
+    csize = parts[c]->nvtxs;
+
+    if (csize <= MMDSWITCH || parts[c]->nedges <= 0) {
+      MMDOrder(ctrl, parts[c], order, lastvtx-ndone);
+      FreeGraph(&parts[c]);
+    }
+    else
+      MlevelNestedDissectionCC(ctrl, parts[c], order, lastvtx-ndone);
+    ndone += csize;
+  }
+
+  gk_free((void **)&parts, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Computes a vertex separator (tri-section) of the graph by running the
+    multilevel node bisection up to ctrl->nseps times and keeping the best. */
+/*************************************************************************/
+void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t trial, bestcut, minsize;
+  idx_t *savedwhere;
+
+  /* one separator suffices if only one is requested or the graph is small */
+  minsize = (ctrl->compress ? 1000 : 2000);
+  if (ctrl->nseps == 1 || graph->nvtxs < minsize) {
+    MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS);
+    return;
+  }
+
+  WCOREPUSH;
+  savedwhere = iwspacemalloc(ctrl, graph->nvtxs);
+  bestcut    = graph->tvwgt[0];
+
+  for (trial=0; trial<ctrl->nseps; trial++) {
+    MlevelNodeBisectionL2(ctrl, graph, LARGENIPARTS);
+
+    if (trial == 0 || graph->mincut < bestcut) {
+      bestcut = graph->mincut;
+      if (trial < ctrl->nseps-1)
+        icopy(graph->nvtxs, graph->where, savedwhere);
+    }
+
+    if (bestcut == 0) break;
+
+    if (trial < ctrl->nseps-1) 
+      FreeRData(graph);
+  }
+
+  /* put the best trial back unless it is the one already in the graph */
+  if (bestcut != graph->mincut) {
+    icopy(graph->nvtxs, savedwhere, graph->where);
+    Compute2WayNodePartitionParams(ctrl, graph);
+  }
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Second level of the multilevel node bisection. A graph with at least
+    5000 vertices is first reduced with CoarsenGraphNlevels; several runs of
+    MlevelNodeBisectionL1 on it are tried and the best is kept for refinement. */
+/*************************************************************************/
+void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts)
+{
+  idx_t run, bestcut, nruns=5;
+  graph_t *coarse; 
+  idx_t *savedwhere;
+
+  /* graphs below the size threshold are bisected directly */
+  if (graph->nvtxs < 5000) {
+    MlevelNodeBisectionL1(ctrl, graph, niparts);
+    return;
+  }
+
+  WCOREPUSH;
+
+  ctrl->CoarsenTo = gk_max(100, graph->nvtxs/30);
+  coarse = CoarsenGraphNlevels(ctrl, graph, 4);
+
+  savedwhere = iwspacemalloc(ctrl, coarse->nvtxs);
+  bestcut    = graph->tvwgt[0];
+
+  for (run=0; run<nruns; run++) {
+    MlevelNodeBisectionL1(ctrl, coarse, 0.7*niparts);
+
+    if (run == 0 || coarse->mincut < bestcut) {
+      bestcut = coarse->mincut;
+      if (run < nruns-1)
+        icopy(coarse->nvtxs, coarse->where, savedwhere);
+    }
+
+    if (bestcut == 0) break;
+
+    if (run < nruns-1) 
+      FreeRData(coarse);
+  }
+
+  /* put the best run back unless it is the one already in the coarse graph */
+  if (bestcut != coarse->mincut) 
+    icopy(coarse->nvtxs, savedwhere, coarse->where);
+
+  WCOREPOP;
+
+  /* savedwhere is released above; only coarse->where is needed from here */
+  Refine2WayNode(ctrl, graph, coarse);
+}
+
+
+/*************************************************************************/
+/*! Innermost level of the multilevel node bisection: coarsen, separate, refine */
+/*************************************************************************/
+void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts)
+{
+  graph_t *coarse;
+
+  /* coarsen down to nvtxs/8 vertices, clamped to the range [40, 100] */
+  ctrl->CoarsenTo = graph->nvtxs/8;
+  ctrl->CoarsenTo = (ctrl->CoarsenTo > 100 ? 100 : gk_max(40, ctrl->CoarsenTo));
+
+  coarse = CoarsenGraph(ctrl, graph);
+
+  /* halve niparts when the coarse graph reached the target; never below 1 */
+  if (coarse->nvtxs <= ctrl->CoarsenTo)
+    niparts = niparts/2;
+  niparts = gk_max(1, niparts);
+  InitSeparator(ctrl, coarse, niparts);
+
+  Refine2WayNode(ctrl, graph, coarse);
+}
+
+
+/*************************************************************************/
+/*! This function takes a graph and a tri-section (left, right, separator)
+    and splits it into two graphs: *r_lgraph gets the vertices with 
+    where==0 and *r_rgraph those with where==1; where==2 vertices are dropped.
+    This function relies on the fact that adjwgt is all equal to 1.
+*/
+/*************************************************************************/
+void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, 
+         graph_t **r_rgraph)
+{
+  idx_t i, ii, j, k, l, istart, iend, mypart, nvtxs, snvtxs[3], snedges[3];
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind;
+  idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2];
+  idx_t *rename;
+  idx_t *auxadjncy;
+  graph_t *lgraph, *rgraph;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr));
+
+  nvtxs   = graph->nvtxs;
+  xadj    = graph->xadj;
+  vwgt    = graph->vwgt;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  label   = graph->label;
+  where   = graph->where;
+  bndptr  = graph->bndptr;
+  bndind  = graph->bndind;
+  ASSERT(bndptr != NULL);
+
+  rename = iwspacemalloc(ctrl, nvtxs);
+  /* first pass: per-part vertex/edge counts; rename[i] = index of i in its part */
+  snvtxs[0] = snvtxs[1] = snvtxs[2] = snedges[0] = snedges[1] = snedges[2] = 0;
+  for (i=0; i<nvtxs; i++) {
+    k = where[i];
+    rename[i] = snvtxs[k]++;
+    snedges[k] += xadj[i+1]-xadj[i];  /* upper bound: counts every edge of i */
+  }
+
+  lgraph      = SetupSplitGraph(graph, snvtxs[0], snedges[0]);
+  sxadj[0]    = lgraph->xadj;
+  svwgt[0]    = lgraph->vwgt;
+  sadjncy[0]  = lgraph->adjncy; 
+  sadjwgt[0]  = lgraph->adjwgt; 
+  slabel[0]   = lgraph->label;
+
+  rgraph      = SetupSplitGraph(graph, snvtxs[1], snedges[1]);
+  sxadj[1]    = rgraph->xadj;
+  svwgt[1]    = rgraph->vwgt;
+  sadjncy[1]  = rgraph->adjncy; 
+  sadjwgt[1]  = rgraph->adjwgt; 
+  slabel[1]   = rgraph->label;
+
+  /* Mark (bndptr=1) every neighbor of a separator vertex listed in bndind */
+  for (ii=0; ii<graph->nbnd; ii++) {
+    i = bndind[ii];
+    for (j=xadj[i]; j<xadj[i+1]; j++)
+      bndptr[adjncy[j]] = 1;
+  }
+
+  snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0;  /* reused as fill counters */
+  sxadj[0][0] = sxadj[1][0] = 0;
+  for (i=0; i<nvtxs; i++) {
+    if ((mypart = where[i]) == 2)
+      continue;
+
+    istart = xadj[i];
+    iend   = xadj[i+1];
+    if (bndptr[i] == -1) { /* interior vertex: its whole list is copied */
+      auxadjncy = sadjncy[mypart] + snedges[mypart] - istart;  /* biased so j indexes it */
+      for(j=istart; j<iend; j++) 
+        auxadjncy[j] = adjncy[j];
+      snedges[mypart] += iend-istart;
+    }
+    else {  /* marked vertex: keep only the neighbors in the same part */
+      auxadjncy = sadjncy[mypart];
+      l = snedges[mypart];
+      for (j=istart; j<iend; j++) {
+        k = adjncy[j];
+        if (where[k] == mypart) 
+          auxadjncy[l++] = k;
+      }
+      snedges[mypart] = l;
+    }
+
+    svwgt[mypart][snvtxs[mypart]]    = vwgt[i];
+    slabel[mypart][snvtxs[mypart]]   = label[i];
+    sxadj[mypart][++snvtxs[mypart]]  = snedges[mypart];
+  }
+
+  for (mypart=0; mypart<2; mypart++) {  /* unit edge weights, part-local ids */
+    iend = snedges[mypart];
+    iset(iend, 1, sadjwgt[mypart]);
+
+    auxadjncy = sadjncy[mypart];
+    for (i=0; i<iend; i++) 
+      auxadjncy[i] = rename[auxadjncy[i]];
+  }
+
+  lgraph->nvtxs  = snvtxs[0];
+  lgraph->nedges = snedges[0];
+  rgraph->nvtxs  = snvtxs[1];
+  rgraph->nedges = snedges[1];
+
+  SetupGraph_tvwgt(lgraph);
+  SetupGraph_tvwgt(rgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr));
+
+  *r_lgraph = lgraph;
+  *r_rgraph = rgraph;
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function takes a graph and generates a set of graphs, one for each
+    component described by cptr/cind (edges to where==2 vertices are dropped).
+
+    This function relies on the fact that adjwgt is all equal to 1.
+
+    \param ctrl stores run state info.
+    \param graph is the graph to be split.
+    \param ncmps is the number of connected components.
+    \param cptr is an array of size ncmps+1 that marks the start and end
+           locations of the vertices in cind that make up the respective
+           components (i.e., cptr, cind is in CSR format).
+    \param cind is an array of size equal to the number of vertices in 
+           the original graph and stores the vertices that belong to each
+           connected component. Each component's slice is permuted in place.
+
+    \returns a gk_malloc'ed array of ncmps subgraph pointers.
+*/
+/*************************************************************************/
+graph_t **SplitGraphOrderCC(ctrl_t *ctrl, graph_t *graph, idx_t ncmps, 
+              idx_t *cptr, idx_t *cind)
+{
+  idx_t i, ii, iii, j, k, l, istart, iend, mypart, nvtxs, snvtxs, snedges;
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr, *bndind;
+  idx_t *sxadj, *svwgt, *sadjncy, *sadjwgt, *slabel;
+  idx_t *rename;
+  idx_t *auxadjncy;
+  graph_t **sgraphs;
+
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr));
+
+  nvtxs   = graph->nvtxs;
+  xadj    = graph->xadj;
+  vwgt    = graph->vwgt;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  label   = graph->label;
+  where   = graph->where;
+  bndptr  = graph->bndptr;
+  bndind  = graph->bndind;
+  ASSERT(bndptr != NULL);
+
+  /* Mark (bndptr=1) every neighbor of a separator vertex listed in bndind */
+  for (ii=0; ii<graph->nbnd; ii++) {
+    i = bndind[ii];
+    for (j=xadj[i]; j<xadj[i+1]; j++)
+      bndptr[adjncy[j]] = 1;
+  }
+
+  rename = iwspacemalloc(ctrl, nvtxs);
+  /* sgraphs is heap-allocated (not workspace) because it is returned */
+  sgraphs = (graph_t **)gk_malloc(sizeof(graph_t *)*ncmps, "SplitGraphOrderCC: sgraphs");
+
+  /* Split one component at a time; irandArrayPermute is first applied to its slice of cind */
+  for (iii=0; iii<ncmps; iii++) {
+    irandArrayPermute(cptr[iii+1]-cptr[iii], cind+cptr[iii], cptr[iii+1]-cptr[iii], 0);
+    snvtxs = snedges = 0;
+    for (j=cptr[iii]; j<cptr[iii+1]; j++) {
+      i = cind[j];
+      rename[i] = snvtxs++;  /* component-local id of i */
+      snedges += xadj[i+1]-xadj[i];  /* upper bound: counts every edge of i */
+    }
+
+    sgraphs[iii] = SetupSplitGraph(graph, snvtxs, snedges);
+
+    sxadj    = sgraphs[iii]->xadj;
+    svwgt    = sgraphs[iii]->vwgt;
+    sadjncy  = sgraphs[iii]->adjncy;
+    sadjwgt  = sgraphs[iii]->adjwgt;
+    slabel   = sgraphs[iii]->label;
+
+    snvtxs = snedges = sxadj[0] = 0;  /* reused as fill counters */
+    for (ii=cptr[iii]; ii<cptr[iii+1]; ii++) {
+      i = cind[ii];
+
+      istart = xadj[i];
+      iend   = xadj[i+1];
+      if (bndptr[i] == -1) { /* interior vertex: its whole list is copied */
+        auxadjncy = sadjncy + snedges - istart;  /* biased so j indexes it */
+        for(j=istart; j<iend; j++) 
+          auxadjncy[j] = adjncy[j];
+        snedges += iend-istart;
+      }
+      else {  /* marked vertex: drop the neighbors that are in the separator */
+        l = snedges;
+        for (j=istart; j<iend; j++) {
+          k = adjncy[j];
+          if (where[k] != 2) 
+            sadjncy[l++] = k;
+        }
+        snedges = l;
+      }
+
+      svwgt[snvtxs]    = vwgt[i];
+      slabel[snvtxs]   = label[i];
+      sxadj[++snvtxs]  = snedges;
+    }
+
+    iset(snedges, 1, sadjwgt);  /* unit edge weights */
+    for (i=0; i<snedges; i++)  /* translate to component-local ids */
+      sadjncy[i] = rename[sadjncy[i]];
+
+    sgraphs[iii]->nvtxs  = snvtxs;
+    sgraphs[iii]->nedges = snedges;
+
+    SetupGraph_tvwgt(sgraphs[iii]);
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr));
+
+  WCOREPOP;
+
+  return sgraphs;
+}
+
+
+/*************************************************************************/
+/*! This function orders the graph with genmmd and stores, for each vertex i,
+    the position firstvtx+iperm[i]-1 (firstvtx=lastvtx-nvtxs) in order[label[i]]. */
+/*************************************************************************/
+void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx)
+{
+  idx_t i, j, k, nvtxs, nofsub, firstvtx;
+  idx_t *xadj, *adjncy, *label;
+  idx_t *perm, *iperm, *head, *qsize, *list, *marker;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Relabel the vertices in place so that the numbering starts from 1 */
+  k = xadj[nvtxs];  /* entry count; must be read before xadj is shifted */
+  for (i=0; i<k; i++)
+    adjncy[i]++;
+  for (i=0; i<nvtxs+1; i++)
+    xadj[i]++;
+
+  perm   = iwspacemalloc(ctrl, nvtxs+5);  /* NOTE(review): reason for +5 not visible here */
+  iperm  = iwspacemalloc(ctrl, nvtxs+5);
+  head   = iwspacemalloc(ctrl, nvtxs+5);
+  qsize  = iwspacemalloc(ctrl, nvtxs+5);
+  list   = iwspacemalloc(ctrl, nvtxs+5);
+  marker = iwspacemalloc(ctrl, nvtxs+5);
+
+  genmmd(nvtxs, xadj, adjncy, iperm, perm, 1, head, qsize, list, marker, IDX_MAX, &nofsub);
+
+  label = graph->label;
+  firstvtx = lastvtx-nvtxs;
+  for (i=0; i<nvtxs; i++)
+    order[label[i]] = firstvtx+iperm[i]-1;  /* presumably iperm is 1-based -- confirm */
+
+  /* Relabel the vertices back so that the numbering starts from 0 */
+  for (i=0; i<nvtxs+1; i++)
+    xadj[i]--;
+  k = xadj[nvtxs];  /* xadj is 0-based again here, so this is the entry count */
+  for (i=0; i<k; i++)
+    adjncy[i]--;
+
+  WCOREPOP;
+}
+
+
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/options.c b/3rdParty/metis/metis-5.1.0/libmetis/options.c
new file mode 100644
index 000000000..3bc1ac93c
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/options.c
@@ -0,0 +1,532 @@
+/**
+  \file
+  \brief This file contains various routines for dealing with options and ctrl_t.
+
+  \date   Started 5/12/2011
+  \author George  
+  \author Copyright 1997-2011, Regents of the University of Minnesota 
+  \version\verbatim $Id: options.c 13901 2013-03-24 16:17:03Z karypis $ \endverbatim
+  */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Creates and fills the run parameters (ctrl_t); returns NULL if CheckParams rejects them */
+/*************************************************************************/
+ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, 
+            real_t *tpwgts, real_t *ubvec)
+{
+  idx_t i, j;
+  ctrl_t *ctrl;
+
+  ctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "SetupCtrl: ctrl");
+  
+  memset((void *)ctrl, 0, sizeof(ctrl_t));
+
+  switch (optype) {  /* per-operation options and their defaults */
+    case METIS_OP_PMETIS:
+      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
+      ctrl->rtype   = METIS_RTYPE_FM;
+      ctrl->ncuts   = GETOPTION(options, METIS_OPTION_NCUTS,   1);
+      ctrl->niter   = GETOPTION(options, METIS_OPTION_NITER,   10);
+
+      if (ncon == 1) {  /* single-constraint defaults */
+        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE,  METIS_IPTYPE_GROW);
+        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, PMETIS_DEFAULT_UFACTOR);
+        ctrl->CoarsenTo = 20;
+      }
+      else {  /* multi-constraint defaults */
+        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE,  METIS_IPTYPE_RANDOM);
+        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, MCPMETIS_DEFAULT_UFACTOR);
+        ctrl->CoarsenTo = 100;
+      }
+
+      break;
+
+
+    case METIS_OP_KMETIS:
+      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
+      ctrl->iptype  = METIS_IPTYPE_METISRB;
+      ctrl->rtype   = METIS_RTYPE_GREEDY;
+      ctrl->ncuts   = GETOPTION(options, METIS_OPTION_NCUTS,   1);
+      ctrl->niter   = GETOPTION(options, METIS_OPTION_NITER,   10);
+      ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, KMETIS_DEFAULT_UFACTOR);
+      ctrl->minconn = GETOPTION(options, METIS_OPTION_MINCONN, 0);
+      ctrl->contig  = GETOPTION(options, METIS_OPTION_CONTIG,  0);
+      break;
+
+
+    case METIS_OP_OMETIS:
+      ctrl->objtype  = GETOPTION(options, METIS_OPTION_OBJTYPE,  METIS_OBJTYPE_NODE);
+      ctrl->rtype    = GETOPTION(options, METIS_OPTION_RTYPE,    METIS_RTYPE_SEP1SIDED);
+      ctrl->iptype   = GETOPTION(options, METIS_OPTION_IPTYPE,   METIS_IPTYPE_EDGE);
+      ctrl->nseps    = GETOPTION(options, METIS_OPTION_NSEPS,    1);
+      ctrl->niter    = GETOPTION(options, METIS_OPTION_NITER,    10);
+      ctrl->ufactor  = GETOPTION(options, METIS_OPTION_UFACTOR,  OMETIS_DEFAULT_UFACTOR);
+      ctrl->compress = GETOPTION(options, METIS_OPTION_COMPRESS, 1);
+      ctrl->ccorder  = GETOPTION(options, METIS_OPTION_CCORDER,  0);
+      ctrl->pfactor  = 0.1*GETOPTION(options, METIS_OPTION_PFACTOR,  0);  /* option value is scaled by 0.1 */
+
+      ctrl->CoarsenTo = 100;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown optype of %d\n", optype);
+  }
+
+  /* common options */
+  ctrl->ctype   = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM);
+  ctrl->no2hop  = GETOPTION(options, METIS_OPTION_NO2HOP, 0);
+  ctrl->seed    = GETOPTION(options, METIS_OPTION_SEED, -1);
+  ctrl->dbglvl  = GETOPTION(options, METIS_OPTION_DBGLVL, 0);
+  ctrl->numflag = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
+
+  /* set non-option information */
+  ctrl->optype  = optype;
+  ctrl->ncon    = ncon;
+  ctrl->nparts  = nparts;
+  ctrl->maxvwgt = ismalloc(ncon, 0, "SetupCtrl: maxvwgt");
+
+  /* setup the target partition weights */
+  if (ctrl->optype != METIS_OP_OMETIS) {
+    ctrl->tpwgts = rmalloc(nparts*ncon, "SetupCtrl: ctrl->tpwgts");
+    if (tpwgts) {
+      rcopy(nparts*ncon, tpwgts, ctrl->tpwgts);
+    }
+    else {  /* no targets supplied: every part gets an equal 1/nparts share */
+      for (i=0; i<nparts; i++) {
+        for (j=0; j<ncon; j++)
+          ctrl->tpwgts[i*ncon+j] = 1.0/nparts;
+      }
+    }
+  }
+  else {  /* METIS_OP_OMETIS */
+    /* this is required to allow the pijbm to be defined properly for
+       the edge-based refinement during initial partitioning */
+    ctrl->tpwgts = rsmalloc(2, .5,  "SetupCtrl: ctrl->tpwgts");
+  }
+
+
+  /* setup the ubfactors */
+  ctrl->ubfactors = rsmalloc(ctrl->ncon, I2RUBFACTOR(ctrl->ufactor), "SetupCtrl: ubfactors");
+  if (ubvec)  /* a caller-supplied ubvec replaces the ufactor-derived values */
+    rcopy(ctrl->ncon, ubvec, ctrl->ubfactors);
+  for (i=0; i<ctrl->ncon; i++)
+    ctrl->ubfactors[i] += 0.0000499;  /* NOTE(review): fixed slack; rationale not visible here */
+
+  /* Allocate memory for balance multipliers. 
+     Note that for PMETIS/OMETIS routines the memory allocated is more 
+     than required as balance multipliers for 2 parts is sufficient. */
+  ctrl->pijbm = rmalloc(nparts*ncon, "SetupCtrl: ctrl->pijbm");
+
+  InitRandom(ctrl->seed);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO, PrintCtrl(ctrl));
+
+  if (!CheckParams(ctrl)) {  /* invalid parameters: release ctrl and report failure */
+    FreeCtrl(&ctrl);
+    return NULL;
+  }
+  else {
+    return ctrl;
+  }
+}
+
+
+/*************************************************************************/
+/*! Sets ctrl->pijbm[p*ncon+c] = invtvwgt[c]/tpwgts[p*ncon+c] for all nparts parts */
+/*************************************************************************/
+void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t part, con, base;
+
+  for (part=0, base=0; part<ctrl->nparts; part++, base+=graph->ncon) {
+    for (con=0; con<graph->ncon; con++)
+      ctrl->pijbm[base+con] = graph->invtvwgt[con]/ctrl->tpwgts[base+con];
+  }
+}
+
+
+/*************************************************************************/
+/*! Sets ctrl->pijbm[p*ncon+c] = invtvwgt[c]/tpwgts[p*ncon+c] for the parts p=0,1 */
+/*************************************************************************/
+void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts)
+{
+  idx_t part, con, base;
+
+  for (part=0, base=0; part<2; part++, base+=graph->ncon) {
+    for (con=0; con<graph->ncon; con++)
+      ctrl->pijbm[base+con] = graph->invtvwgt[con]/tpwgts[base+con];
+  }
+}
+
+
+/*************************************************************************/
+/*! This function prints the various control fields of ctrl to stdout */
+/*************************************************************************/
+void PrintCtrl(ctrl_t *ctrl)
+{
+  idx_t i, j, modnum;
+
+  printf(" Runtime parameters:\n");
+
+  printf("   Objective type: ");
+  switch (ctrl->objtype) {
+    case METIS_OBJTYPE_CUT:
+      printf("METIS_OBJTYPE_CUT\n");
+      break;
+    case METIS_OBJTYPE_VOL:
+      printf("METIS_OBJTYPE_VOL\n");
+      break;
+    case METIS_OBJTYPE_NODE:
+      printf("METIS_OBJTYPE_NODE\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf("   Coarsening type: ");
+  switch (ctrl->ctype) {
+    case METIS_CTYPE_RM:
+      printf("METIS_CTYPE_RM\n");
+      break;
+    case METIS_CTYPE_SHEM:
+      printf("METIS_CTYPE_SHEM\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf("   Initial partitioning type: ");
+  switch (ctrl->iptype) {
+    case METIS_IPTYPE_GROW:
+      printf("METIS_IPTYPE_GROW\n");
+      break;
+    case METIS_IPTYPE_RANDOM:
+      printf("METIS_IPTYPE_RANDOM\n");
+      break;
+    case METIS_IPTYPE_EDGE:
+      printf("METIS_IPTYPE_EDGE\n");
+      break;
+    case METIS_IPTYPE_NODE:
+      printf("METIS_IPTYPE_NODE\n");
+      break;
+    case METIS_IPTYPE_METISRB:
+      printf("METIS_IPTYPE_METISRB\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf("   Refinement type: ");
+  switch (ctrl->rtype) {
+    case METIS_RTYPE_FM:
+      printf("METIS_RTYPE_FM\n");
+      break;
+    case METIS_RTYPE_GREEDY:
+      printf("METIS_RTYPE_GREEDY\n");
+      break;
+    case METIS_RTYPE_SEP2SIDED:
+      printf("METIS_RTYPE_SEP2SIDED\n");
+      break;
+    case METIS_RTYPE_SEP1SIDED:
+      printf("METIS_RTYPE_SEP1SIDED\n");
+      break;
+    default:
+      printf("Unknown!\n");
+  }
+
+  printf("   Perform a 2-hop matching: %s\n", (ctrl->no2hop ? "No" : "Yes"));  /* no2hop set => disabled */
+
+  printf("   Number of balancing constraints: %"PRIDX"\n", ctrl->ncon);
+  printf("   Number of refinement iterations: %"PRIDX"\n", ctrl->niter);
+  printf("   Random number seed: %"PRIDX"\n", ctrl->seed);
+
+  if (ctrl->optype == METIS_OP_OMETIS) {
+    printf("   Number of separators: %"PRIDX"\n", ctrl->nseps);
+    printf("   Compress graph prior to ordering: %s\n", (ctrl->compress ? "Yes" : "No"));
+    printf("   Detect & order connected components separately: %s\n", (ctrl->ccorder ? "Yes" : "No"));
+    printf("   Prunning factor for high degree vertices: %"PRREAL"\n", ctrl->pfactor);
+  }
+  else {
+    printf("   Number of partitions: %"PRIDX"\n", ctrl->nparts);
+    printf("   Number of cuts: %"PRIDX"\n", ctrl->ncuts);
+    printf("   User-supplied ufactor: %"PRIDX"\n", ctrl->ufactor);
+
+    if (ctrl->optype == METIS_OP_KMETIS) {
+      printf("   Minimize connectivity: %s\n", (ctrl->minconn ? "Yes" : "No"));
+      printf("   Create contigous partitions: %s\n", (ctrl->contig ? "Yes" : "No"));
+    }
+
+    /* modnum = number of partitions printed per output line */
+    modnum = (ctrl->ncon==1 ? 5 : (ctrl->ncon==2 ? 3 : (ctrl->ncon==3 ? 2 : 1)));
+    printf("   Target partition weights: ");
+    for (i=0; i<ctrl->nparts; i++) {
+      if (i%modnum == 0)
+        printf("\n     ");
+      printf("%4"PRIDX"=[", i);
+      for (j=0; j<ctrl->ncon; j++) 
+        printf("%s%.2e", (j==0 ? "" : " "), (double)ctrl->tpwgts[i*ctrl->ncon+j]);
+      printf("]");
+    }
+    printf("\n");
+  }
+  printf("   Allowed maximum load imbalance: ");
+  for (i=0; i<ctrl->ncon; i++) 
+    printf("%.3"PRREAL" ", ctrl->ubfactors[i]);
+  printf("\n");
+
+  printf("\n");
+}
+
+
+/*************************************************************************/
+/*! This function checks the validity of user-supplied parameters. It returns 1 when every check for ctrl->optype passes; at the first failed check it prints an "Input Error" line with printf and returns 0. */
+/*************************************************************************/
+int CheckParams(ctrl_t *ctrl)
+{
+  idx_t i, j;
+  real_t sum;
+  mdbglvl_et  dbglvl=METIS_DBG_INFO;  /* constant; presumably makes every IFSET(dbglvl, METIS_DBG_INFO, ...) below fire -- confirm against the IFSET macro */
+
+  switch (ctrl->optype) {
+    case METIS_OP_PMETIS:
+      if (ctrl->objtype != METIS_OBJTYPE_CUT) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_GROW && ctrl->iptype != METIS_IPTYPE_RANDOM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_FM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {  /* per constraint: the target weights must add up to 1.0 within +/-0.01 */
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);  /* presumably sums nparts entries taken with stride ncon -- confirm against rsum */
+        if (sum < 0.99 || sum > 1.01) {
+          IFSET(dbglvl, METIS_DBG_INFO, 
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {  /* entry for partition j, constraint i; must be strictly positive */
+            IFSET(dbglvl, METIS_DBG_INFO, 
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {  /* every ubfactor must be strictly greater than 1.0 */
+          IFSET(dbglvl, METIS_DBG_INFO, 
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    case METIS_OP_KMETIS:  /* same checks as METIS_OP_PMETIS except for objtype/iptype/rtype and the extra contig/minconn flags */
+      if (ctrl->objtype != METIS_OBJTYPE_CUT && ctrl->objtype != METIS_OBJTYPE_VOL) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_METISRB) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_GREEDY) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->ncuts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->contig != 0 && ctrl->contig != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect contig.\n"));
+        return 0;
+      }
+      if (ctrl->minconn != 0 && ctrl->minconn != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect minconn.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {  /* per constraint: the target weights must add up to 1.0 within +/-0.01 */
+        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
+        if (sum < 0.99 || sum > 1.01) {
+          IFSET(dbglvl, METIS_DBG_INFO, 
+              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
+          return 0;
+        }
+      }
+      for (i=0; i<ctrl->ncon; i++) {
+        for (j=0; j<ctrl->nparts; j++) {
+          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {  /* entry for partition j, constraint i; must be strictly positive */
+            IFSET(dbglvl, METIS_DBG_INFO, 
+                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
+            return 0;
+          }
+        }
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {  /* every ubfactor must be strictly greater than 1.0 */
+          IFSET(dbglvl, METIS_DBG_INFO, 
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+
+
+    case METIS_OP_OMETIS:  /* checks nseps instead of ncuts and does not inspect tpwgts */
+      if (ctrl->objtype != METIS_OBJTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
+        return 0;
+      }
+      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
+        return 0;
+      }
+      if (ctrl->iptype != METIS_IPTYPE_EDGE && ctrl->iptype != METIS_IPTYPE_NODE) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
+        return 0;
+      }
+      if (ctrl->rtype != METIS_RTYPE_SEP1SIDED && ctrl->rtype != METIS_RTYPE_SEP2SIDED) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
+        return 0;
+      }
+      if (ctrl->nseps <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nseps.\n"));
+        return 0;
+      }
+      if (ctrl->niter <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
+        return 0;
+      }
+      if (ctrl->ufactor <= 0) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
+        return 0;
+      }
+      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
+        return 0;
+      }
+      if (ctrl->nparts != 3) {  /* exactly three parts are required for this operation */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
+        return 0;
+      }
+      if (ctrl->ncon != 1) {  /* and exactly one constraint */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
+        return 0;
+      }
+      if (ctrl->compress != 0 && ctrl->compress != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect compress.\n"));
+        return 0;
+      }
+      if (ctrl->ccorder != 0 && ctrl->ccorder != 1) {
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ccorder.\n"));
+        return 0;
+      }
+      if (ctrl->pfactor < 0.0 ) {  /* zero is accepted, only negative values are rejected */
+        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect pfactor.\n"));
+        return 0;
+      }
+
+      for (i=0; i<ctrl->ncon; i++) {
+        if (ctrl->ubfactors[i] <= 1.0) {  /* every ubfactor must be strictly greater than 1.0 */
+          IFSET(dbglvl, METIS_DBG_INFO, 
+              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
+          return 0;
+        }
+      }
+
+      break;
+
+    default:  /* optype is none of the three operations handled above */
+      IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect optype\n"));
+      return 0;
+  }
+
+  return 1;
+}
+
+  
+/*************************************************************************/
+/*! This function frees the memory associated with a ctrl_t and resets *r_ctrl to NULL. If *r_ctrl is already NULL the call does nothing. */
+/*************************************************************************/
+void FreeCtrl(ctrl_t **r_ctrl)
+{
+  ctrl_t *ctrl = *r_ctrl;
+
+  if (ctrl == NULL) return;  /* nothing to release; the calls below dereference ctrl */
+  FreeWorkSpace(ctrl);
+  gk_free((void **)&ctrl->tpwgts, &ctrl->pijbm, 
+          &ctrl->ubfactors, &ctrl->maxvwgt, &ctrl, LTERM);
+
+  *r_ctrl = NULL;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/parmetis.c b/3rdParty/metis/metis-5.1.0/libmetis/parmetis.c
new file mode 100644
index 000000000..631d811bc
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/parmetis.c
@@ -0,0 +1,723 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * parmetis.c
+ *
+ * This file contains top level routines that are used by ParMETIS
+ *
+ * Started 10/14/97
+ * George
+ *
+ * $Id: parmetis.c 10481 2011-07-05 18:01:23Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function is the entry point for the node ND code for ParMETIS.
+    The differences between this routine and the standard METIS_NodeND are
+    the following:
+    
+    - It performs at least log2(npes) levels of nested dissection.
+    - It stores the size of the log2(npes) top-level separators in the
+      sizes array, which is reset with iset(2*npes-1, 0, sizes) first.
+    Returns METIS_ERROR_INPUT if SetupCtrl() fails and METIS_OK otherwise. */
+/*************************************************************************/
+int METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
+           idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, idx_t *sizes) 
+{
+  idx_t i, ii, j, l, nnvtxs=0;
+  graph_t *graph;
+  ctrl_t *ctrl;
+  idx_t *cptr, *cind;
+
+  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
+  if (!ctrl) return METIS_ERROR_INPUT;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));
+
+  /* compress the graph; note that compression only happens if no pruning 
+     has taken place. */
+  if (ctrl->compress) {
+    cptr = imalloc(nvtxs+1, "OMETIS: cptr");
+    cind = imalloc(nvtxs, "OMETIS: cind");
+
+    graph = CompressGraph(ctrl, nvtxs, xadj, adjncy, vwgt, cptr, cind);
+    if (graph == NULL) {
+      /* if there was no compression, cleanup the compress flag */
+      gk_free((void **)&cptr, &cind, LTERM);
+      ctrl->compress = 0;
+    }
+    else {
+      nnvtxs = graph->nvtxs;  /* vertex count of the compressed graph, needed when expanding the ordering below */
+    }
+  }
+
+  /* if no compression, setup the graph in the normal way. */
+  if (ctrl->compress == 0) 
+    graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+
+  /* do the nested dissection ordering  */
+  iset(2*npes-1, 0, sizes);
+  MlevelNestedDissectionP(ctrl, graph, iperm, graph->nvtxs, npes, 0, sizes);  /* this call also frees graph */
+
+
+  /* Uncompress the ordering */
+  if (ctrl->compress) { 
+    /* construct perm from iperm */
+    for (i=0; i<nnvtxs; i++)
+      perm[iperm[i]] = i; 
+    for (l=ii=0; ii<nnvtxs; ii++) {  /* walk the compressed vertices in order and number their cind[] members consecutively */
+      i = perm[ii];
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        iperm[cind[j]] = l++;
+    }
+
+    gk_free((void **)&cptr, &cind, LTERM);
+  }
+
+
+  for (i=0; i<nvtxs; i++)  /* perm is the inverse of the final iperm */
+    perm[iperm[i]] = i;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));
+
+  /* clean up */
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! This function is similar to MlevelNestedDissection with the difference that it also records separator
+    sizes for the top log2(npes) levels. It takes ownership of graph and frees it before returning. */
+/**************************************************************************/
+void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order, 
+         idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes)
+{
+  idx_t i, nvtxs, nbnd;
+  idx_t *label, *bndind;
+  graph_t *lgraph, *rgraph;
+
+  nvtxs = graph->nvtxs;
+
+  if (nvtxs == 0) {  /* nothing to order, but the graph is still owned by this call */
+    FreeGraph(&graph);
+    return;
+  }
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO, 
+      printf("Nvtxs: %6"PRIDX", [%6"PRIDX" %6"PRIDX" %6"PRIDX"]\n", 
+        graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]));
+
+  if (cpos < npes-1) {  /* cpos indexes this call in an implicit binary tree: children are 2*cpos+1 and 2*cpos+2 */
+    sizes[2*npes-2-cpos]       = graph->pwgts[2];
+    sizes[2*npes-2-(2*cpos+1)] = graph->pwgts[1];
+    sizes[2*npes-2-(2*cpos+2)] = graph->pwgts[0];
+  }
+
+  /* Order the nodes in the separator */
+  nbnd   = graph->nbnd;
+  bndind = graph->bndind;
+  label  = graph->label;
+  for (i=0; i<nbnd; i++) 
+    order[label[bndind[i]]] = --lastvtx;  /* separator vertices take the highest remaining positions */
+
+  SplitGraphOrder(ctrl, graph, &lgraph, &rgraph);
+
+  /* Free the memory of the top level graph */
+  FreeGraph(&graph);
+
+  if ((lgraph->nvtxs > MMDSWITCH || 2*cpos+2 < npes-1) && lgraph->nedges > 0) 
+    MlevelNestedDissectionP(ctrl, lgraph, order, lastvtx-rgraph->nvtxs, npes, 2*cpos+2, sizes);
+  else {
+    MMDOrder(ctrl, lgraph, order, lastvtx-rgraph->nvtxs); 
+    FreeGraph(&lgraph);
+  }
+  if ((rgraph->nvtxs > MMDSWITCH || 2*cpos+1 < npes-1) && rgraph->nedges > 0) 
+    MlevelNestedDissectionP(ctrl, rgraph, order, lastvtx, npes, 2*cpos+1, sizes);
+  else {
+    MMDOrder(ctrl, rgraph, order, lastvtx); 
+    FreeGraph(&rgraph);
+  }
+}
+
+
+/*************************************************************************/
+/*! This function bisects a graph by computing a vertex separator. On success part[] receives graph->where for the *nvtxs vertices, *r_sepsize receives pwgts[2] and METIS_OK is returned; METIS_ERROR_INPUT is returned if SetupCtrl() fails. */
+/**************************************************************************/
+int METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, 
+           idx_t *vwgt, idx_t *options, idx_t *r_sepsize, idx_t *part) 
+{
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
+  if (ctrl == NULL)
+    return METIS_ERROR_INPUT;
+
+  InitRandom(ctrl->seed);
+
+  graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);
+
+  AllocateWorkSpace(ctrl, graph);
+
+  /*============================================================
+   * Perform the bisection
+   *============================================================*/ 
+  ctrl->CoarsenTo = 100;
+
+  MlevelNodeBisectionMultiple(ctrl, graph);
+
+  *r_sepsize = graph->pwgts[2];  /* index 2 is the separator, the same slot the refinement code keeps equal to mincut */
+  icopy(*nvtxs, graph->where, part);
+
+  FreeGraph(&graph);
+
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! This function is the entry point of a node-based separator refinement. It runs FM_2WayNodeRefine1SidedP (up to 10 passes), which only moves
+    separator nodes whose hmarker[] is -1 or equals the destination side. where[] is read as the input partition and overwritten with the result. */
+/*************************************************************************/
+int METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, 
+           idx_t *where, idx_t *hmarker, real_t ubfactor)
+{
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  /* set up the run time parameters */
+  ctrl = SetupCtrl(METIS_OP_OMETIS, NULL, 1, 3, NULL, NULL);
+  if (!ctrl) return METIS_ERROR_INPUT;
+
+  /* set up the graph */
+  graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);  /* note: vwgt precedes adjncy in this function's own parameter list */
+
+  /* allocate workspace memory */
+  AllocateWorkSpace(ctrl, graph);
+
+  /* set up the memory and the input partition */
+  Allocate2WayNodePartitionMemory(ctrl, graph);
+  icopy(nvtxs, where, graph->where);
+
+  Compute2WayNodePartitionParams(ctrl, graph);
+
+  FM_2WayNodeRefine1SidedP(ctrl, graph, hmarker, ubfactor, 10); 
+  /* FM_2WayNodeRefine2SidedP(ctrl, graph, hmarker, ubfactor, 10); */
+
+  icopy(nvtxs, graph->where, where);  /* hand the refined partition back to the caller */
+
+  FreeGraph(&graph);
+  FreeCtrl(&ctrl);
+
+  return METIS_OK;
+}
+
+
+/*************************************************************************/
+/*! This function performs a node-based 1-sided FM refinement that moves only separator nodes whose hmarker[]
+    is -1 or equals the destination side of the pass. It runs at most npasses passes. It is used by Parmetis. */
+/*************************************************************************/
+void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph, 
+          idx_t *hmarker, real_t ubfactor, idx_t npasses)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *swaps, *inqueue;
+  rpq_t *queue; 
+  nrinfo_t *rinfo;
+  idx_t higain, oldgain, mincut, initcut, mincutorder;
+  idx_t pass, from, to, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+
+  WCOREPUSH;
+
+  ASSERT(graph->mincut == graph->pwgts[2]);
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  rinfo  = graph->nrinfo;
+
+  queue = rpqCreate(nvtxs);
+      
+  inqueue = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));  /* inqueue[v] == pass marks v as queued in the current pass */
+  swaps   = iwspacemalloc(ctrl, nvtxs);
+  mptr    = iwspacemalloc(ctrl, nvtxs+1);
+  mind    = iwspacemalloc(ctrl, 2*nvtxs);  /* log of vertices pulled into the separator; mptr[] slices it per move */
+
+  badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1]));  /* weight cap for the receiving side */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"] "
+           "MaxPwgt[%6"PRIDX"]. ISep: %6"PRIDX"\n", 
+           pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt, 
+           graph->mincut));
+
+  to = (pwgts[0] < pwgts[1] ? 1 : 0);  /* flipped at the top of every pass, so the first pass moves into side 0 when it is the lighter one */
+  for (pass=0; pass<npasses; pass++) {
+    from = to; 
+    to   = (from+1)%2;
+
+    rpqReset(queue);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      if (hmarker[i] == -1 || hmarker[i] == to) {
+        rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[from]);
+        inqueue[i] = pass;
+      }
+    }
+    qsize = rpqLength(queue);
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+
+    limit = nbnd;
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    mptr[0] = nmind = nbad = 0;
+    mindiff = (pwgts[0] < pwgts[1] ? pwgts[1]-pwgts[0] : pwgts[0]-pwgts[1]);  /* |pwgts[0]-pwgts[1]| in idx_t; abs() takes int and truncates a 64-bit idx_t */
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      if ((higain = rpqGetTop(queue)) == -1) 
+        break;
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a posibility
+         of over-running the mind array.  */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1)
+        break;
+
+      inqueue[higain] = -1;
+
+      if (pwgts[to]+vwgt[higain] > badmaxpwgt) { /* Skip this vertex */
+        if (nbad++ > limit) 
+          break; 
+        else {
+          nswaps--;  /* cancels the loop increment: a skipped vertex does not use a swap slot */
+          continue;  
+        }
+      }
+
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[from]);
+      newdiff = pwgts[to]+vwgt[higain] - (pwgts[from]-rinfo[higain].edegrees[from]);
+      if (newdiff < 0) newdiff = -newdiff;  /* absolute value kept in idx_t instead of abs(int) */
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut      = pwgts[2];
+        mincutorder = nswaps;
+        mindiff     = newdiff;
+        nbad        = 0;
+      }
+      else {
+        if (nbad++ > limit) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[from]);
+          break; /* No further improvement, break out */
+        }
+      }
+
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to] += vwgt[higain];
+      where[higain] = to;
+      swaps[nswaps] = higain;  
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          rinfo[k].edegrees[to] += vwgt[higain];
+        }
+        else if (where[k] == from) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k;  /* Keep track for rollback */
+          where[k]      = 2;
+          pwgts[from]  -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;
+          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2) 
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              oldgain = vwgt[kk]-rinfo[kk].edegrees[from];
+              rinfo[kk].edegrees[from] -= vwgt[k];
+
+              /* Update the gain of this node if it was not skipped */
+              if (inqueue[kk] == pass)
+                rpqUpdate(queue, kk, oldgain+vwgt[k]); 
+            }
+          }
+
+          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
+          if (hmarker[k] == -1 || hmarker[k] == to) {
+            rpqInsert(queue, k, vwgt[k]-edegrees[from]);
+            inqueue[k] = pass;
+          }
+        }
+      }
+      mptr[nswaps+1] = nmind;
+
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n", 
+                   higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]), 
+                   vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit));
+
+    }
+
+
+    /****************************************************************
+    * Roll back computation 
+    *****************************************************************/
+    for (nswaps--; nswaps>mincutorder; nswaps--) {  /* undo every move made after the best separator was seen */
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+      ASSERT(where[higain] == to);
+
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) 
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = from;
+        INC_DEC(pwgts[from], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2) 
+            rinfo[kk].edegrees[from] += vwgt[k];
+        }
+      }
+    }
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX", QSIZE: %6"PRIDX"\n", 
+          mincut, mincutorder, pwgts[0], pwgts[1], nbnd, qsize));
+
+    graph->mincut = mincut;
+    graph->nbnd   = nbnd;
+
+    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut))  /* only stop after an odd pass, i.e. once both directions were tried */
+      break;
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! This function performs a node-based (two-sided) FM refinement. Separator nodes with hmarker[] == -1 may move to either
+    side, other values except 2 allow a move only to side hmarker[], and nodes with hmarker[] == 2 are never queued. It is used by Parmetis. */
+/*************************************************************************/
+void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, 
+          idx_t *hmarker, real_t ubfactor, idx_t npasses)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *moved, *swaps;
+  rpq_t *queues[2]; 
+  nrinfo_t *rinfo;
+  idx_t higain, oldgain, mincut, initcut, mincutorder;
+  idx_t pass, to, other, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+  idx_t u[2], g[2];
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  rinfo  = graph->nrinfo;
+
+  queues[0] = rpqCreate(nvtxs);
+  queues[1] = rpqCreate(nvtxs);
+
+  moved = iwspacemalloc(ctrl, nvtxs);  /* -1: untouched, -5: in both queues, -(10+s): only in queue s, >=0: swap index of the move */
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  mptr  = iwspacemalloc(ctrl, nvtxs+1);
+  mind  = iwspacemalloc(ctrl, 2*nvtxs);  /* log of vertices pulled into the separator; mptr[] slices it per move */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+
+  badmaxpwgt = (idx_t)(ubfactor*gk_max(pwgts[0], pwgts[1]));  /* weight cap for the receiving side */
+
+  for (pass=0; pass<npasses; pass++) {
+    iset(nvtxs, -1, moved);
+    rpqReset(queues[0]);
+    rpqReset(queues[1]);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      if (hmarker[i] == -1) {
+        rpqInsert(queues[0], i, vwgt[i]-rinfo[i].edegrees[1]);
+        rpqInsert(queues[1], i, vwgt[i]-rinfo[i].edegrees[0]);
+        moved[i] = -5;
+      }
+      else if (hmarker[i] != 2) {  /* hmarker[i] is used as a queue index here, so it is assumed to be 0 or 1 */
+        rpqInsert(queues[hmarker[i]], i, vwgt[i]-rinfo[i].edegrees[(hmarker[i]+1)%2]);
+        moved[i] = -(10+hmarker[i]);
+      }
+    }
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+
+    limit = nbnd;
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    mptr[0] = nmind = 0;
+    mindiff = (pwgts[0] < pwgts[1] ? pwgts[1]-pwgts[0] : pwgts[0]-pwgts[1]);  /* |pwgts[0]-pwgts[1]| in idx_t; abs() takes int and truncates a 64-bit idx_t */
+    to = (pwgts[0] < pwgts[1] ? 0 : 1);
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      u[0] = rpqSeeTopVal(queues[0]);  
+      u[1] = rpqSeeTopVal(queues[1]);
+      g[0] = (u[0] == -1 ? 0 : vwgt[u[0]]-rinfo[u[0]].edegrees[1]);  /* gains are defined even when a queue is empty, */
+      g[1] = (u[1] == -1 ? 0 : vwgt[u[1]]-rinfo[u[1]].edegrees[0]);  /* so the MOVEINFO print never reads unset values */
+      if (u[0] != -1 && u[1] != -1) {
+        to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); 
+
+        if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) 
+          to = (to+1)%2;
+      }
+      else if (u[0] == -1 && u[1] == -1) {
+        break;
+      }
+      else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) {
+        to = 0;
+      }
+      else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) {
+        to = 1;
+      }
+      else
+        break;
+
+      other = (to+1)%2;
+
+      higain = rpqGetTop(queues[to]);
+
+      /* Delete its matching entry in the other queue */
+      if (moved[higain] == -5) 
+        rpqDelete(queues[other], higain);
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a posibility
+         of over-running the mind array.  */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1)
+        break;
+
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);
+      newdiff = pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]);
+      if (newdiff < 0) newdiff = -newdiff;  /* absolute value kept in idx_t instead of abs(int) */
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut      = pwgts[2];
+        mincutorder = nswaps;
+        mindiff     = newdiff;
+      }
+      else {
+        if (nswaps - mincutorder > limit) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]);
+          break; /* No further improvement, break out */
+        }
+      }
+
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to] += vwgt[higain];
+      where[higain] = to;
+      moved[higain] = nswaps;
+      swaps[nswaps] = higain;  
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          oldgain = vwgt[k]-rinfo[k].edegrees[to];
+          rinfo[k].edegrees[to] += vwgt[higain];
+          if (moved[k] == -5 || moved[k] == -(10+other)) 
+            rpqUpdate(queues[other], k, oldgain-vwgt[higain]);
+        }
+        else if (where[k] == other) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k;  /* Keep track for rollback */
+          where[k] = 2;
+          pwgts[other] -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;
+          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2) 
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              oldgain = vwgt[kk]-rinfo[kk].edegrees[other];
+              rinfo[kk].edegrees[other] -= vwgt[k];
+              if (moved[kk] == -5 || moved[kk] == -(10+to))
+                rpqUpdate(queues[to], kk, oldgain+vwgt[k]);
+            }
+          }
+
+          /* Insert the new vertex into the priority queue (if it has not been moved). */
+          if (moved[k] == -1 && (hmarker[k] == -1 || hmarker[k] == to)) {
+            rpqInsert(queues[to], k, vwgt[k]-edegrees[other]);
+            moved[k] = -(10+to);
+          }
+#ifdef FULLMOVES  /* this does not work as well as the above partial one */
+          if (moved[k] == -1) {
+            if (hmarker[k] == -1) {
+              rpqInsert(queues[0], k, vwgt[k]-edegrees[1]);
+              rpqInsert(queues[1], k, vwgt[k]-edegrees[0]);
+              moved[k] = -5;
+            }
+            else if (hmarker[k] != 2) {
+              rpqInsert(queues[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]);
+              moved[k] = -(10+hmarker[k]);
+            }
+          }
+#endif
+        }
+      }
+      mptr[nswaps+1] = nmind;
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] "
+                   "[%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", 
+                   higain, to, g[to], g[other], vwgt[u[to]], (u[other] == -1 ? (idx_t)0 : vwgt[u[other]]), 
+                   pwgts[0], pwgts[1], pwgts[2]));
+
+    }
+
+
+    /****************************************************************
+    * Roll back computation 
+    *****************************************************************/
+    for (nswaps--; nswaps>mincutorder; nswaps--) {  /* undo every move made after the best separator was seen */
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+
+      to = where[higain];
+      other = (to+1)%2;
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) 
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = other;
+        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2) 
+            rinfo[kk].edegrees[other] += vwgt[k];
+        }
+      }
+    }
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));
+
+    graph->mincut = mincut;
+    graph->nbnd = nbnd;
+
+    if (mincutorder == -1 || mincut >= initcut)  /* stop as soon as a pass brings no improvement */
+      break;
+  }
+
+  rpqDestroy(queues[0]);
+  rpqDestroy(queues[1]);
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/pmetis.c b/3rdParty/metis/metis-5.1.0/libmetis/pmetis.c
new file mode 100644
index 000000000..d32e84921
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/pmetis.c
@@ -0,0 +1,387 @@
+/**
+\file
+\brief This file contains the top level routines for the multilevel recursive bisection 
+       algorithm PMETIS.
+
+\date   Started 7/24/1997
+\author George  
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version\verbatim $Id: pmetis.c 10513 2011-07-07 22:06:03Z karypis $ \endverbatim
+*/
+
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! \ingroup api 
+    \brief Recursive partitioning routine.
+
+    This function computes a partitioning of a graph based on multilevel
+    recursive bisection. It can be used to partition a graph into \e k 
+    parts. The objective of the partitioning is to minimize the edgecut
+    subject to one or more balancing constraints.
+
+    \param[in] nvtxs is the number of vertices in the graph.
+
+    \param[in] ncon is the number of balancing constraints. For the standard
+           partitioning problem in which each vertex is either unweighted
+           or has a single weight, ncon should be 1.
+
+    \param[in] xadj is an array of size nvtxs+1 used to specify the starting
+           positions of the adjacency structure of the vertices in the
+           adjncy array.
+
+    \param[in] adjncy is an array of size equal to the sum of the degrees of
+           the graph that stores, for each vertex, the set of vertices that
+           it is adjacent to.
+
+    \param[in] vwgt is an array of size nvtxs*ncon that stores the weights
+           of the vertices for each constraint. The ncon weights for the
+           ith vertex are stored in the ncon consecutive locations starting
+           at vwgt[i*ncon]. When ncon==1, a NULL value can be passed indicating
+           that all the vertices in the graph have the same weight.
+
+    \param[in] adjwgt is an array of size equal to adjncy, specifying the weight
+           for each edge (i.e., adjwgt[j] corresponds to the weight of the
+           edge stored in adjncy[j]). 
+           A NULL value can be passed indicating that all the edges in the 
+           graph have the same weight.
+
+    \param[in] nparts is the number of desired partitions.
+
+    \param[in] tpwgts is an array of size nparts*ncon that specifies the
+           desired weight for each part and constraint. The \e{target partition
+           weight} for the ith part and jth constraint is specified
+           at tpwgts[i*ncon+j] (the numbering of i and j starts from 0).
+           For each constraint, the sum of the tpwgts[] entries must be
+           1.0 (i.e., \f$ \sum_i tpwgts[i*ncon+j] = 1.0 \f$). 
+           A NULL value can be passed indicating that the graph should
+           be equally divided among the parts.
+
+    \param[in] ubvec is an array of size ncon that specifies the allowed 
+           load imbalance tolerance for each constraint. 
+           For the ith part and jth constraint the allowed weight is the 
+           ubvec[j]*tpwgts[i*ncon+j] fraction of the jth's constraint total
+           weight. The load imbalances must be greater than 1.0. 
+           A NULL value can be passed indicating that the load imbalance
+           tolerance for each constraint should be 1.001 (for ncon==1)
+           or 1.01 (for ncon>1).
+
+    \param[in] options is the array for passing additional parameters
+           in order to customize the behaviour of the partitioning
+           algorithm.
+
+    \param[out] objval stores the edgecut of the partitioning.
+
+    \param[out] part is an array of size nvtxs used to store the 
+           computed partitioning. The partition number for the ith
+           vertex is stored in part[i]. Based on the numflag parameter,
+           the numbering of the parts starts from either 0 or 1.
+
+
+    \returns 
+      \retval METIS_OK  indicates that the function returned normally.
+      \retval METIS_ERROR_INPUT indicates an input error.
+      \retval METIS_ERROR_MEMORY indicates that it could not allocate 
+              the required memory.
+           
+*/
+/*************************************************************************/
+int METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, 
+          idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, 
+          idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, 
+          idx_t *objval, idx_t *part)
+{
+  /* renumber is written after gk_sigcatch() and read on the SIGTHROW path, so
+     it is volatile. NOTE(review): assumes gk_sigcatch() is setjmp-based. */
+  int sigrval=0;
+  volatile int renumber=0;
+  graph_t *graph;
+  ctrl_t *ctrl;
+
+  /* set up malloc cleaning code and signal catchers */
+  if (!gk_malloc_init()) 
+    return METIS_ERROR_MEMORY;
+
+  gk_sigtrap();
+  if ((sigrval = gk_sigcatch()) != 0) 
+    goto SIGTHROW;
+
+  /* set up the run parameters; on invalid input undo both setup steps above */
+  ctrl = SetupCtrl(METIS_OP_PMETIS, options, *ncon, *nparts, tpwgts, ubvec);
+  if (!ctrl) {
+    gk_siguntrap();
+    gk_malloc_cleanup(0);
+    return METIS_ERROR_INPUT;
+  }
+
+  /* if required, change the numbering to 0 */
+  if (ctrl->numflag == 1) {
+    Change2CNumbering(*nvtxs, xadj, adjncy);
+    renumber = 1;
+  }
+
+  /* set up the graph and allocate workspace memory */
+  graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt);
+  AllocateWorkSpace(ctrl, graph);
+
+  /* start the partitioning */
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));
+
+  *objval = MlevelRecursiveBisection(ctrl, graph, *nparts, part, ctrl->tpwgts, 0);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));
+
+  /* clean up */
+  FreeCtrl(&ctrl);
+
+SIGTHROW:
+  /* if required, change the numbering back to 1 */
+  if (renumber)
+    Change2FNumbering(*nvtxs, xadj, adjncy, part);
+
+  gk_siguntrap();
+  gk_malloc_cleanup(0);
+
+  return metis_rcode(sigrval);
+}
+
+
+/*************************************************************************/
+/*! This function is the top-level driver of the recursive bisection 
+    routine. */
+/*************************************************************************/
+idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, 
+          idx_t *part, real_t *tpwgts, idx_t fpart)
+{
+  idx_t v, c, nvtxs, ncon, nleft, nright, objval;
+  real_t lsum, *tpwgts2, *rtpwgts;
+  graph_t *lgraph, *rgraph;
+
+  /* an empty graph cannot be split; report it and add nothing to the cut */
+  nvtxs = graph->nvtxs;
+  if (nvtxs == 0) {
+    printf("\t***Cannot bisect a graph with 0 vertices!\n"
+           "\t***You are trying to partition a graph into too many parts!\n");
+    return 0;
+  }
+
+  ncon    = graph->ncon;
+  nleft   = (nparts>>1);         /* number of parts that go to the left side */
+  nright  = nparts-nleft;        /* number of parts that go to the right side */
+  rtpwgts = tpwgts+nleft*ncon;   /* target weights of the right side's parts */
+
+  /* the left side's target fraction is the sum of the targets of its parts and
+     the right side gets 1.0 minus that; the scratch array is workspace memory */
+  WCOREPUSH;
+  tpwgts2 = rwspacemalloc(ctrl, 2*ncon);
+  for (c=0; c<ncon; c++) {
+    tpwgts2[c]      = rsum(nleft, tpwgts+c, ncon);
+    tpwgts2[ncon+c] = 1.0 - tpwgts2[c];
+  }
+
+  /* bisect the graph; the result is read from graph->where below */
+  objval = MultilevelBisect(ctrl, graph, tpwgts2);
+
+  WCOREPOP;
+
+  /* store the side of every vertex, shifted by the first part id of this call */
+  for (v=0; v<nvtxs; v++)
+    part[graph->label[v]] = graph->where[v] + fpart;
+
+  /* the two halves are built only when more than two parts were requested */
+  if (nparts >= 3)
+    SplitGraphPart(ctrl, graph, &lgraph, &rgraph);
+
+  /* this level's graph is no longer needed */
+  FreeGraph(&graph);
+
+  /* rescale in place: left targets by 1/lsum, right targets by 1/(1-lsum) */
+  for (c=0; c<ncon; c++) {
+    lsum = rsum(nleft, tpwgts+c, ncon);
+    rscale(nleft, 1.0/lsum, tpwgts+c, ncon);
+    rscale(nright, 1.0/(1.0-lsum), rtpwgts+c, ncon);
+  }
+
+  /* recurse: left side first (with nparts == 3 it is a single part and is just
+     freed), then the right side, whose part ids start nleft after fpart */
+  if (nparts >= 3) {
+    if (nparts > 3)
+      objval += MlevelRecursiveBisection(ctrl, lgraph, nleft, part, tpwgts, fpart);
+    else
+      FreeGraph(&lgraph);
+    objval += MlevelRecursiveBisection(ctrl, rgraph, nright, part, rtpwgts, 
+                  fpart+nleft);
+  }
+  return objval;
+}
+
+
+/*************************************************************************/
+/*! This function performs a multilevel bisection */
+/*************************************************************************/
+idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts)
+{
+  idx_t i, niparts, bestobj=0, curobj=0, *bestwhere=NULL;
+  int curisbest=1;  /* is the bisection held by graph->where the best one? */
+  graph_t *cgraph;
+  real_t bestbal=0.0, curbal=0.0;
+
+  Setup2WayBalMultipliers(ctrl, graph, tpwgts);
+  WCOREPUSH;
+
+  /* a copy of the best bisection is needed only when several are tried */
+  if (ctrl->ncuts > 1)
+    bestwhere = iwspacemalloc(ctrl, graph->nvtxs);
+
+  for (i=0; i<ctrl->ncuts; i++) {
+    cgraph = CoarsenGraph(ctrl, graph);
+    niparts = (cgraph->nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);
+    Init2WayPartition(ctrl, cgraph, tpwgts, niparts);
+    Refine2Way(ctrl, graph, cgraph, tpwgts);
+    curobj = graph->mincut;
+    curbal = ComputeLoadImbalanceDiff(graph, 2, ctrl->pijbm, ctrl->ubfactors);
+
+    /* keep the first try, a lower cut whose imbalance diff is <= 0.0005, or a
+       smaller imbalance diff while the best one's still exceeds 0.0005 */
+    curisbest = (i == 0  
+        || (curbal <= 0.0005 && bestobj > curobj) 
+        || (bestbal > 0.0005 && curbal < bestbal));
+    if (curisbest) {
+      bestobj = curobj;
+      bestbal = curbal;
+      if (i < ctrl->ncuts-1)  /* the last try stays in graph->where instead */
+        icopy(graph->nvtxs, graph->where, bestwhere);
+    }
+
+    if (bestobj == 0)
+      break;
+    if (i < ctrl->ncuts-1)
+      FreeRData(graph);
+  }
+
+  /* Restore the best bisection unless it is the one already in the graph. The
+     test must not be bestobj != curobj: a rejected last try may tie on the cut. */
+  if (!curisbest) {
+    icopy(graph->nvtxs, bestwhere, graph->where);
+    Compute2WayPartitionParams(ctrl, graph);
+  }
+  WCOREPOP;
+  return bestobj;
+}
+
+
+/*************************************************************************/
+/*! This function splits a graph into two based on its bisection */
+/*************************************************************************/
+void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, 
+         graph_t **r_rgraph)
+{
+  idx_t i, j, k, l, istart, iend, mypart, nvtxs, ncon, snvtxs[2], snedges[2];
+  idx_t *xadj, *vwgt, *adjncy, *adjwgt, *label, *where, *bndptr;
+  idx_t *sxadj[2], *svwgt[2], *sadjncy[2], *sadjwgt[2], *slabel[2];
+  idx_t *rename;
+  idx_t *auxadjncy, *auxadjwgt;
+  graph_t *lgraph, *rgraph;
+  /* Index 0 of the s*[] arrays refers to the left graph, index 1 to the right */
+  WCOREPUSH;
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->SplitTmr));
+
+  nvtxs   = graph->nvtxs;
+  ncon    = graph->ncon;
+  xadj    = graph->xadj;
+  vwgt    = graph->vwgt;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+  label   = graph->label;
+  where   = graph->where;
+  bndptr  = graph->bndptr;
+
+  ASSERT(bndptr != NULL);
+
+  rename = iwspacemalloc(ctrl, nvtxs); /* id of each vertex within its side */
+  /* Pass 1: number the vertices per side and count their adjacency entries */
+  snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0;
+  for (i=0; i<nvtxs; i++) {
+    k = where[i];
+    rename[i] = snvtxs[k]++;
+    snedges[k] += xadj[i+1]-xadj[i];
+  }
+
+  lgraph      = SetupSplitGraph(graph, snvtxs[0], snedges[0]);
+  sxadj[0]    = lgraph->xadj;
+  svwgt[0]    = lgraph->vwgt;
+  sadjncy[0]  = lgraph->adjncy; 	
+  sadjwgt[0]  = lgraph->adjwgt; 
+  slabel[0]   = lgraph->label;
+
+  rgraph      = SetupSplitGraph(graph, snvtxs[1], snedges[1]);
+  sxadj[1]    = rgraph->xadj;
+  svwgt[1]    = rgraph->vwgt;
+  sadjncy[1]  = rgraph->adjncy; 	
+  sadjwgt[1]  = rgraph->adjwgt; 
+  slabel[1]   = rgraph->label;
+  /* Pass 2: fill the subgraphs; adjncy entries keep the original ids for now */
+  snvtxs[0] = snvtxs[1] = snedges[0] = snedges[1] = 0;
+  sxadj[0][0] = sxadj[1][0] = 0;
+  for (i=0; i<nvtxs; i++) {
+    mypart = where[i];
+
+    istart = xadj[i];
+    iend = xadj[i+1];
+    if (bndptr[i] == -1) { /* This is an interior vertex */
+      auxadjncy = sadjncy[mypart] + snedges[mypart] - istart; /* so [istart] is the next free slot */
+      auxadjwgt = sadjwgt[mypart] + snedges[mypart] - istart;
+      for(j=istart; j<iend; j++) {
+        auxadjncy[j] = adjncy[j];
+        auxadjwgt[j] = adjwgt[j]; 
+      }
+      snedges[mypart] += iend-istart;
+    }
+    else { /* keep only the neighbors that are in the same part */
+      auxadjncy = sadjncy[mypart];
+      auxadjwgt = sadjwgt[mypart];
+      l = snedges[mypart];
+      for (j=istart; j<iend; j++) {
+        k = adjncy[j];
+        if (where[k] == mypart) {
+          auxadjncy[l] = k;
+          auxadjwgt[l++] = adjwgt[j]; 
+        }
+      }
+      snedges[mypart] = l;
+    }
+
+    /* copy vertex weights */
+    for (k=0; k<ncon; k++)
+      svwgt[mypart][snvtxs[mypart]*ncon+k] = vwgt[i*ncon+k];
+
+    slabel[mypart][snvtxs[mypart]]   = label[i];
+    sxadj[mypart][++snvtxs[mypart]]  = snedges[mypart];
+  }
+  /* Translate the copied neighbor ids into the per-side numbering */
+  for (mypart=0; mypart<2; mypart++) {
+    iend = sxadj[mypart][snvtxs[mypart]];
+    auxadjncy = sadjncy[mypart];
+    for (i=0; i<iend; i++) 
+      auxadjncy[i] = rename[auxadjncy[i]];
+  }
+
+  lgraph->nedges = snedges[0];
+  rgraph->nedges = snedges[1];
+
+  SetupGraph_tvwgt(lgraph);
+  SetupGraph_tvwgt(rgraph);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->SplitTmr));
+  /* hand the two subgraphs back to the caller */
+  *r_lgraph = lgraph;
+  *r_rgraph = rgraph;
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/proto.h b/3rdParty/metis/metis-5.1.0/libmetis/proto.h
new file mode 100644
index 000000000..f852ff59e
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/proto.h
@@ -0,0 +1,348 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * proto.h
+ *
+ * This file contains the function prototypes of libmetis
+ *
+ * Started 10/19/95
+ * George
+ *
+ * $Id: proto.h 13933 2013-03-29 22:20:46Z karypis $
+ *
+ */
+
+#ifndef _LIBMETIS_PROTO_H_
+#define _LIBMETIS_PROTO_H_
+
+/* auxapi.c */
+
+/* balance.c */
+void Balance2Way(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void Bnd2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void General2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+void McGeneral2WayBalance(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts);
+
+
+/* bucketsort.c */
+void BucketSortKeysInc(ctrl_t *ctrl, idx_t n, idx_t max, idx_t *keys,
+         idx_t *tperm, idx_t *perm);
+
+
+/* checkgraph.c */
+int CheckGraph(graph_t *graph, int numflag, int verbose);
+int CheckInputGraphWeights(idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy,
+        idx_t *vwgt, idx_t *vsize, idx_t *adjwgt);
+graph_t *FixGraph(graph_t *graph);
+
+
+/* coarsen.c */
+graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph);
+graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels);
+idx_t Match_RM(ctrl_t *ctrl, graph_t *graph);
+idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph);
+idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,
+          idx_t cnvtxs, size_t nunmatched);
+idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,
+          idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree);
+idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,
+          idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree);
+void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph);
+void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match);
+void CreateCoarseGraphNoMask(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match);
+void CreateCoarseGraphPerm(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs, 
+         idx_t *match, idx_t *perm);
+graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, idx_t dovsize);
+void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph);
+
+
+
+/* compress.c */
+graph_t *CompressGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, 
+             idx_t *vwgt, idx_t *cptr, idx_t *cind);
+graph_t *PruneGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t *xadj, idx_t *adjncy, 
+             idx_t *vwgt, idx_t *iperm, real_t factor);
+
+
+/* contig.c */
+idx_t FindPartitionInducedComponents(graph_t *graph, idx_t *where, 
+          idx_t *cptr, idx_t *cind);
+void ComputeBFSOrdering(ctrl_t *ctrl, graph_t *graph, idx_t *bfsperm);
+idx_t IsConnected(graph_t *graph, idx_t report);
+idx_t IsConnectedSubdomain(ctrl_t *, graph_t *, idx_t, idx_t);
+idx_t FindSepInducedComponents(ctrl_t *, graph_t *, idx_t *, idx_t *);
+void EliminateComponents(ctrl_t *ctrl, graph_t *graph);
+void MoveGroupContigForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid, 
+         idx_t *ptr, idx_t *ind);
+void MoveGroupContigForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t gid,
+         idx_t *ptr, idx_t *ind, idx_t *vmarker, idx_t *pmarker,
+         idx_t *modind);
+
+
+/* debug.c */
+idx_t ComputeCut(graph_t *graph, idx_t *where);
+idx_t ComputeVolume(graph_t *, idx_t *);
+idx_t ComputeMaxCut(graph_t *graph, idx_t nparts, idx_t *where);
+idx_t CheckBnd(graph_t *);
+idx_t CheckBnd2(graph_t *);
+idx_t CheckNodeBnd(graph_t *, idx_t);
+idx_t CheckRInfo(ctrl_t *ctrl, ckrinfo_t *rinfo);
+idx_t CheckNodePartitionParams(graph_t *);
+idx_t IsSeparable(graph_t *);
+void CheckKWayVolPartitionParams(ctrl_t *ctrl, graph_t *graph);
+
+
+/* fm.c */
+void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter);
+void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter);
+void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter);
+void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors, rpq_t **queues, 
+         idx_t *from, idx_t *cnum);
+void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, 
+         real_t deltabal, idx_t mincutorder);
+
+
+/* fortran.c */
+void Change2CNumbering(idx_t, idx_t *, idx_t *);
+void Change2FNumbering(idx_t, idx_t *, idx_t *, idx_t *);
+void Change2FNumbering2(idx_t, idx_t *, idx_t *);
+void Change2FNumberingOrder(idx_t, idx_t *, idx_t *, idx_t *, idx_t *);
+void ChangeMesh2CNumbering(idx_t n, idx_t *ptr, idx_t *ind);
+void ChangeMesh2FNumbering(idx_t n, idx_t *ptr, idx_t *ind, idx_t nvtxs,
+         idx_t *xadj, idx_t *adjncy);
+void ChangeMesh2FNumbering2(idx_t ne, idx_t nn, idx_t *ptr, idx_t *ind,
+         idx_t *epart, idx_t *npart);
+
+
+/* graph.c */
+graph_t *SetupGraph(ctrl_t *ctrl, idx_t nvtxs, idx_t ncon, idx_t *xadj, 
+             idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt);
+void SetupGraph_tvwgt(graph_t *graph);
+void SetupGraph_label(graph_t *graph);
+graph_t *SetupSplitGraph(graph_t *graph, idx_t snvtxs, idx_t snedges);
+graph_t *CreateGraph(void);
+void InitGraph(graph_t *graph);
+void FreeRData(graph_t *graph);
+void FreeGraph(graph_t **graph);
+
+
+/* initpart.c */
+void Init2WayPartition(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void InitSeparator(ctrl_t *ctrl, graph_t *graph, idx_t niparts);
+void RandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void GrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void McRandomBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void McGrowBisection(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void GrowBisectionNode(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+void GrowBisectionNode2(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niparts);
+
+
+/* kmetis.c */
+idx_t MlevelKWayPartitioning(ctrl_t *ctrl, graph_t *graph, idx_t *part);
+void InitKWayPartitioning(ctrl_t *ctrl, graph_t *graph);
+
+
+/* kwayfm.c */
+void Greedy_KWayOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode);
+void Greedy_KWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode);
+void Greedy_KWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode);
+void Greedy_McKWayCutOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode);
+void Greedy_McKWayVolOptimize(ctrl_t *ctrl, graph_t *graph, idx_t niter, 
+         real_t ffactor, idx_t omode);
+idx_t IsArticulationNode(idx_t i, idx_t *xadj, idx_t *adjncy, idx_t *where,
+          idx_t *bfslvl, idx_t *bfsind, idx_t *bfsmrk);
+void KWayVolUpdate(ctrl_t *ctrl, graph_t *graph, idx_t v, idx_t from,
+         idx_t to, ipq_t *queue, idx_t *vstatus, idx_t *r_nupd, idx_t *updptr,
+         idx_t *updind, idx_t bndtype, idx_t *vmarker, idx_t *pmarker,
+         idx_t *modind);
+
+
+/* kwayrefine.c */
+void RefineKWay(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph);
+void AllocateKWayPartitionMemory(ctrl_t *ctrl, graph_t *graph);
+void ComputeKWayPartitionParams(ctrl_t *ctrl, graph_t *graph);
+void ProjectKWayPartition(ctrl_t *ctrl, graph_t *graph);
+void ComputeKWayBoundary(ctrl_t *ctrl, graph_t *graph, idx_t bndtype);
+void ComputeKWayVolGains(ctrl_t *ctrl, graph_t *graph);
+int IsBalanced(ctrl_t *ctrl, graph_t *graph, real_t ffactor);
+
+
+/* mcutil.c */
+int rvecle(idx_t n, real_t *x, real_t *y);
+int rvecge(idx_t n, real_t *x, real_t *y);
+int rvecsumle(idx_t n, real_t *x1, real_t *x2, real_t *y);
+real_t rvecmaxdiff(idx_t n, real_t *x, real_t *y);
+int ivecle(idx_t n, idx_t *x, idx_t *z);
+int ivecge(idx_t n, idx_t *x, idx_t *z);
+int ivecaxpylez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z);
+int ivecaxpygez(idx_t n, idx_t a, idx_t *x, idx_t *y, idx_t *z);
+int BetterVBalance(idx_t ncon, real_t *itvwgt, idx_t *v_vwgt, idx_t *u1_vwgt,
+            idx_t *u2_vwgt);
+int BetterBalance2Way(idx_t n, real_t *x, real_t *y);
+int BetterBalanceKWay(idx_t ncon, idx_t *vwgt, real_t *itvwgt, idx_t a1,
+        idx_t *pt1, real_t *bm1, idx_t a2, idx_t *pt2, real_t *bm2);
+real_t ComputeLoadImbalance(graph_t *graph, idx_t nparts, real_t *pijbm);
+real_t ComputeLoadImbalanceDiff(graph_t *graph, idx_t nparts, real_t *pijbm, 
+           real_t *ubvec);
+real_t ComputeLoadImbalanceDiffVec(graph_t *graph, idx_t nparts, real_t *pijbm, 
+         real_t *ubfactors, real_t *diffvec);
+void ComputeLoadImbalanceVec(graph_t *graph, idx_t nparts, real_t *pijbm,
+             real_t *lbvec);
+
+
+/* mesh.c */
+void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon,
+          idx_t **r_xadj, idx_t **r_adjncy);
+idx_t FindCommonElements(idx_t qid, idx_t elen, idx_t *eind, idx_t *nptr,
+          idx_t *nind, idx_t *eptr, idx_t ncommon, idx_t *marker, idx_t *nbrs);
+void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t **r_xadj, 
+          idx_t **r_adjncy);
+idx_t FindCommonNodes(idx_t qid, idx_t nelmnts, idx_t *elmntids, idx_t *eptr,
+          idx_t *eind, idx_t *marker, idx_t *nbrs);
+mesh_t *CreateMesh(void);
+void InitMesh(mesh_t *mesh);  
+void FreeMesh(mesh_t **mesh);
+
+
+/* meshpart.c */
+void InduceRowPartFromColumnPart(idx_t nrows, idx_t *rowptr, idx_t *rowind,
+         idx_t *rpart, idx_t *cpart, idx_t nparts, real_t *tpwgts);
+
+
+/* minconn.c */
+void ComputeSubDomainGraph(ctrl_t *ctrl, graph_t *graph);
+void UpdateEdgeSubDomainGraph(ctrl_t *ctrl, idx_t u, idx_t v, idx_t ewgt, 
+         idx_t *r_maxndoms);
+void PrintSubDomainGraph(graph_t *graph, idx_t nparts, idx_t *where);
+void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph);
+void MoveGroupMinConnForCut(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, 
+         idx_t *ind);
+void MoveGroupMinConnForVol(ctrl_t *ctrl, graph_t *graph, idx_t to, idx_t nind, 
+         idx_t *ind, idx_t *vmarker, idx_t *pmarker, idx_t *modind);
+
+
+/* mincover.c */
+void MinCover(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *);
+idx_t MinCover_Augment(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t);
+void MinCover_Decompose(idx_t *, idx_t *, idx_t, idx_t, idx_t *, idx_t *, idx_t *);
+void MinCover_ColDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t);
+void MinCover_RowDFS(idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t);
+
+
+/* mmd.c */
+void genmmd(idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t , idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *);
+void mmdelm(idx_t, idx_t *xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t);
+idx_t mmdint(idx_t, idx_t *xadj, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *);
+void mmdnum(idx_t, idx_t *, idx_t *, idx_t *);
+void mmdupd(idx_t, idx_t, idx_t *, idx_t *, idx_t, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t *, idx_t, idx_t *tag);
+
+
+/* ometis.c */
+void MlevelNestedDissection(ctrl_t *ctrl, graph_t *graph, idx_t *order,
+         idx_t lastvtx);
+void MlevelNestedDissectionCC(ctrl_t *ctrl, graph_t *graph, idx_t *order,
+         idx_t lastvtx);
+void MlevelNodeBisectionMultiple(ctrl_t *ctrl, graph_t *graph);
+void MlevelNodeBisectionL2(ctrl_t *ctrl, graph_t *graph, idx_t niparts);
+void MlevelNodeBisectionL1(ctrl_t *ctrl, graph_t *graph, idx_t niparts);
+void SplitGraphOrder(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, 
+         graph_t **r_rgraph);
+graph_t **SplitGraphOrderCC(ctrl_t *ctrl, graph_t *graph, idx_t ncmps,
+              idx_t *cptr, idx_t *cind);
+void MMDOrder(ctrl_t *ctrl, graph_t *graph, idx_t *order, idx_t lastvtx);
+
+
+/* options.c */
+ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, 
+            real_t *tpwgts, real_t *ubvec);
+void SetupKWayBalMultipliers(ctrl_t *ctrl, graph_t *graph);
+void Setup2WayBalMultipliers(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts);
+void PrintCtrl(ctrl_t *ctrl);
+int CheckParams(ctrl_t *ctrl);
+void FreeCtrl(ctrl_t **r_ctrl);
+
+
+/* parmetis.c */
+void MlevelNestedDissectionP(ctrl_t *ctrl, graph_t *graph, idx_t *order,
+         idx_t lastvtx, idx_t npes, idx_t cpos, idx_t *sizes);
+void FM_2WayNodeRefine1SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, 
+         real_t ubfactor, idx_t npasses);
+void FM_2WayNodeRefine2SidedP(ctrl_t *ctrl, graph_t *graph, idx_t *hmarker, 
+         real_t ubfactor, idx_t npasses);
+
+
+/* pmetis.c */
+idx_t MlevelRecursiveBisection(ctrl_t *ctrl, graph_t *graph, idx_t nparts, 
+          idx_t *part, real_t *tpwgts, idx_t fpart);
+idx_t MultilevelBisect(ctrl_t *ctrl, graph_t *graph, real_t *tpwgts);
+void SplitGraphPart(ctrl_t *ctrl, graph_t *graph, graph_t **r_lgraph, graph_t **r_rgraph);
+
+
+/* refine.c */
+void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *rtpwgts);
+void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph);
+void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph);
+void Project2WayPartition(ctrl_t *ctrl, graph_t *graph);
+
+
+/* separator.c */
+void ConstructSeparator(ctrl_t *ctrl, graph_t *graph);
+void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph);
+
+
+/* sfm.c */
+void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter);
+void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter);
+void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph);
+
+
+/* srefine.c */
+void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph);
+void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph);
+void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph);
+void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph);
+
+
+/* stat.c */
+void ComputePartitionInfoBipartite(graph_t *, idx_t, idx_t *);
+void ComputePartitionBalance(graph_t *, idx_t, idx_t *, real_t *);
+real_t ComputeElementBalance(idx_t, idx_t, idx_t *);
+
+
+/* timing.c */
+void InitTimers(ctrl_t *);
+void PrintTimers(ctrl_t *);
+
+/* util.c */
+idx_t iargmax_strd(size_t, idx_t *, idx_t);
+idx_t iargmax_nrm(size_t n, idx_t *x, real_t *y);
+idx_t iargmax2_nrm(size_t n, idx_t *x, real_t *y);
+idx_t rargmax2(size_t, real_t *);
+void InitRandom(idx_t);
+int metis_rcode(int sigrval);
+
+
+
+/* wspace.c */
+void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph);
+void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize);
+void FreeWorkSpace(ctrl_t *ctrl);
+void *wspacemalloc(ctrl_t *ctrl, size_t nbytes);
+void wspacepush(ctrl_t *ctrl);
+void wspacepop(ctrl_t *ctrl);
+idx_t *iwspacemalloc(ctrl_t *, idx_t);
+real_t *rwspacemalloc(ctrl_t *, idx_t);
+ikv_t *ikvwspacemalloc(ctrl_t *, idx_t);
+void cnbrpoolReset(ctrl_t *ctrl);
+idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs);
+void vnbrpoolReset(ctrl_t *ctrl);
+idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs);
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/refine.c b/3rdParty/metis/metis-5.1.0/libmetis/refine.c
new file mode 100644
index 000000000..c08dc2ddb
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/refine.c
@@ -0,0 +1,211 @@
+/**
+\file
+\brief This file contains the driving routines for multilevel refinement
+
+\date   Started 7/24/1997
+\author George  
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version\verbatim $Id: refine.c 10513 2011-07-07 22:06:03Z karypis $ \endverbatim
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! This function is the entry point of refinement */
+/*************************************************************************/
+void Refine2Way(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph, real_t *tpwgts)
+{
+  int atorg;  /* set once the level that was just refined is orggraph itself */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr));
+
+  /* derive the refinement data of the incoming (coarsest) graph */
+  Compute2WayPartitionParams(ctrl, graph);
+
+  do {
+    ASSERT(CheckBnd(graph));
+
+    /* balance and then refine the bisection of the current level */
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr));
+    Balance2Way(ctrl, graph, tpwgts);
+    FM_2WayRefine(ctrl, graph, tpwgts, ctrl->niter);
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr));
+
+    atorg = (graph == orggraph);
+    if (!atorg) {
+      /* step to the next finer graph and project the bisection onto it */
+      graph = graph->finer;
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr));
+      Project2WayPartition(ctrl, graph);
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr));
+    }
+  } while (!atorg);
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr));
+}
+
+
+/*************************************************************************/
+/*! This function allocates memory for 2-way edge refinement */
+/*************************************************************************/
+void Allocate2WayPartitionMemory(ctrl_t *ctrl, graph_t *graph)
+{
+  /* pwgts holds 2*ncon entries; all the other arrays hold one per vertex */
+  const idx_t npwgts = 2*graph->ncon;
+  const idx_t nverts = graph->nvtxs;
+
+  /* the arrays are stored in the graph; nothing is initialized here */
+  graph->pwgts  = imalloc(npwgts, "Allocate2WayPartitionMemory: pwgts");
+  graph->where  = imalloc(nverts, "Allocate2WayPartitionMemory: where");
+  graph->bndptr = imalloc(nverts, "Allocate2WayPartitionMemory: bndptr");
+  graph->bndind = imalloc(nverts, "Allocate2WayPartitionMemory: bndind");
+  graph->id     = imalloc(nverts, "Allocate2WayPartitionMemory: id");
+  graph->ed     = imalloc(nverts, "Allocate2WayPartitionMemory: ed");
+}
+
+
+/*************************************************************************/
+/*! This function computes the initial id/ed */
+/*************************************************************************/
+void Compute2WayPartitionParams(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t v, c, e, side, nbnd, cut, intwgt, extwgt;
+  idx_t nvtxs   = graph->nvtxs;
+  idx_t ncon    = graph->ncon;
+  idx_t *xadj   = graph->xadj;
+  idx_t *vwgt   = graph->vwgt;
+  idx_t *adjncy = graph->adjncy;
+  idx_t *adjwgt = graph->adjwgt;
+  idx_t *where  = graph->where;
+  idx_t *bndind = graph->bndind;
+  idx_t *pwgts, *bndptr;
+
+  /* Everything below is derived from where[]: the part weights (pwgts), the
+     internal/external edge weight of each vertex (id/ed), the boundary
+     (bndptr/bndind/nbnd) and the cut (mincut). */
+  /* start from zero part weights and an empty boundary */
+  pwgts  = iset(2*ncon, 0, graph->pwgts);
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+
+  /* accumulate the weight of each side, one entry per constraint */
+  if (ncon == 1) {
+    for (v=0; v<nvtxs; v++) {
+      ASSERT(where[v] >= 0 && where[v] <= 1);
+      pwgts[where[v]] += vwgt[v];
+    }
+    ASSERT(pwgts[0]+pwgts[1] == graph->tvwgt[0]);
+  }
+  else {
+    for (v=0; v<nvtxs; v++) {
+      side = where[v];
+      for (c=0; c<ncon; c++)
+        pwgts[side*ncon+c] += vwgt[v*ncon+c];
+    }
+  }
+
+  /* per vertex: weight of the edges that stay on its side (id) and of those
+     that lead to the other side (ed) */
+  nbnd = 0;
+  cut  = 0;
+  for (v=0; v<nvtxs; v++) {
+    side   = where[v];
+    intwgt = 0;
+    extwgt = 0;
+
+    for (e=xadj[v]; e<xadj[v+1]; e++) {
+      if (where[adjncy[e]] != side)
+        extwgt += adjwgt[e];
+      else
+        intwgt += adjwgt[e];
+    }
+    graph->id[v] = intwgt;
+    graph->ed[v] = extwgt;
+
+    /* vertices with external weight, and vertices without any edge, are
+       put on the boundary */
+    if (extwgt > 0 || xadj[v] == xadj[v+1]) {
+      BNDInsert(nbnd, bndind, bndptr, v);
+      cut += extwgt;
+    }
+  }
+
+  /* the sum is halved; presumably each cut edge is stored at both endpoints */
+  graph->mincut = cut/2;
+  graph->nbnd   = nbnd;
+}
+
+
+/*************************************************************************/
+/*! Projects a partition and computes the refinement params. */
+/*************************************************************************/
+void Project2WayPartition(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, istart, iend, nvtxs, nbnd, me, tid, ted;
+  idx_t *xadj, *adjncy, *adjwgt;
+  idx_t *cmap, *where, *bndptr, *bndind;
+  idx_t *cwhere, *cbndptr;
+  idx_t *id, *ed;
+  graph_t *cgraph;
+  /* graph is the finer level; it gets its own partition arrays first */
+  Allocate2WayPartitionMemory(ctrl, graph);
+
+  cgraph  = graph->coarser;
+  cwhere  = cgraph->where;
+  cbndptr = cgraph->bndptr;
+
+  nvtxs   = graph->nvtxs;
+  cmap    = graph->cmap;
+  xadj    = graph->xadj;
+  adjncy  = graph->adjncy;
+  adjwgt  = graph->adjwgt;
+
+  where  = graph->where;
+  id     = graph->id;
+  ed     = graph->ed;
+
+  bndptr = iset(nvtxs, -1, graph->bndptr);
+  bndind = graph->bndind;
+
+  /* Project the partition and record which of these nodes came from the
+     coarser boundary */
+  for (i=0; i<nvtxs; i++) {
+    j = cmap[i];
+    where[i] = cwhere[j];
+    cmap[i]  = cbndptr[j]; /* cmap is overwritten and used as scratch from here on */
+  }
+
+  /* Compute the refinement information of the nodes */
+  for (nbnd=0, i=0; i<nvtxs; i++) {
+    istart = xadj[i];
+    iend   = xadj[i+1];
+  
+    tid = ted = 0;
+    if (cmap[i] == -1) { /* Interior node. Note that cmap[i] = cbndptr[cmap[i]] */
+      for (j=istart; j<iend; j++)
+        tid += adjwgt[j];
+    }
+    else { /* Potentially an interface node */
+      me = where[i];
+      for (j=istart; j<iend; j++) {
+        if (me == where[adjncy[j]])
+          tid += adjwgt[j];
+        else
+          ted += adjwgt[j];
+      }
+    }
+    id[i] = tid;
+    ed[i] = ted;
+    /* vertices with external weight, or with no edges at all, go to the boundary */
+    if (ted > 0 || istart == iend) 
+      BNDInsert(nbnd, bndind, bndptr, i);
+  }
+  graph->mincut = cgraph->mincut; /* taken over from the coarser graph, not recomputed */
+  graph->nbnd   = nbnd;
+
+  /* copy pwgts */
+  icopy(2*graph->ncon, cgraph->pwgts, graph->pwgts);
+
+  FreeGraph(&graph->coarser);
+  graph->coarser = NULL;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/rename.h b/3rdParty/metis/metis-5.1.0/libmetis/rename.h
new file mode 100644
index 000000000..62b03b491
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/rename.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * rename.h
+ *
+ * This file contains macros that rename internal functions with a libmetis__ prefix
+ *
+ * Started 10/2/97
+ * George
+ *
+ * $Id: rename.h 13933 2013-03-29 22:20:46Z karypis $
+ *
+ */
+
+
+#ifndef _LIBMETIS_RENAME_H_
+#define _LIBMETIS_RENAME_H_
+/* Each macro below maps an internal name to the same name prefixed with libmetis__; sections are labelled by source file. */
+
+/* balance.c */
+#define Balance2Way			libmetis__Balance2Way
+#define Bnd2WayBalance			libmetis__Bnd2WayBalance
+#define General2WayBalance		libmetis__General2WayBalance
+#define McGeneral2WayBalance            libmetis__McGeneral2WayBalance
+
+/* bucketsort.c */
+#define BucketSortKeysInc		libmetis__BucketSortKeysInc
+
+/* checkgraph.c */
+#define CheckGraph                      libmetis__CheckGraph
+#define CheckInputGraphWeights          libmetis__CheckInputGraphWeights
+#define FixGraph                        libmetis__FixGraph
+
+/* coarsen.c */
+#define CoarsenGraph			libmetis__CoarsenGraph
+#define Match_RM                        libmetis__Match_RM
+#define Match_SHEM                      libmetis__Match_SHEM
+#define Match_2Hop                      libmetis__Match_2Hop
+#define Match_2HopAny                   libmetis__Match_2HopAny
+#define Match_2HopAll                   libmetis__Match_2HopAll
+#define PrintCGraphStats                libmetis__PrintCGraphStats
+#define CreateCoarseGraph		libmetis__CreateCoarseGraph
+#define CreateCoarseGraphNoMask		libmetis__CreateCoarseGraphNoMask
+#define CreateCoarseGraphPerm		libmetis__CreateCoarseGraphPerm
+#define SetupCoarseGraph		libmetis__SetupCoarseGraph
+#define ReAdjustMemory			libmetis__ReAdjustMemory
+
+/* compress.c */
+#define CompressGraph			libmetis__CompressGraph
+#define PruneGraph			libmetis__PruneGraph
+
+/* contig.c */
+#define FindPartitionInducedComponents  libmetis__FindPartitionInducedComponents   
+#define IsConnected                     libmetis__IsConnected
+#define IsConnectedSubdomain            libmetis__IsConnectedSubdomain
+#define FindSepInducedComponents        libmetis__FindSepInducedComponents
+#define EliminateComponents             libmetis__EliminateComponents
+#define MoveGroupContigForCut           libmetis__MoveGroupContigForCut
+#define MoveGroupContigForVol           libmetis__MoveGroupContigForVol
+
+/* debug.c */
+#define ComputeCut			libmetis__ComputeCut
+#define ComputeVolume			libmetis__ComputeVolume
+#define ComputeMaxCut			libmetis__ComputeMaxCut
+#define CheckBnd			libmetis__CheckBnd
+#define CheckBnd2			libmetis__CheckBnd2
+#define CheckNodeBnd			libmetis__CheckNodeBnd
+#define CheckRInfo			libmetis__CheckRInfo
+#define CheckNodePartitionParams	libmetis__CheckNodePartitionParams
+#define IsSeparable			libmetis__IsSeparable
+#define CheckKWayVolPartitionParams     libmetis__CheckKWayVolPartitionParams
+
+/* fm.c */
+#define FM_2WayRefine                   libmetis__FM_2WayRefine
+#define FM_2WayCutRefine                libmetis__FM_2WayCutRefine
+#define FM_Mc2WayCutRefine              libmetis__FM_Mc2WayCutRefine
+#define SelectQueue                     libmetis__SelectQueue
+#define Print2WayRefineStats            libmetis__Print2WayRefineStats
+
+/* fortran.c */
+#define Change2CNumbering		libmetis__Change2CNumbering
+#define Change2FNumbering		libmetis__Change2FNumbering
+#define Change2FNumbering2		libmetis__Change2FNumbering2
+#define Change2FNumberingOrder		libmetis__Change2FNumberingOrder
+#define ChangeMesh2CNumbering		libmetis__ChangeMesh2CNumbering
+#define ChangeMesh2FNumbering		libmetis__ChangeMesh2FNumbering
+#define ChangeMesh2FNumbering2		libmetis__ChangeMesh2FNumbering2
+
+/* graph.c */
+#define SetupGraph			libmetis__SetupGraph
+#define SetupGraph_adjrsum              libmetis__SetupGraph_adjrsum
+#define SetupGraph_tvwgt                libmetis__SetupGraph_tvwgt
+#define SetupGraph_label                libmetis__SetupGraph_label
+#define SetupSplitGraph                 libmetis__SetupSplitGraph
+#define CreateGraph                     libmetis__CreateGraph
+#define InitGraph                       libmetis__InitGraph
+#define FreeRData                       libmetis__FreeRData
+#define FreeGraph                       libmetis__FreeGraph
+
+/* initpart.c */
+#define Init2WayPartition		libmetis__Init2WayPartition
+#define InitSeparator			libmetis__InitSeparator
+#define RandomBisection			libmetis__RandomBisection
+#define GrowBisection			libmetis__GrowBisection
+#define McRandomBisection               libmetis__McRandomBisection
+#define McGrowBisection                 libmetis__McGrowBisection
+#define GrowBisectionNode		libmetis__GrowBisectionNode
+
+/* kmetis.c */
+#define MlevelKWayPartitioning		libmetis__MlevelKWayPartitioning
+#define InitKWayPartitioning            libmetis__InitKWayPartitioning
+
+/* kwayfm.c */
+#define Greedy_KWayOptimize		libmetis__Greedy_KWayOptimize
+#define Greedy_KWayCutOptimize		libmetis__Greedy_KWayCutOptimize
+#define Greedy_KWayVolOptimize          libmetis__Greedy_KWayVolOptimize
+#define Greedy_McKWayCutOptimize        libmetis__Greedy_McKWayCutOptimize
+#define Greedy_McKWayVolOptimize        libmetis__Greedy_McKWayVolOptimize
+#define IsArticulationNode              libmetis__IsArticulationNode
+#define KWayVolUpdate                   libmetis__KWayVolUpdate
+
+/* kwayrefine.c */
+#define RefineKWay			libmetis__RefineKWay
+#define AllocateKWayPartitionMemory	libmetis__AllocateKWayPartitionMemory
+#define ComputeKWayPartitionParams	libmetis__ComputeKWayPartitionParams
+#define ProjectKWayPartition		libmetis__ProjectKWayPartition
+#define ComputeKWayBoundary		libmetis__ComputeKWayBoundary
+#define ComputeKWayVolGains             libmetis__ComputeKWayVolGains
+#define IsBalanced			libmetis__IsBalanced
+
+/* mcutil.c */
+#define rvecle                          libmetis__rvecle
+#define rvecge                          libmetis__rvecge
+#define rvecsumle                       libmetis__rvecsumle
+#define rvecmaxdiff                     libmetis__rvecmaxdiff
+#define ivecle                          libmetis__ivecle
+#define ivecge                          libmetis__ivecge
+#define ivecaxpylez                     libmetis__ivecaxpylez
+#define ivecaxpygez                     libmetis__ivecaxpygez
+#define BetterVBalance                  libmetis__BetterVBalance
+#define BetterBalance2Way               libmetis__BetterBalance2Way
+#define BetterBalanceKWay               libmetis__BetterBalanceKWay
+#define ComputeLoadImbalance            libmetis__ComputeLoadImbalance
+#define ComputeLoadImbalanceDiff        libmetis__ComputeLoadImbalanceDiff
+#define ComputeLoadImbalanceDiffVec     libmetis__ComputeLoadImbalanceDiffVec
+#define ComputeLoadImbalanceVec         libmetis__ComputeLoadImbalanceVec
+
+/* mesh.c */
+#define CreateGraphDual                 libmetis__CreateGraphDual
+#define FindCommonElements              libmetis__FindCommonElements
+#define CreateGraphNodal                libmetis__CreateGraphNodal
+#define FindCommonNodes                 libmetis__FindCommonNodes
+#define CreateMesh                      libmetis__CreateMesh
+#define InitMesh                        libmetis__InitMesh
+#define FreeMesh                        libmetis__FreeMesh
+
+/* meshpart.c */
+#define InduceRowPartFromColumnPart     libmetis__InduceRowPartFromColumnPart
+
+/* minconn.c */
+#define ComputeSubDomainGraph           libmetis__ComputeSubDomainGraph
+#define UpdateEdgeSubDomainGraph        libmetis__UpdateEdgeSubDomainGraph
+#define PrintSubDomainGraph             libmetis__PrintSubDomainGraph
+#define EliminateSubDomainEdges         libmetis__EliminateSubDomainEdges
+#define MoveGroupMinConnForCut          libmetis__MoveGroupMinConnForCut
+#define MoveGroupMinConnForVol          libmetis__MoveGroupMinConnForVol
+
+/* mincover.c */
+#define MinCover			libmetis__MinCover
+#define MinCover_Augment		libmetis__MinCover_Augment
+#define MinCover_Decompose		libmetis__MinCover_Decompose
+#define MinCover_ColDFS			libmetis__MinCover_ColDFS
+#define MinCover_RowDFS			libmetis__MinCover_RowDFS
+
+/* mmd.c */
+#define genmmd				libmetis__genmmd
+#define mmdelm				libmetis__mmdelm
+#define mmdint				libmetis__mmdint
+#define mmdnum				libmetis__mmdnum
+#define mmdupd				libmetis__mmdupd
+
+
+/* ometis.c */
+#define MlevelNestedDissection		libmetis__MlevelNestedDissection
+#define MlevelNestedDissectionCC	libmetis__MlevelNestedDissectionCC
+#define MlevelNodeBisectionMultiple	libmetis__MlevelNodeBisectionMultiple
+#define MlevelNodeBisectionL2		libmetis__MlevelNodeBisectionL2
+#define MlevelNodeBisectionL1		libmetis__MlevelNodeBisectionL1
+#define SplitGraphOrder			libmetis__SplitGraphOrder
+#define SplitGraphOrderCC		libmetis__SplitGraphOrderCC
+#define MMDOrder			libmetis__MMDOrder
+
+/* options.c */
+#define SetupCtrl                       libmetis__SetupCtrl
+#define SetupKWayBalMultipliers         libmetis__SetupKWayBalMultipliers
+#define Setup2WayBalMultipliers         libmetis__Setup2WayBalMultipliers
+#define PrintCtrl                       libmetis__PrintCtrl
+#define FreeCtrl                        libmetis__FreeCtrl
+#define CheckParams                     libmetis__CheckParams
+
+/* parmetis.c */
+#define MlevelNestedDissectionP		libmetis__MlevelNestedDissectionP
+#define FM_2WayNodeRefine1SidedP        libmetis__FM_2WayNodeRefine1SidedP
+#define FM_2WayNodeRefine2SidedP        libmetis__FM_2WayNodeRefine2SidedP
+
+/* pmetis.c */
+#define MlevelRecursiveBisection	libmetis__MlevelRecursiveBisection
+#define MultilevelBisect		libmetis__MultilevelBisect
+#define SplitGraphPart			libmetis__SplitGraphPart
+
+/* refine.c */
+#define Refine2Way			libmetis__Refine2Way
+#define Allocate2WayPartitionMemory	libmetis__Allocate2WayPartitionMemory
+#define Compute2WayPartitionParams	libmetis__Compute2WayPartitionParams
+#define Project2WayPartition		libmetis__Project2WayPartition
+
+/* separator.c */
+#define ConstructSeparator		libmetis__ConstructSeparator
+#define ConstructMinCoverSeparator	libmetis__ConstructMinCoverSeparator
+
+/* sfm.c */
+#define FM_2WayNodeRefine2Sided         libmetis__FM_2WayNodeRefine2Sided 
+#define FM_2WayNodeRefine1Sided         libmetis__FM_2WayNodeRefine1Sided
+#define FM_2WayNodeBalance              libmetis__FM_2WayNodeBalance
+
+/* srefine.c */
+#define Refine2WayNode			libmetis__Refine2WayNode
+#define Allocate2WayNodePartitionMemory	libmetis__Allocate2WayNodePartitionMemory
+#define Compute2WayNodePartitionParams	libmetis__Compute2WayNodePartitionParams
+#define Project2WayNodePartition	libmetis__Project2WayNodePartition
+
+/* stat.c */
+#define ComputePartitionInfoBipartite   libmetis__ComputePartitionInfoBipartite
+#define ComputePartitionBalance		libmetis__ComputePartitionBalance
+#define ComputeElementBalance		libmetis__ComputeElementBalance
+
+/* timing.c */
+#define InitTimers			libmetis__InitTimers
+#define PrintTimers			libmetis__PrintTimers
+
+/* util.c */
+#define iargmax_strd                    libmetis__iargmax_strd 
+#define iargmax_nrm                     libmetis__iargmax_nrm
+#define iargmax2_nrm                    libmetis__iargmax2_nrm
+#define rargmax2                        libmetis__rargmax2
+#define InitRandom                      libmetis__InitRandom
+#define metis_rcode                     libmetis__metis_rcode
+
+/* wspace.c */
+#define AllocateWorkSpace               libmetis__AllocateWorkSpace                  
+#define AllocateRefinementWorkSpace     libmetis__AllocateRefinementWorkSpace
+#define FreeWorkSpace                   libmetis__FreeWorkSpace
+#define wspacemalloc                    libmetis__wspacemalloc
+#define wspacepush                      libmetis__wspacepush
+#define wspacepop                       libmetis__wspacepop
+#define iwspacemalloc                   libmetis__iwspacemalloc
+#define rwspacemalloc                   libmetis__rwspacemalloc
+#define ikvwspacemalloc                 libmetis__ikvwspacemalloc
+#define cnbrpoolReset                   libmetis__cnbrpoolReset
+#define cnbrpoolGetNext                 libmetis__cnbrpoolGetNext
+#define vnbrpoolReset                   libmetis__vnbrpoolReset
+#define vnbrpoolGetNext                 libmetis__vnbrpoolGetNext
+
+#endif /* _LIBMETIS_RENAME_H_ */
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/separator.c b/3rdParty/metis/metis-5.1.0/libmetis/separator.c
new file mode 100644
index 000000000..72dae9b64
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/separator.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * separator.c
+ *
+ * This file contains code for separator extraction
+ *
+ * Started 8/1/97
+ * George
+ *
+ * $Id: separator.c 10481 2011-07-05 18:01:23Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+/*************************************************************************
+* Turns the bisection in graph->where into a vertex separator: every boundary vertex that has edges is relabelled as part 2,
+* the node-partition data is rebuilt, and the separator is refined by FM_2WayNodeRefine2Sided (niter=1) then FM_2WayNodeRefine1Sided (niter=4).
+**************************************************************************/
+void ConstructSeparator(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, k, nvtxs, nbnd;  /* NOTE(review): k is declared but never used in this function */
+  idx_t *xadj, *where, *bndind;
+
+  WCOREPUSH;  /* NOTE(review): macro not shown here; presumably marks the ctrl workspace so WCOREPOP below releases the scratch array -- confirm */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  nbnd   = graph->nbnd;
+  bndind = graph->bndind;
+  /* Work on a scratch copy of the partition vector obtained from the workspace */
+  where = icopy(nvtxs, graph->where, iwspacemalloc(ctrl, nvtxs));
+
+  /* Put the nodes in the boundary into the separator */
+  for (i=0; i<nbnd; i++) {
+    j = bndind[i];
+    if (xadj[j+1]-xadj[j] > 0)  /* Ignore islands */
+      where[j] = 2;
+  }
+
+  FreeRData(graph);  /* NOTE(review): presumably frees the edge-based refinement arrays, including graph->where -- confirm */
+
+  Allocate2WayNodePartitionMemory(ctrl, graph);
+  icopy(nvtxs, where, graph->where);  /* copy the 0/1/2 labelling back into graph->where */
+
+  WCOREPOP;
+
+  ASSERT(IsSeparable(graph));
+
+  Compute2WayNodePartitionParams(ctrl, graph);
+
+  ASSERT(CheckNodePartitionParams(graph));
+
+  FM_2WayNodeRefine2Sided(ctrl, graph, 1);  /* niter = 1: a single two-sided pass */
+  FM_2WayNodeRefine1Sided(ctrl, graph, 4);  /* niter = 4: up to 2*4 one-sided passes */
+
+  ASSERT(IsSeparable(graph));
+
+}
+
+
+
+/*************************************************************************
+* Turns the bisection in graph->where into a vertex separator. Boundary vertices that have edges form a
+* bipartite graph (side 0 vs side 1); MinCover() selects a cover of it and the selected vertices get
+* where[] = 2. The result is then refined with FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter).
+**************************************************************************/
+void ConstructMinCoverSeparator(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, jj, k, l, nvtxs, nbnd, bnvtxs[3], bnedges[2], csize;
+  idx_t *xadj, *adjncy, *bxadj, *badjncy;
+  idx_t *where, *bndind, *bndptr, *vmap, *ivmap, *cover;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  nbnd   = graph->nbnd;
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  /* vmap: vertex -> bipartite index, ivmap: bipartite index -> vertex, cover: output list of MinCover() */
+  vmap  = iwspacemalloc(ctrl, nvtxs);
+  ivmap = iwspacemalloc(ctrl, nbnd);
+  cover = iwspacemalloc(ctrl, nbnd);
+
+  if (nbnd > 0) {
+    /* Go through the boundary and determine the sizes of the bipartite graph */
+    bnvtxs[0] = bnvtxs[1] = bnedges[0] = bnedges[1] = 0;
+    for (i=0; i<nbnd; i++) {
+      j = bndind[i];
+      k = where[j];
+      if (xadj[j+1]-xadj[j] > 0) {
+        bnvtxs[k]++;
+        bnedges[k] += xadj[j+1]-xadj[j];
+      }
+    }
+    /* Turn the per-side counts into start offsets: [0] = 0, [1] = #side-0 vertices, [2] = total */
+    bnvtxs[2] = bnvtxs[0]+bnvtxs[1];
+    bnvtxs[1] = bnvtxs[0];
+    bnvtxs[0] = 0;
+
+    bxadj   = iwspacemalloc(ctrl, bnvtxs[2]+1);
+    badjncy = iwspacemalloc(ctrl, bnedges[0]+bnedges[1]+1);
+
+    /* Construct the ivmap and vmap. NOTE(review): the -1 fill below sits inside ASSERT(); if ASSERT expands to nothing, vmap is not pre-filled -- confirm this is intended. */
+    ASSERT(iset(nvtxs, -1, vmap) == vmap);
+    for (i=0; i<nbnd; i++) {
+      j = bndind[i];
+      k = where[j];
+      if (xadj[j+1]-xadj[j] > 0) {
+        vmap[j] = bnvtxs[k];
+        ivmap[bnvtxs[k]++] = j;
+      }
+    }
+
+    /* The loop above advanced bnvtxs[0]/[1]; reset them to the start offsets, then build bxadj/badjncy keeping only edges whose endpoints are on different sides */
+    bnvtxs[1] = bnvtxs[0];
+    bnvtxs[0] = 0;
+    bxadj[0] = l = 0;
+    for (k=0; k<2; k++) {
+      for (ii=0; ii<nbnd; ii++) {
+        i = bndind[ii];
+        if (where[i] == k && xadj[i] < xadj[i+1]) {
+          for (j=xadj[i]; j<xadj[i+1]; j++) {
+            jj = adjncy[j];
+            if (where[jj] != k) {
+              ASSERT(bndptr[jj] != -1); 
+              ASSERTP(vmap[jj] != -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", jj, vmap[jj], graph->bndptr[jj]));
+              badjncy[l++] = vmap[jj];
+            }
+          }
+          bxadj[++bnvtxs[k]] = l;
+        }
+      }
+    }
+    /* At this point bnvtxs[0] = #side-0 vertices and bnvtxs[1] = total number of bipartite vertices */
+    ASSERT(l <= bnedges[0]+bnedges[1]);
+
+    MinCover(bxadj, badjncy, bnvtxs[0], bnvtxs[1], cover, &csize);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO,
+      printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, bnvtxs[0], bnvtxs[1]-bnvtxs[0], csize));
+    /* Every vertex listed in the cover is placed into the separator (part 2) */
+    for (i=0; i<csize; i++) {
+      j = ivmap[cover[i]];
+      where[j] = 2;
+    }
+  }
+  else {
+    IFSET(ctrl->dbglvl, METIS_DBG_SEPINFO,
+      printf("Nvtxs: %6"PRIDX", [%5"PRIDX" %5"PRIDX"], Cut: %6"PRIDX", SS: [%6"PRIDX" %6"PRIDX"], Cover: %6"PRIDX"\n", nvtxs, graph->pwgts[0], graph->pwgts[1], graph->mincut, (idx_t)0, (idx_t)0, (idx_t)0));
+  }
+
+  /* Prepare to refine the vertex separator: vmap is reused as scratch to carry where[] across FreeRData() */
+  icopy(nvtxs, graph->where, vmap);
+
+  FreeRData(graph);
+
+  Allocate2WayNodePartitionMemory(ctrl, graph);
+  icopy(nvtxs, vmap, graph->where);
+
+  WCOREPOP;
+
+  Compute2WayNodePartitionParams(ctrl, graph);
+
+  ASSERT(CheckNodePartitionParams(graph));
+
+  FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter); 
+
+  ASSERT(IsSeparable(graph));
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/sfm.c b/3rdParty/metis/metis-5.1.0/libmetis/sfm.c
new file mode 100644
index 000000000..d41817380
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/sfm.c
@@ -0,0 +1,612 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * sfm.c
+ *
+ * This file contains code that implements an FM-based separator refinement
+ *
+ * Started 8/1/97
+ * George
+ *
+ * $Id: sfm.c 10874 2011-10-17 23:13:00Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Node-based FM refinement of a vertex separator (where[] == 2) in which a separator vertex may move to either side. Runs at most niter passes; each pass is rolled back to its best prefix of moves. */
+/**************************************************************************/
+void FM_2WayNodeRefine2Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *moved, *swaps;
+  rpq_t *queues[2]; 
+  nrinfo_t *rinfo;
+  idx_t higain, oldgain, mincut, initcut, mincutorder;	
+  idx_t pass, to, other, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+  idx_t u[2], g[2];  /* u[s]: top vertex of queues[s] (-1 is tested as "empty"); g[s]: its gain for moving to side s */
+  real_t mult;   
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  rinfo  = graph->nrinfo;
+  /* queues[s] is keyed by the gain of moving a separator vertex to side s */
+  queues[0] = rpqCreate(nvtxs);
+  queues[1] = rpqCreate(nvtxs);
+  /* moved: per-vertex move state; swaps: vertices in move order; mind/mptr: vertices pulled into the separator by each move */
+  moved = iwspacemalloc(ctrl, nvtxs);
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  mptr  = iwspacemalloc(ctrl, nvtxs+1);
+  mind  = iwspacemalloc(ctrl, 2*nvtxs);
+  /* Weight cap per side: 0.5*ubfactors[0] times the total weight of both sides plus the separator */
+  mult = 0.5*ctrl->ubfactors[0];
+  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2]));
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions-N2: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+
+  for (pass=0; pass<niter; pass++) {
+    iset(nvtxs, -1, moved);  /* -1: vertex not touched in this pass */
+    rpqReset(queues[0]);
+    rpqReset(queues[1]);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      rpqInsert(queues[0], i, vwgt[i]-rinfo[i].edegrees[1]);  /* gain of i -> side 0: own weight minus side-1 neighbour weight */
+      rpqInsert(queues[1], i, vwgt[i]-rinfo[i].edegrees[0]);  /* gain of i -> side 1: own weight minus side-0 neighbour weight */
+    }
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+    /* limit bounds how many moves past the best state are tolerated (see the break test inside the loop) */
+    limit = (ctrl->compress ? gk_min(5*nbnd, 400) : gk_min(2*nbnd, 300));
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    mptr[0] = nmind = 0;
+    mindiff = iabs(pwgts[0]-pwgts[1]);
+    to = (pwgts[0] < pwgts[1] ? 0 : 1);  /* NOTE(review): every path that later reads `to` assigns it first, so this value looks unused */
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      u[0] = rpqSeeTopVal(queues[0]);  
+      u[1] = rpqSeeTopVal(queues[1]);
+      if (u[0] != -1 && u[1] != -1) {  /* both queues have a candidate */
+        g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1];
+        g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0];
+
+        to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2));  /* larger gain wins; ties go by pass parity */
+
+        if (pwgts[to]+vwgt[u[to]] > badmaxpwgt)  /* chosen side would exceed the cap: use the other side */
+          to = (to+1)%2;
+      }
+      else if (u[0] == -1 && u[1] == -1) {  /* both queues empty: nothing left to move */
+        break;
+      }
+      else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) {
+        to = 0;
+      }
+      else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) {
+        to = 1;
+      }
+      else
+        break;
+
+      other = (to+1)%2;
+      /* Take the chosen vertex; a vertex with moved == -1 was inserted in both queues, so remove it from the other one as well */
+      higain = rpqGetTop(queues[to]);
+      if (moved[higain] == -1) /* Delete if it was in the separator originally */
+        rpqDelete(queues[other], higain);
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a possibility
+         of over-running the mind array.  */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) 
+        break;
+      /* Tentatively apply the change in separator weight; it is undone below if the loop decides to stop */
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);
+      /* newdiff: |weight(to) - weight(other)| after the move, with edegrees[other] leaving side `other` */
+      newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]));
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut = pwgts[2];
+        mincutorder = nswaps;
+        mindiff = newdiff;
+      }
+      else {  /* no new best state: stop once too many moves were made since the last best one */
+        if (nswaps - mincutorder > 2*limit || 
+            (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]);
+          break; /* No further improvement, break out */
+        }
+      }
+      /* Commit the move of higain from the separator to side `to` */
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to] += vwgt[higain];
+      where[higain] = to;
+      moved[higain] = nswaps;  /* >= 0: index of the move that placed this vertex */
+      swaps[nswaps] = higain;  
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          oldgain = vwgt[k]-rinfo[k].edegrees[to];
+          rinfo[k].edegrees[to] += vwgt[higain];
+          if (moved[k] == -1 || moved[k] == -(2+other))  /* only when k is held by queues[other] */
+            rpqUpdate(queues[other], k, oldgain-vwgt[higain]);
+        }
+        else if (where[k] == other) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k;  /* Keep track for rollback */
+          where[k] = 2;
+          pwgts[other] -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;
+          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2) 
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              oldgain = vwgt[kk]-rinfo[kk].edegrees[other];
+              rinfo[kk].edegrees[other] -= vwgt[k];
+              if (moved[kk] == -1 || moved[kk] == -(2+to))  /* only when kk is held by queues[to] */
+                rpqUpdate(queues[to], kk, oldgain+vwgt[k]);
+            }
+          }
+
+          /* Insert the new vertex into the priority queue. Only one side! */
+          if (moved[k] == -1) {
+            rpqInsert(queues[to], k, vwgt[k]-edegrees[other]);
+            moved[k] = -(2+to);  /* records which single queue holds k */
+          }
+        }
+      }
+      mptr[nswaps+1] = nmind;  /* mind[mptr[s] .. mptr[s+1]) lists the vertices pulled in by move s */
+      /* NOTE(review): when only one queue had a candidate, g[] was not refreshed this iteration and u[other] is -1, so vwgt[u[other]] below would index vwgt[-1] -- confirm and guard if so */
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] [%4"PRIDX" %4"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2]));
+
+    }
+
+
+    /****************************************************************
+    * Roll back computation: undo every move made after move number mincutorder, newest first
+    *****************************************************************/
+    for (nswaps--; nswaps>mincutorder; nswaps--) {
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+
+      to = where[higain];
+      other = (to+1)%2;
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);  /* NOTE(review): macro not shown; presumably adds to the first argument and subtracts from the second -- confirm */
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;  /* rebuilt from the neighbours in the loop below */
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) 
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = other;
+        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2) 
+            rinfo[kk].edegrees[other] += vwgt[k];
+        }
+      }
+    }
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));
+
+    graph->mincut = mincut;
+    graph->nbnd = nbnd;
+    /* Stop when this pass kept no move or did not reduce the separator weight */
+    if (mincutorder == -1 || mincut >= initcut)
+      break;
+  }
+
+  rpqDestroy(queues[0]);
+  rpqDestroy(queues[1]);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Node-based FM refinement of a vertex separator (where[] == 2) using one-sided passes.
+    Up to 2*niter passes are run and the destination side alternates from pass to pass,
+    so within a single pass separator vertices are moved to one side only.
+    Each pass is rolled back to the best prefix of its moves.
+*/
+/**************************************************************************/
+void FM_2WayNodeRefine1Sided(ctrl_t *ctrl, graph_t *graph, idx_t niter)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, iend;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *mptr, *mind, *swaps;
+  rpq_t *queue; 
+  nrinfo_t *rinfo;
+  idx_t higain, mincut, initcut, mincutorder;	
+  idx_t pass, to, other, limit;
+  idx_t badmaxpwgt, mindiff, newdiff;
+  real_t mult;
+
+  WCOREPUSH;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  rinfo  = graph->nrinfo;
+
+  queue = rpqCreate(nvtxs);
+  /* swaps: vertices in move order; mind/mptr: vertices pulled into the separator by each move */
+  swaps = iwspacemalloc(ctrl, nvtxs);
+  mptr  = iwspacemalloc(ctrl, nvtxs+1);
+  mind  = iwspacemalloc(ctrl, 2*nvtxs);
+  /* Weight cap per side: 0.5*ubfactors[0] times the total weight of both sides plus the separator */
+  mult = 0.5*ctrl->ubfactors[0];
+  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]+pwgts[2]));
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions-N1: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX"\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+  /* `to` is flipped at the top of every pass, so the first pass moves into side 0 when pwgts[0] < pwgts[1] and into side 1 otherwise */
+  to = (pwgts[0] < pwgts[1] ? 1 : 0);
+  for (pass=0; pass<2*niter; pass++) {  /* the 2*niter is for the two sides */
+    other = to; 
+    to    = (to+1)%2;
+
+    rpqReset(queue);
+
+    mincutorder = -1;
+    initcut = mincut = graph->mincut;
+    nbnd = graph->nbnd;
+
+    /* use the swaps array in place of the traditional perm array to save memory */
+    irandArrayPermute(nbnd, swaps, nbnd, 1);
+    for (ii=0; ii<nbnd; ii++) {
+      i = bndind[swaps[ii]];
+      ASSERT(where[i] == 2);
+      rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[other]);  /* gain: own weight minus the `other`-side neighbour weight */
+    }
+
+    ASSERT(CheckNodeBnd(graph, nbnd));
+    ASSERT(CheckNodePartitionParams(graph));
+    /* limit bounds how many moves past the best state are tolerated (see the break test inside the loop) */
+    limit = (ctrl->compress ? gk_min(5*nbnd, 500) : gk_min(3*nbnd, 300));
+
+    /******************************************************
+    * Get into the FM loop
+    *******************************************************/
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));
+    mptr[0] = nmind = 0;
+    mindiff = iabs(pwgts[0]-pwgts[1]);
+    for (nswaps=0; nswaps<nvtxs; nswaps++) {
+      if ((higain = rpqGetTop(queue)) == -1)  /* -1 is treated as "queue empty" */
+        break;
+
+      ASSERT(bndptr[higain] != -1);
+
+      /* The following check is to ensure we break out if there is a possibility
+         of over-running the mind array.  */
+      if (nmind + xadj[higain+1]-xadj[higain] >= 2*nvtxs-1) 
+        break;
+
+      if (pwgts[to]+vwgt[higain] > badmaxpwgt) 
+        break;  /* No point going any further. Balance will be bad */
+      /* Tentatively apply the change in separator weight; it is undone below if the loop decides to stop */
+      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);
+      /* newdiff: |weight(to) - weight(other)| after the move, with edegrees[other] leaving side `other` */
+      newdiff = iabs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]));
+      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
+        mincut      = pwgts[2];
+        mincutorder = nswaps;
+        mindiff     = newdiff;
+      }
+      else {  /* no new best state: stop once too many moves were made since the last best one */
+        if (nswaps - mincutorder > 3*limit || 
+            (nswaps - mincutorder > limit && pwgts[2] > 1.10*mincut)) {
+          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]);
+          break; /* No further improvement, break out */
+        }
+      }
+      /* Commit the move of higain from the separator to side `to` */
+      BNDDelete(nbnd, bndind, bndptr, higain);
+      pwgts[to]     += vwgt[higain];
+      where[higain]  = to;
+      swaps[nswaps]  = higain;  
+
+
+      /**********************************************************
+      * Update the degrees of the affected nodes
+      ***********************************************************/
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux1Tmr));
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+
+        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+          rinfo[k].edegrees[to] += vwgt[higain];
+        }
+        else if (where[k] == other) { /* This vertex is pulled into the separator */
+          ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+          BNDInsert(nbnd, bndind, bndptr, k);
+
+          mind[nmind++] = k;  /* Keep track for rollback */
+          where[k] = 2;
+          pwgts[other] -= vwgt[k];
+
+          edegrees = rinfo[k].edegrees;
+          edegrees[0] = edegrees[1] = 0;  /* rebuilt from the neighbours in the loop below */
+          for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
+            kk = adjncy[jj];
+            if (where[kk] != 2) 
+              edegrees[where[kk]] += vwgt[kk];
+            else {
+              rinfo[kk].edegrees[other] -= vwgt[k];
+
+              /* Since the moves are one-sided this vertex has not been moved yet */
+              rpqUpdate(queue, kk, vwgt[kk]-rinfo[kk].edegrees[other]); 
+            }
+          }
+
+          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
+          rpqInsert(queue, k, vwgt[k]-edegrees[other]);
+        }
+      }
+      mptr[nswaps+1] = nmind;  /* mind[mptr[s] .. mptr[s+1]) lists the vertices pulled in by move s */
+      IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux1Tmr));
+
+
+      IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+            printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %5"PRIDX" [%5"PRIDX"] \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"] [%3"PRIDX" %2"PRIDX"]\n", 
+                higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain], 
+                pwgts[0], pwgts[1], pwgts[2], nswaps, limit));
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));
+
+
+    /****************************************************************
+    * Roll back computation: undo every move made after move number mincutorder, newest first
+    *****************************************************************/
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux2Tmr));
+    for (nswaps--; nswaps>mincutorder; nswaps--) {
+      higain = swaps[nswaps];
+
+      ASSERT(CheckNodePartitionParams(graph));
+      ASSERT(where[higain] == to);
+
+      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);  /* NOTE(review): macro not shown; presumably adds to the first argument and subtracts from the second -- confirm */
+      where[higain] = 2;
+      BNDInsert(nbnd, bndind, bndptr, higain);
+
+      edegrees = rinfo[higain].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+        k = adjncy[j];
+        if (where[k] == 2) 
+          rinfo[k].edegrees[to] -= vwgt[higain];
+        else
+          edegrees[where[k]] += vwgt[k];
+      }
+
+      /* Push nodes out of the separator */
+      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
+        k = mind[j];
+        ASSERT(where[k] == 2);
+        where[k] = other;
+        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
+        BNDDelete(nbnd, bndind, bndptr, k);
+        for (jj=xadj[k], iend=xadj[k+1]; jj<iend; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] == 2) 
+            rinfo[kk].edegrees[other] += vwgt[k];
+        }
+      }
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux2Tmr));
+
+    ASSERT(mincut == pwgts[2]);
+
+    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+      printf("\tMinimum sep: %6"PRIDX" at %5"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));
+
+    graph->mincut = mincut;
+    graph->nbnd   = nbnd;
+    /* The stop test is applied only after odd-numbered passes and looks only at that pass (initcut is reset every pass) */
+    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut))
+      break;
+  }
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
+
+/*************************************************************************/
+/*! Balances the left/right partitions of a separator tri-section by moving
+    separator vertices into the lighter side; moves are never rolled back. */
+/*************************************************************************/
+void FM_2WayNodeBalance(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, gain;
+  idx_t badmaxpwgt, higain, oldgain, pass, to, other;
+  idx_t *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
+  idx_t *perm, *moved;
+  rpq_t *queue; 
+  nrinfo_t *rinfo;
+  real_t mult;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt   = graph->vwgt;
+
+  bndind = graph->bndind;
+  bndptr = graph->bndptr;
+  where  = graph->where;
+  pwgts  = graph->pwgts;
+  rinfo  = graph->nrinfo;
+
+  mult = 0.5*ctrl->ubfactors[0];  /* share of pwgts[0]+pwgts[1] above which a side is too heavy */
+  /* Nothing to balance if the heavier side is below that bound or the sides are nearly equal */
+  badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]));
+  if (gk_max(pwgts[0], pwgts[1]) < badmaxpwgt)
+    return;
+  if (iabs(pwgts[0]-pwgts[1]) < 3*graph->tvwgt[0]/nvtxs)
+    return;
+
+  WCOREPUSH;
+  /* Vertices are only ever moved into the lighter side ('to') */
+  to    = (pwgts[0] < pwgts[1] ? 0 : 1); 
+  other = (to+1)%2;
+
+  queue = rpqCreate(nvtxs);
+
+  perm  = iwspacemalloc(ctrl, nvtxs);
+  moved = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));  /* set to 1 once a vertex is taken off the queue */
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("Partitions: [%6"PRIDX" %6"PRIDX"] Nv-Nb[%6"PRIDX" %6"PRIDX"]. ISep: %6"PRIDX" [B]\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));
+  /* Queue the current separator vertices, keyed by vwgt[i]-edegrees[other] */
+  nbnd = graph->nbnd;
+  irandArrayPermute(nbnd, perm, nbnd, 1);
+  for (ii=0; ii<nbnd; ii++) {
+    i = bndind[perm[ii]];
+    ASSERT(where[i] == 2);
+    rpqInsert(queue, i, vwgt[i]-rinfo[i].edegrees[other]);
+  }
+
+  ASSERT(CheckNodeBnd(graph, nbnd));
+  ASSERT(CheckNodePartitionParams(graph));
+
+  /******************************************************
+  * Get into the FM loop
+  *******************************************************/
+  for (nswaps=0; nswaps<nvtxs; nswaps++) {
+    if ((higain = rpqGetTop(queue)) == -1)
+      break;
+
+    moved[higain] = 1;
+    /* Net drop in separator weight if higain moves: it leaves, its other-side neighbors enter */
+    gain = vwgt[higain]-rinfo[higain].edegrees[other];
+    badmaxpwgt = (idx_t)(mult*(pwgts[0]+pwgts[1]));
+
+    /* break if other is now underweight */
+    if (pwgts[to] > pwgts[other])
+      break;
+
+    /* break if balance is achieved and no +ve or zero gain */
+    if (gain < 0 && pwgts[other] < badmaxpwgt) 
+      break;
+
+    /* skip this vertex if it will violate balance on the other side */
+    if (pwgts[to]+vwgt[higain] > badmaxpwgt) 
+      continue;
+
+    ASSERT(bndptr[higain] != -1);
+
+    pwgts[2] -= gain;
+
+    BNDDelete(nbnd, bndind, bndptr, higain);
+    pwgts[to] += vwgt[higain];
+    where[higain] = to;
+
+    IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,
+          printf("Moved %6"PRIDX" to %3"PRIDX", Gain: %3"PRIDX", \t[%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", higain, to, vwgt[higain]-rinfo[higain].edegrees[other], pwgts[0], pwgts[1], pwgts[2]));
+
+
+    /**********************************************************
+    * Update the degrees of the affected nodes
+    ***********************************************************/
+    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
+      k = adjncy[j];
+      if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
+        rinfo[k].edegrees[to] += vwgt[higain];
+      }
+      else if (where[k] == other) { /* This vertex is pulled into the separator */
+        ASSERTP(bndptr[k] == -1, ("%"PRIDX" %"PRIDX" %"PRIDX"\n", k, bndptr[k], where[k]));
+        BNDInsert(nbnd, bndind, bndptr, k);
+
+        where[k] = 2;
+        pwgts[other] -= vwgt[k];
+
+        edegrees = rinfo[k].edegrees;
+        edegrees[0] = edegrees[1] = 0;
+        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
+          kk = adjncy[jj];
+          if (where[kk] != 2) 
+            edegrees[where[kk]] += vwgt[kk];
+          else {
+            ASSERT(bndptr[kk] != -1);
+            oldgain = vwgt[kk]-rinfo[kk].edegrees[other];
+            rinfo[kk].edegrees[other] -= vwgt[k];
+
+            if (moved[kk] == -1)  /* skip vertices already taken off the queue */
+              rpqUpdate(queue, kk, oldgain+vwgt[k]);
+          }
+        }
+
+        /* Insert the new vertex into the priority queue */
+        rpqInsert(queue, k, vwgt[k]-edegrees[other]);
+      }
+    }
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
+    printf("\tBalanced sep: %6"PRIDX" at %4"PRIDX", PWGTS: [%6"PRIDX" %6"PRIDX"], NBND: %6"PRIDX"\n", pwgts[2], nswaps, pwgts[0], pwgts[1], nbnd));
+
+  graph->mincut = pwgts[2];
+  graph->nbnd   = nbnd;
+
+  rpqDestroy(queue);
+
+  WCOREPOP;
+}
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/srefine.c b/3rdParty/metis/metis-5.1.0/libmetis/srefine.c
new file mode 100644
index 000000000..603f782ad
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/srefine.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * srefine.c
+ *
+ * This file contains code for the separator refinement algorithms
+ *
+ * Started 8/1/97
+ * George
+ *
+ * $Id: srefine.c 10515 2011-07-08 15:46:18Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Entry point of the separator refinement. The graph that is passed in is
+    not refined itself; refinement starts by projecting its separator to the
+    next finer graph and continues level by level until orggraph. */
+/*************************************************************************/
+void Refine2WayNode(ctrl_t *ctrl, graph_t *orggraph, graph_t *graph)
+{
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->UncoarsenTmr));
+
+  /* No coarser level to project from: just set up the partition parameters */
+  if (graph == orggraph) {
+    Compute2WayNodePartitionParams(ctrl, graph);
+  }
+
+  /* Walk up the hierarchy one level per iteration until orggraph is done */
+  while (graph != orggraph) {
+    graph = graph->finer;
+
+    /* Projection */
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ProjectTmr));
+    Project2WayNodePartition(ctrl, graph);
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ProjectTmr));
+
+    /* Balancing followed by the selected FM refinement */
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->RefTmr));
+    FM_2WayNodeBalance(ctrl, graph);
+
+    ASSERT(CheckNodePartitionParams(graph));
+
+    if (ctrl->rtype == METIS_RTYPE_SEP2SIDED) {
+      FM_2WayNodeRefine2Sided(ctrl, graph, ctrl->niter);
+    }
+    else if (ctrl->rtype == METIS_RTYPE_SEP1SIDED) {
+      FM_2WayNodeRefine1Sided(ctrl, graph, ctrl->niter);
+    }
+    else {
+      gk_errexit(SIGERR, "Unknown rtype of %d\n", ctrl->rtype);
+    }
+    IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->RefTmr));
+  }
+
+  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->UncoarsenTmr));
+}
+
+
+/*************************************************************************/
+/*! Allocates the per-graph arrays used by 2-way node-based refinement:
+    pwgts (3 entries) and where, bndptr, bndind, nrinfo (one per vertex). */
+/*************************************************************************/
+void Allocate2WayNodePartitionMemory(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t n = graph->nvtxs;
+
+  graph->pwgts  = imalloc(3, "Allocate2WayNodePartitionMemory: pwgts");
+  graph->where  = imalloc(n, "Allocate2WayNodePartitionMemory: where");
+  graph->bndptr = imalloc(n, "Allocate2WayNodePartitionMemory: bndptr");
+  graph->bndind = imalloc(n, "Allocate2WayNodePartitionMemory: bndind");
+  graph->nrinfo = (nrinfo_t *)gk_malloc(n*sizeof(nrinfo_t),
+                      "Allocate2WayNodePartitionMemory: nrinfo");
+}
+
+
+/*************************************************************************/
+/*! This function computes pwgts[], the boundary list of the separator and
+    the edegrees[] of the separator vertices to the left & right sides */
+/*************************************************************************/
+void Compute2WayNodePartitionParams(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t i, j, nvtxs, nbnd;
+  idx_t *xadj, *adjncy, *vwgt;
+  idx_t *where, *pwgts, *bndind, *bndptr, *edegrees;
+  nrinfo_t *rinfo;
+  idx_t me, other;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  vwgt   = graph->vwgt;
+  adjncy = graph->adjncy;
+
+  where  = graph->where;
+  rinfo  = graph->nrinfo;
+  pwgts  = iset(3, 0, graph->pwgts);         /* the three weights are rebuilt from scratch */
+  bndind = graph->bndind;
+  bndptr = iset(nvtxs, -1, graph->bndptr);   /* every entry starts out as -1 */
+
+  /*------------------------------------------------------------
+  / Compute now the separator external degrees
+  /------------------------------------------------------------*/
+  nbnd = 0;
+  for (i=0; i<nvtxs; i++) {
+    me = where[i];
+    /* Validate me before it is used to index pwgts[] */
+    ASSERT(me >=0 && me <= 2);
+    pwgts[me] += vwgt[i];
+
+    if (me == 2) { /* If it is on the separator do some computations */
+      BNDInsert(nbnd, bndind, bndptr, i);
+
+      edegrees = rinfo[i].edegrees;
+      edegrees[0] = edegrees[1] = 0;
+
+      for (j=xadj[i]; j<xadj[i+1]; j++) {
+        other = where[adjncy[j]];
+        if (other != 2)
+          edegrees[other] += vwgt[adjncy[j]];
+      }
+    }
+  }
+
+  ASSERT(CheckNodeBnd(graph, nbnd));
+
+  graph->mincut = pwgts[2];
+  graph->nbnd   = nbnd;
+}
+
+
+/*************************************************************************/
+/*! Projects the node-based bisection of graph->coarser onto graph, frees
+    the coarser graph and recomputes the partition parameters of graph. */
+/*************************************************************************/
+void Project2WayNodePartition(ctrl_t *ctrl, graph_t *graph)
+{
+  idx_t v, nvtxs;
+  idx_t *cmap, *where, *cwhere;
+
+  nvtxs  = graph->nvtxs;
+  cmap   = graph->cmap;
+  cwhere = graph->coarser->where;
+
+  /* where[] only exists after the allocation, so fetch it afterwards */
+  Allocate2WayNodePartitionMemory(ctrl, graph);
+  where = graph->where;
+
+  /* Every vertex inherits the side of the coarse vertex it was mapped to */
+  for (v=0; v<nvtxs; v++) {
+    where[v] = cwhere[cmap[v]];
+    ASSERTP(where[v] >= 0 && where[v] <= 2, ("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", 
+          v, cmap[v], where[v], cwhere[cmap[v]]));
+  }
+
+  /* The coarse graph is no longer needed */
+  FreeGraph(&graph->coarser);
+  graph->coarser = NULL;
+
+  Compute2WayNodePartitionParams(ctrl, graph);
+}
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/stat.c b/3rdParty/metis/metis-5.1.0/libmetis/stat.c
new file mode 100644
index 000000000..f19791b53
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/stat.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * stat.c
+ *
+ * This file computes various statistics
+ *
+ * Started 7/25/97
+ * George
+ *
+ * $Id: stat.c 9942 2011-05-17 22:09:52Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+* Prints cut, volume, balance and subdomain-adjacency statistics of where[]
+**************************************************************************/
+void ComputePartitionInfoBipartite(graph_t *graph, idx_t nparts, idx_t *where)
+{
+  idx_t i, j, nvtxs, ncon, mustfree=0;
+  idx_t *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts;
+  idx_t *padjncy, *padjwgt, *padjcut;
+
+  nvtxs = graph->nvtxs;
+  ncon = graph->ncon;
+  xadj = graph->xadj;
+  adjncy = graph->adjncy;
+  vwgt = graph->vwgt;
+  vsize = graph->vsize;
+  adjwgt = graph->adjwgt;
+  /* Missing vertex/edge weights are treated as 1; mustfree records what to release */
+  if (vwgt == NULL) {
+    vwgt = graph->vwgt = ismalloc(nvtxs, 1, "vwgt");
+    mustfree = 1;
+  }
+  if (adjwgt == NULL) {
+    adjwgt = graph->adjwgt = ismalloc(xadj[nvtxs], 1, "adjwgt");
+    mustfree += 2;
+  }
+
+  printf("%"PRIDX"-way Cut: %5"PRIDX", Vol: %5"PRIDX", ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where));
+
+  /* Compute balance information */
+  kpwgts = ismalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts");
+
+  for (i=0; i<nvtxs; i++) {
+    for (j=0; j<ncon; j++) 
+      kpwgts[where[i]*ncon+j] += vwgt[i*ncon+j];
+  }
+
+  if (ncon == 1) {
+    printf("\tBalance: %5.3"PRREAL" out of %5.3"PRREAL"\n", 
+            1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)),
+            1.0*nparts*vwgt[iargmax(nvtxs, vwgt)]/(1.0*isum(nparts, kpwgts, 1)));
+  }
+  else {
+    printf("\tBalance:");
+    for (j=0; j<ncon; j++) 
+      printf(" (%5.3"PRREAL" out of %5.3"PRREAL")", 
+            1.0*nparts*kpwgts[ncon*iargmax_strd(nparts, kpwgts+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)),
+            1.0*nparts*vwgt[ncon*iargmax_strd(nvtxs, vwgt+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)));
+    printf("\n");
+  }
+
+
+  /* Compute p-adjncy information */
+  padjncy = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjncy");
+  padjwgt = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt");
+  padjcut = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjcut");
+  /* kpwgts[0..nparts-1] is reused as a per-vertex marker of already counted subdomains */
+  iset(nparts, 0, kpwgts);
+  for (i=0; i<nvtxs; i++) {
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      if (where[i] != where[adjncy[j]]) {
+        padjncy[where[i]*nparts+where[adjncy[j]]] = 1;
+        padjcut[where[i]*nparts+where[adjncy[j]]] += adjwgt[j];
+        if (kpwgts[where[adjncy[j]]] == 0) {
+          padjwgt[where[i]*nparts+where[adjncy[j]]] += (vsize ? vsize[i] : 1);  /* unit size without vsize */
+          kpwgts[where[adjncy[j]]] = 1;
+        }
+      }
+    }
+    for (j=xadj[i]; j<xadj[i+1]; j++) 
+      kpwgts[where[adjncy[j]]] = 0;
+  }
+
+  for (i=0; i<nparts; i++)
+    kpwgts[i] = isum(nparts, padjncy+i*nparts, 1);
+  printf("Min/Max/Avg/Bal # of adjacent     subdomains: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n",
+    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
+    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)));
+
+  for (i=0; i<nparts; i++)
+    kpwgts[i] = isum(nparts, padjcut+i*nparts, 1);
+  printf("Min/Max/Avg/Bal # of adjacent subdomain cuts: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n",
+    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
+    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)));
+
+  for (i=0; i<nparts; i++)
+    kpwgts[i] = isum(nparts, padjwgt+i*nparts, 1);
+  printf("Min/Max/Avg/Bal/Frac # of interface    nodes: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL" %7.3"PRREAL"\n",
+    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
+    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)), 1.0*isum(nparts, kpwgts, 1)/(1.0*nvtxs));
+
+  /* Release the unit weights that were created above and detach them from the graph */
+  if (mustfree == 1 || mustfree == 3) {
+    gk_free((void **)&vwgt, LTERM);
+    graph->vwgt = NULL;
+  }
+  if (mustfree == 2 || mustfree == 3) {
+    gk_free((void **)&adjwgt, LTERM);
+    graph->adjwgt = NULL;
+  }
+
+  gk_free((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM);
+}
+
+
+/*************************************************************************
+* This function computes the balance of the partitioning: for constraint j,
+* ubvec[j] = nparts*(heaviest partition weight)/(total weight)
+**************************************************************************/
+void ComputePartitionBalance(graph_t *graph, idx_t nparts, idx_t *where, real_t *ubvec)
+{
+  idx_t i, j, nvtxs, ncon;
+  idx_t *kpwgts, *vwgt;
+
+  nvtxs = graph->nvtxs;
+  ncon = graph->ncon;
+  vwgt = graph->vwgt;
+
+  kpwgts = ismalloc(nparts, 0, "ComputePartitionBalance: kpwgts");
+
+  if (vwgt == NULL) {
+    /* No vertex weights: balance the vertex counts; only ubvec[0] is set */
+    for (i=0; i<nvtxs; i++)
+      kpwgts[where[i]]++;
+    ubvec[0] = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*nvtxs);
+  }
+  else {
+    for (j=0; j<ncon; j++) {
+      iset(nparts, 0, kpwgts);
+      for (i=0; i<nvtxs; i++)
+        kpwgts[where[i]] += vwgt[i*ncon+j];
+
+      ubvec[j] = 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1));
+    }
+  }
+
+  gk_free((void **)&kpwgts, LTERM);
+}
+
+
+/*************************************************************************
+* Returns nparts * (largest element count of a partition) / (total count)
+**************************************************************************/
+real_t ComputeElementBalance(idx_t ne, idx_t nparts, idx_t *where)
+{
+  idx_t e = 0;
+  idx_t *counts;
+  real_t ratio;
+
+  /* counts[p] = number of elements assigned to partition p */
+  counts = ismalloc(nparts, 0, "ComputeElementBalance: kpwgts");
+  while (e < ne) {
+    counts[where[e]] += 1;
+    e++;
+  }
+
+  ratio = 1.0*nparts*counts[iargmax(nparts, counts)]/(1.0*isum(nparts, counts, 1));
+  gk_free((void **)&counts, LTERM);
+
+  return ratio;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/stdheaders.h b/3rdParty/metis/metis-5.1.0/libmetis/stdheaders.h
new file mode 100644
index 000000000..148f88d48
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/stdheaders.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * stdheaders.h
+ *
+ * This file includes all necessary header files
+ *
+ * Started 8/27/94
+ * George
+ *
+ * $Id: stdheaders.h 5993 2009-01-07 02:09:57Z karypis $
+ */
+
+#ifndef _LIBMETIS_STDHEADERS_H_
+#define _LIBMETIS_STDHEADERS_H_
+
+#include <stdio.h>
+#ifdef __STDC__
+#include <stdlib.h>
+#else
+#include <malloc.h>
+#endif
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <stdarg.h>
+#include <time.h>
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/struct.h b/3rdParty/metis/metis-5.1.0/libmetis/struct.h
new file mode 100644
index 000000000..5fc8588df
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/struct.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * struct.h
+ *
+ * This file contains the data structures used by the METIS routines.
+ *
+ * Started 9/26/95
+ * George
+ *
+ * $Id: struct.h 13900 2013-03-24 15:27:07Z karypis $
+ */
+
+#ifndef _LIBMETIS_STRUCT_H_
+#define _LIBMETIS_STRUCT_H_
+
+
+
+/*************************************************************************/
+/*! This data structure stores cut-based k-way refinement info about an
+    adjacent subdomain for a given vertex. */
+/*************************************************************************/
+typedef struct cnbr_t {
+  idx_t pid;            /*!< The partition ID of the adjacent subdomain */
+  idx_t ed;             /*!< The sum of the weights of the adjacent edges
+                             that are incident on pid */
+} cnbr_t;
+
+
+/*************************************************************************/
+/*! The following data structure holds the per-vertex degree information
+    for cut-based k-way refinement */
+/*************************************************************************/
+typedef struct ckrinfo_t {
+ idx_t id;              /*!< The internal degree of a vertex (sum of weights) */
+ idx_t ed;            	/*!< The total external degree of a vertex */
+ idx_t nnbrs;          	/*!< The number of neighboring subdomains */
+ idx_t inbr;            /*!< The index in the cnbr_t array where the nnbrs list 
+                             of neighbors is stored */
+} ckrinfo_t;
+
+
+/*************************************************************************/
+/*! This data structure stores volume-based k-way refinement info about an
+    adjacent subdomain for a given vertex. */
+/*************************************************************************/
+typedef struct vnbr_t {
+  idx_t pid;            /*!< The partition ID of the adjacent subdomain */
+  idx_t ned;            /*!< The number of the adjacent edges
+                             that are incident on pid */
+  idx_t gv;             /*!< The gain in volume achieved by moving the
+                             vertex to pid */
+} vnbr_t;
+
+
+/*************************************************************************/
+/*! The following data structure holds the per-vertex degree information
+    for volume-based k-way refinement */
+/*************************************************************************/
+typedef struct vkrinfo_t {
+ idx_t nid;             /*!< The internal degree of a vertex (count of edges) */
+ idx_t ned;            	/*!< The total external degree of a vertex (count of edges) */
+ idx_t gv;            	/*!< The volume gain of moving that vertex */
+ idx_t nnbrs;          	/*!< The number of neighboring subdomains */
+ idx_t inbr;            /*!< The index in the vnbr_t array where the nnbrs list 
+                             of neighbors is stored */
+} vkrinfo_t;
+
+
+/*************************************************************************/
+/*! The following data structure holds the per-vertex information used by
+    the node-based (separator) refinement */
+/*************************************************************************/
+typedef struct nrinfo_t {
+ idx_t edegrees[2];     /*!< Weight of the neighbors in partitions 0 and 1 */
+} nrinfo_t;
+
+
+/*************************************************************************/
+/*! This data structure holds a graph */
+/*************************************************************************/
+typedef struct graph_t {
+  idx_t nvtxs, nedges;	/* The # of vertices and edges in the graph */
+  idx_t ncon;		/* The # of constraints */ 
+  idx_t *xadj;		/* xadj[i]..xadj[i+1]-1 index the adjncy/adjwgt entries of vertex i */
+  idx_t *vwgt;		/* Vertex weights (ncon per vertex) */
+  idx_t *vsize;		/* Vertex sizes for min-volume formulation */
+  idx_t *adjncy;        /* Array that stores the adjacency lists of nvtxs */
+  idx_t *adjwgt;        /* Array that stores the weights of the adjacency lists */
+
+  idx_t *tvwgt;         /* The sum of the vertex weights in the graph */
+  real_t *invtvwgt;     /* The inverse of the sum of the vertex weights in the graph */
+
+
+  /* These keep track of whether the corresponding fields point to
+     application or library memory */
+  int free_xadj, free_vwgt, free_vsize, free_adjncy, free_adjwgt;
+
+  idx_t *label;
+
+  idx_t *cmap;          /* cmap[i] is the vertex of the coarser graph that vertex i maps to */
+
+  /* Partition parameters */
+  idx_t mincut, minvol;
+  idx_t *where, *pwgts; /* The partition of each vertex and the weight of each partition */
+  idx_t nbnd;           /* The # of entries in bndind */
+  idx_t *bndptr, *bndind;
+
+  /* Bisection refinement parameters */
+  idx_t *id, *ed;
+
+  /* K-way refinement parameters */
+  ckrinfo_t *ckrinfo;   /*!< The per-vertex cut-based refinement info */
+  vkrinfo_t *vkrinfo;   /*!< The per-vertex volume-based refinement info */
+
+  /* Node refinement information */
+  nrinfo_t *nrinfo;
+
+  struct graph_t *coarser, *finer;  /* The next coarser and the next finer graph */
+} graph_t;
+
+
+/*************************************************************************/
+/*! This data structure holds a mesh */
+/*************************************************************************/
+typedef struct mesh_t {
+  idx_t ne, nn;	        /*!< The # of elements (ne) and nodes (nn) in the mesh */
+  idx_t ncon;           /*!< The number of element balancing constraints (element weights) */
+
+  idx_t *eptr, *eind;   /*!< The CSR-structure storing the nodes in the elements */
+  idx_t *ewgt;          /*!< The weights of the elements */
+} mesh_t;
+
+
+
+/*************************************************************************/
+/*! The following structure stores information used by Metis */
+/*************************************************************************/
+typedef struct ctrl_t {
+  moptype_et  optype;	        /* Type of operation */
+  mobjtype_et objtype;          /* Type of refinement objective */
+  mdbglvl_et  dbglvl;		/* Controls the debugging output of the program */
+  mctype_et   ctype;		/* The type of coarsening */
+  miptype_et  iptype;		/* The type of initial partitioning */
+  mrtype_et   rtype;		/* The type of refinement */
+
+  idx_t CoarsenTo;		/* The # of vertices in the coarsest graph */
+  idx_t nIparts;                /* The number of initial partitions to compute */
+  idx_t no2hop;                 /* Indicates if 2-hop matching will be used */
+  idx_t minconn;                /* Indicates if the subdomain connectivity will be minimized */
+  idx_t contig;                 /* Indicates if contiguous partitions are required */
+  idx_t nseps;			/* The number of separators to be found during multiple bisections */
+  idx_t ufactor;                /* The user-supplied load imbalance factor */
+  idx_t compress;               /* If the graph will be compressed prior to ordering */
+  idx_t ccorder;                /* If connected components will be ordered separately */
+  idx_t seed;                   /* The seed for the random number generator */
+  idx_t ncuts;                  /* The number of different partitionings to compute */
+  idx_t niter;                  /* The number of iterations during each refinement */
+  idx_t numflag;                /* The user-supplied numflag for the graph */
+  idx_t *maxvwgt;		/* The maximum allowed weight for a vertex */
+
+  idx_t ncon;                   /*!< The number of balancing constraints */
+  idx_t nparts;                 /*!< The number of partitions */
+
+  real_t pfactor;		/* .1*(user-supplied pruning factor) */
+
+  real_t *ubfactors;            /*!< The per-constraint ubfactors */
+  
+  real_t *tpwgts;               /*!< The target partition weights */
+  real_t *pijbm;                /*!< The nparts*ncon multipliers for the ith partition
+                                     and jth constraint for obtaining the balance */
+
+  real_t cfactor;               /*!< The achieved compression factor */
+
+  /* Various Timers */
+  double TotalTmr, InitPartTmr, MatchTmr, ContractTmr, CoarsenTmr, UncoarsenTmr, 
+         RefTmr, ProjectTmr, SplitTmr, Aux1Tmr, Aux2Tmr, Aux3Tmr;
+
+  /* Workspace information */
+  gk_mcore_t *mcore;    /*!< The persistent memory core for within function 
+                             mallocs/frees */
+
+  /* These are for use by the k-way refinement routines */
+  size_t nbrpoolsize;      /*!< The number of {c,v}nbr_t entries that have been allocated */
+  size_t nbrpoolcpos;      /*!< The position of the first free entry in the array */
+  size_t nbrpoolreallocs;  /*!< The number of times the pool was resized */
+
+  cnbr_t *cnbrpool;     /*!< The pool of cnbr_t entries to be used during refinement.
+                             The size and current position of the pool are tracked
+                             by nbrpoolsize & nbrpoolcpos */
+  vnbr_t *vnbrpool;     /*!< The pool of vnbr_t entries to be used during refinement.
+                             The size and current position of the pool are tracked
+                             by nbrpoolsize & nbrpoolcpos */
+
+  /* The subdomain graph, in sparse format  */ 
+  idx_t *maxnads;               /* The maximum allocated number of adjacent domains */
+  idx_t *nads;                  /* The number of adjacent domains */
+  idx_t **adids;                /* The IDs of the adjacent domains */
+  idx_t **adwgts;               /* The edge-weight to the adjacent domains */
+  idx_t *pvec1, *pvec2;         /* Auxiliary nparts-size vectors for efficiency */
+
+} ctrl_t;
+
+
+
+#endif
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/timing.c b/3rdParty/metis/metis-5.1.0/libmetis/timing.c
new file mode 100644
index 000000000..9d6e05cf1
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/timing.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * timing.c
+ *
+ * This file contains routines that deal with timing Metis
+ *
+ * Started 7/24/97
+ * George
+ *
+ * $Id: timing.c 13936 2013-03-30 03:59:09Z karypis $
+ *
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************
+* This function clears all twelve timers that are stored in ctrl
+**************************************************************************/
+void InitTimers(ctrl_t *ctrl)
+{
+  gk_clearcputimer(ctrl->TotalTmr);
+  gk_clearcputimer(ctrl->InitPartTmr);
+  gk_clearcputimer(ctrl->MatchTmr);
+  gk_clearcputimer(ctrl->ContractTmr);
+  gk_clearcputimer(ctrl->CoarsenTmr);
+  gk_clearcputimer(ctrl->UncoarsenTmr);
+  gk_clearcputimer(ctrl->RefTmr);
+  gk_clearcputimer(ctrl->ProjectTmr);
+  gk_clearcputimer(ctrl->SplitTmr);
+  gk_clearcputimer(ctrl->Aux1Tmr);
+  gk_clearcputimer(ctrl->Aux2Tmr);
+  gk_clearcputimer(ctrl->Aux3Tmr);
+}
+
+
+
+/*************************************************************************
+* This function prints the various timers to stdout (Aux timers excluded)
+**************************************************************************/
+void PrintTimers(ctrl_t *ctrl)
+{
+  printf("\nTiming Information -------------------------------------------------");
+  printf("\n Multilevel: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->TotalTmr));
+  printf("\n     Coarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->CoarsenTmr));
+  printf("\n            Matching: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->MatchTmr));
+  printf("\n            Contract: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ContractTmr));
+  printf("\n     Initial Partition: \t %7.3"PRREAL"", gk_getcputimer(ctrl->InitPartTmr));
+  printf("\n     Uncoarsening: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->UncoarsenTmr));
+  printf("\n          Refinement: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->RefTmr));
+  printf("\n          Projection: \t\t\t %7.3"PRREAL"", gk_getcputimer(ctrl->ProjectTmr));
+  printf("\n     Splitting: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->SplitTmr));
+/*
+  printf("\n       Aux1Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux1Tmr));
+  printf("\n       Aux2Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux2Tmr));
+  printf("\n       Aux3Tmr: \t\t %7.3"PRREAL"", gk_getcputimer(ctrl->Aux3Tmr));
+*/
+  printf("\n********************************************************************\n");
+}
+
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/util.c b/3rdParty/metis/metis-5.1.0/libmetis/util.c
new file mode 100644
index 000000000..7fbc46726
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/util.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 1997, Regents of the University of Minnesota
+ *
+ * util.c
+ *
+ * This file contains various utility routines
+ *
+ * Started 9/28/95
+ * George
+ *
+ * $Id: util.c 10495 2011-07-06 16:04:45Z karypis $
+ */
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Seeds the random number generator through isrand(). A seed of -1
+    requests the built-in default seed of 4321. */
+/*************************************************************************/
+void InitRandom(idx_t seed)
+{
+  isrand(seed != -1 ? seed : 4321);
+}
+
+
+/*************************************************************************/
+/*! Returns the index i in [0, n) with the largest product x[i]*y[i];
+    ties keep the lowest index, and 0 is returned when n <= 1 */
+/*************************************************************************/
+idx_t iargmax_nrm(size_t n, idx_t *x, real_t *y)
+{
+  idx_t i, best=0;
+  for (i=1; i<n; i++) {
+    if (x[i]*y[i] > x[best]*y[best])
+      best = i;
+  }
+  return best;
+}
+
+
+/*************************************************************************/
+/*! Returns the position, counted in strides, of the largest of the n
+    elements x[0], x[incx], x[2*incx], ...; ties keep the first one */
+/*************************************************************************/
+idx_t iargmax_strd(size_t n, idx_t *x, idx_t incx)
+{
+  size_t off, best=0, end=n*incx;
+
+  for (off=incx; off<end; off+=incx) {
+    if (x[off] > x[best])
+      best = off;
+  }
+  return best/incx;
+}
+
+
+/*************************************************************************/
+/*! Returns the index of the second largest element of x[0..n-1], i.e.,
+    the runner-up to the maximum. With fewer than two elements there is
+    no runner-up, so 0 is returned and x[1] is never read. */
+/*************************************************************************/
+idx_t rargmax2(size_t n, real_t *x)
+{
+  size_t i, max1, max2;
+
+  /* a runner-up needs at least two elements; never touch x[1] otherwise */
+  if (n < 2)
+    return 0;
+
+  /* order the first two elements (a tie makes index 1 the maximum) */
+  max1 = (x[0] > x[1] ? 0 : 1);
+  max2 = 1 - max1;
+
+  /* max1 tracks the largest element seen so far, max2 the next largest */
+  for (i=2; i<n; i++) {
+    if (x[i] > x[max1]) {
+      max2 = max1;
+      max1 = i;
+    }
+    else if (x[i] > x[max2])
+      max2 = i;
+  }
+
+  return max2;
+}
+
+
+/*************************************************************************/
+/*! Returns the index of the second largest entry of the element-wise
+    product x.y (x[i]*y[i]); returns 0 without reading index 1 if n < 2 */
+/*************************************************************************/
+idx_t iargmax2_nrm(size_t n, idx_t *x, real_t *y)
+{
+  size_t i, max1, max2;
+
+  /* a runner-up needs at least two entries; never touch index 1 otherwise */
+  if (n < 2)
+    return 0;
+
+  /* order the first two products (a tie makes index 1 the maximum) */
+  max1 = (x[0]*y[0] > x[1]*y[1] ? 0 : 1);
+  max2 = 1 - max1;
+
+  /* max1 tracks the largest product seen so far, max2 the next largest */
+  for (i=2; i<n; i++) {
+    if (x[i]*y[i] > x[max1]*y[max1]) {
+      max2 = max1;
+      max1 = i;
+    }
+    else if (x[i]*y[i] > x[max2]*y[max2])
+      max2 = i;
+  }
+
+  return max2;
+}
+
+
+/*************************************************************************/
+/*! Converts a signal code into a METIS return code: 0 yields METIS_OK,
+    SIGMEM yields METIS_ERROR_MEMORY and anything else yields
+    METIS_ERROR */
+/*************************************************************************/
+int metis_rcode(int sigrval)
+{
+  /* zero maps to the success code */
+  if (sigrval == 0)
+    return METIS_OK;
+
+  /* SIGMEM maps to the dedicated memory error code */
+  if (sigrval == SIGMEM)
+    return METIS_ERROR_MEMORY;
+
+  /* every other code is reported as a generic error */
+  return METIS_ERROR;
+}
+
+
diff --git a/3rdParty/metis/metis-5.1.0/libmetis/wspace.c b/3rdParty/metis/metis-5.1.0/libmetis/wspace.c
new file mode 100644
index 000000000..a474c3cb6
--- /dev/null
+++ b/3rdParty/metis/metis-5.1.0/libmetis/wspace.c
@@ -0,0 +1,214 @@
+/*!
+\file 
+\brief Functions dealing with memory allocation and workspace management
+
+\date Started 2/24/96
+\author George
+\author Copyright 1997-2009, Regents of the University of Minnesota 
+\version $Id: wspace.c 10492 2011-07-06 09:28:42Z karypis $
+*/
+
+#include "metislib.h"
+
+
+/*************************************************************************/
+/*! Creates the workspace core ctrl->mcore, sized from the graph and the
+    number of partitions, and resets the neighbor-pool size and position.
+    Sizes are computed in size_t to avoid overflowing idx_t arithmetic. */
+/*************************************************************************/
+void AllocateWorkSpace(ctrl_t *ctrl, graph_t *graph)
+{
+  size_t nvtxs1  = (size_t)graph->nvtxs + 1;
+  size_t nparts1 = (size_t)ctrl->nparts + 1;
+  size_t ncon    = (size_t)graph->ncon;
+  size_t coresize;
+
+  /* 3 (PMETIS) or 4 (any other optype) idx_t vectors of nvtxs+1 entries */
+  coresize = (ctrl->optype == METIS_OP_PMETIS ? 3 : 4)*nvtxs1*sizeof(idx_t);
+
+  /* plus 5*(nparts+1)*ncon entries of idx_t and as many of real_t */
+  coresize += 5*nparts1*ncon*(sizeof(idx_t) + sizeof(real_t));
+
+  ctrl->mcore = gk_mcoreCreate(coresize);
+
+  /* the neighbor pool starts out empty */
+  ctrl->nbrpoolsize = 0;
+  ctrl->nbrpoolcpos = 0;
+}
+
+
+/*************************************************************************/
+/*! Allocates the refinement memory: the neighbor pool matching
+    ctrl->objtype and, if ctrl->minconn is set, the subdomain-graph arrays */
+/*************************************************************************/
+void AllocateRefinementWorkSpace(ctrl_t *ctrl, idx_t nbrpoolsize)
+{
+  /* the pool starts empty with the requested capacity */
+  ctrl->nbrpoolsize     = nbrpoolsize;
+  ctrl->nbrpoolcpos     = 0;
+  ctrl->nbrpoolreallocs = 0;
+
+  /* cut objective uses cnbr_t entries, volume objective vnbr_t entries */
+  if (ctrl->objtype == METIS_OBJTYPE_CUT) {
+    ctrl->cnbrpool = (cnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(cnbr_t), 
+                         "AllocateRefinementWorkSpace: cnbrpool");
+  }
+  else if (ctrl->objtype == METIS_OBJTYPE_VOL) {
+    ctrl->vnbrpool = (vnbr_t *)gk_malloc(ctrl->nbrpoolsize*sizeof(vnbr_t), 
+                         "AllocateRefinementWorkSpace: vnbrpool");
+  }
+  else {
+    gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
+  }
+
+  /* the sparse subdomain graph is only allocated when minconn is set */
+  if (!ctrl->minconn)
+    return;
+
+  ctrl->pvec1   = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec1");
+  ctrl->pvec2   = imalloc(ctrl->nparts+1, "AllocateRefinementWorkSpace: pvec2");
+  ctrl->maxnads = ismalloc(ctrl->nparts, INIT_MAXNAD, "AllocateRefinementWorkSpace: maxnads");
+  ctrl->nads    = imalloc(ctrl->nparts, "AllocateRefinementWorkSpace: nads");
+  ctrl->adids   = iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adids");
+  ctrl->adwgts  = iAllocMatrix(ctrl->nparts, INIT_MAXNAD, 0, "AllocateRefinementWorkSpace: adwgts");
+}
+
+
+/*************************************************************************/
+/*! Frees the workspace core, the neighbor pools and the minconn arrays */
+/*************************************************************************/
+void FreeWorkSpace(ctrl_t *ctrl)
+{
+  gk_mcoreDestroy(&ctrl->mcore, ctrl->dbglvl&METIS_DBG_INFO);
+  /* debug output of the pool counters, guarded by METIS_DBG_INFO */
+  IFSET(ctrl->dbglvl, METIS_DBG_INFO,
+      printf(" nbrpool statistics\n" 
+             "        nbrpoolsize: %12zu   nbrpoolcpos: %12zu\n"
+             "    nbrpoolreallocs: %12zu\n\n",
+             ctrl->nbrpoolsize,  ctrl->nbrpoolcpos, 
+             ctrl->nbrpoolreallocs));
+
+  gk_free((void **)&ctrl->cnbrpool, &ctrl->vnbrpool, LTERM);
+  ctrl->nbrpoolsize = 0;
+  ctrl->nbrpoolcpos = 0;
+  /* mirrors the minconn-only allocation in AllocateRefinementWorkSpace */
+  if (ctrl->minconn) {
+    iFreeMatrix(&(ctrl->adids),  ctrl->nparts, INIT_MAXNAD);
+    iFreeMatrix(&(ctrl->adwgts), ctrl->nparts, INIT_MAXNAD);
+
+    gk_free((void **)&ctrl->pvec1, &ctrl->pvec2, 
+        &ctrl->maxnads, &ctrl->nads, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Allocates nbytes from the workspace via gk_mcoreMalloc(ctrl->mcore) */
+/*************************************************************************/
+void *wspacemalloc(ctrl_t *ctrl, size_t nbytes)
+{
+  return gk_mcoreMalloc(ctrl->mcore, nbytes);
+}
+
+
+/*************************************************************************/
+/*! Sets a marker in the workspace's stack of malloc operations (mops);
+    the marker is used by a subsequent wspacepop() for freeing purposes */
+/*************************************************************************/
+void wspacepush(ctrl_t *ctrl)
+{
+  gk_mcorePush(ctrl->mcore);
+}
+
+
+/*************************************************************************/
+/*! Frees all workspace allocations (mops) made since the last wspacepush() */
+/*************************************************************************/
+void wspacepop(ctrl_t *ctrl)
+{
+  gk_mcorePop(ctrl->mcore);
+}
+
+
+/*************************************************************************/
+/*! Allocates space for n idx_t elements through wspacemalloc() */
+/*************************************************************************/
+idx_t *iwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  return (idx_t *)wspacemalloc(ctrl, n*sizeof(idx_t));
+}
+
+
+/*************************************************************************/
+/*! Allocates space for n real_t elements through wspacemalloc() */
+/*************************************************************************/
+real_t *rwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  return (real_t *)wspacemalloc(ctrl, n*sizeof(real_t));
+}
+
+
+/*************************************************************************/
+/*! Allocates space for n ikv_t elements through wspacemalloc() */
+/*************************************************************************/
+ikv_t *ikvwspacemalloc(ctrl_t *ctrl, idx_t n)
+{
+  return (ikv_t *)wspacemalloc(ctrl, n*sizeof(ikv_t));
+}
+
+
+/*************************************************************************/
+/*! Resets the cnbrpool: rewinds nbrpoolcpos to 0 without freeing memory */
+/*************************************************************************/
+void cnbrpoolReset(ctrl_t *ctrl)
+{
+  ctrl->nbrpoolcpos = 0;
+}
+
+
+/*************************************************************************/
+/*! Reserves nnbrs consecutive entries of cnbrpool, enlarging the pool if
+    necessary, and returns the index of the first reserved entry */
+/*************************************************************************/
+idx_t cnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs)
+{
+  ctrl->nbrpoolcpos += nnbrs;
+  if (ctrl->nbrpoolcpos <= ctrl->nbrpoolsize)
+    return ctrl->nbrpoolcpos - nnbrs;  /* the request fits as is */
+
+  /* grow by the larger of 10*nnbrs and half of the current capacity */
+  ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2);
+  ctrl->cnbrpool = (cnbr_t *)gk_realloc(ctrl->cnbrpool,  
+                       ctrl->nbrpoolsize*sizeof(cnbr_t), "cnbrpoolGet: cnbrpool");
+  ctrl->nbrpoolreallocs++;
+  return ctrl->nbrpoolcpos - nnbrs;
+}
+
+
+/*************************************************************************/
+/*! Resets the vnbrpool: rewinds nbrpoolcpos to 0 without freeing memory */
+/*************************************************************************/
+void vnbrpoolReset(ctrl_t *ctrl)
+{
+  ctrl->nbrpoolcpos = 0;
+}
+
+
+/*************************************************************************/
+/*! Reserves nnbrs consecutive entries of vnbrpool, enlarging the pool if
+    necessary, and returns the index of the first reserved entry */
+/*************************************************************************/
+idx_t vnbrpoolGetNext(ctrl_t *ctrl, idx_t nnbrs)
+{
+  ctrl->nbrpoolcpos += nnbrs;
+  if (ctrl->nbrpoolcpos <= ctrl->nbrpoolsize)
+    return ctrl->nbrpoolcpos - nnbrs;  /* the request fits as is */
+
+  /* grow by the larger of 10*nnbrs and half of the current capacity */
+  ctrl->nbrpoolsize += gk_max(10*nnbrs, ctrl->nbrpoolsize/2);
+  ctrl->vnbrpool = (vnbr_t *)gk_realloc(ctrl->vnbrpool,  
+                       ctrl->nbrpoolsize*sizeof(vnbr_t), "vnbrpoolGet: vnbrpool");
+  ctrl->nbrpoolreallocs++;
+  return ctrl->nbrpoolcpos - nnbrs;
+}
+
diff --git a/cpu.cmake b/cpu.cmake
index 30403989e..397d54c3a 100644
--- a/cpu.cmake
+++ b/cpu.cmake
@@ -74,7 +74,7 @@ IF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ia64")
 ENDIF()
 
 if(${USE_METIS} AND NOT METIS_INCLUDEDIR)
-    add_subdirectory(${VF_THIRD_DIR}/metis/metis-5.1.1)
+    add_subdirectory(${VF_THIRD_DIR}/metis/metis-5.1.0)
 endif()
 
 
-- 
GitLab