diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 3d73ffc63ee4c719f751d19c9607e261acbabcc8..b145a3178f7e32740c5c9fdf98e8bbfabda04fae 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -43,3 +43,14 @@ src/USER-MISC/*_grem.*              @dstelter92
 
 # tools
 tools/msi2lmp/*       @akohlmey
+
+# cmake
+cmake/*               @junghans @rbberger
+
+# python
+python/*              @rbberger
+
+# docs
+doc/utils/*/*         @rbberger
+doc/Makefile          @rbberger
+doc/README            @rbberger
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 4df2ddd0752bae38a0bce6d7cd797dc0c1b18fb1..7e4fd690e9a4a72494065df2fb5e6220333155f2 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -49,6 +49,11 @@ if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
 endif()
 
+option(ENABLE_COVERAGE "Enable code coverage" OFF)
+if(ENABLE_COVERAGE)
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
+endif()
+
 ########################################################################
 # User input options                                                   #
 ########################################################################
@@ -147,23 +152,12 @@ pkg_depends(CORESHELL KSPACE)
 ######################################################
 if(PKG_REAX OR PKG_MEAM OR PKG_USER-QUIP OR PKG_USER-QMMM OR PKG_LATTE)
   enable_language(Fortran)
-  list(APPEND LAMMPS_LINK_LIBS ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
 endif()
 
 if(PKG_MEAM OR PKG_USER-H5MD OR PKG_USER-QMMM)
   enable_language(C)
 endif()
 
-if(PKG_MSCG)
-  if (CMAKE_VERSION VERSION_LESS "3.1") 
-    message(FATAL_ERROR "For the MSCG package you need at least cmake-3.1")
-  endif()
-  # starting with CMake 3.1 this is all you have to do to enforce C++11
-  set(CMAKE_CXX_STANDARD 11) # C++11...
-  set(CMAKE_CXX_STANDARD_REQUIRED ON) #...is required...
-  set(CMAKE_CXX_EXTENSIONS OFF) #...without compiler extensions like gnu++11
-endif()
-
 find_package(OpenMP QUIET)
 option(BUILD_OMP "Build with OpenMP support" ${OpenMP_FOUND})
 if(BUILD_OMP OR PKG_USER-OMP OR PKG_KOKKOS OR PKG_USER-INTEL)
@@ -207,7 +201,7 @@ if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-QUIP OR PKG_LATTE)
   find_package(LAPACK)
   if(NOT LAPACK_FOUND)
     enable_language(Fortran)
-    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.f)
+    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.[fF])
     add_library(linalg STATIC ${LAPACK_SOURCES})
     set(LAPACK_LIBRARIES linalg)
   endif()
@@ -302,8 +296,8 @@ if(PKG_LATTE)
     message(STATUS "LATTE not found - we will build our own")
     include(ExternalProject)
     ExternalProject_Add(latte_build
-      URL https://github.com/lanl/LATTE/archive/v1.1.1.tar.gz
-      URL_MD5 cb86f1d2473ce00aa61ff6a023154b03
+      URL https://github.com/lanl/LATTE/archive/v1.2.1.tar.gz
+      URL_MD5 bed76e7e76c545c36dd848a8f1fd35eb
       SOURCE_SUBDIR cmake
       CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE}
       )
@@ -340,13 +334,10 @@ if(PKG_USER-SMD)
     ExternalProject_Add(Eigen3_build
       URL http://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz 
       URL_MD5 1a47e78efe365a97de0c022d127607c3
-      CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DEIGEN_TEST_NOQT=ON
-        -DCMAKE_DISABLE_FIND_PACKAGE_LAPACK=ON -DCMAKE_DISABLE_FIND_PACKAGE_Cholmod=ON -DCMAKE_DISABLE_FIND_PACKAGE_Umfpack=ON -DCMAKE_DISABLE_FIND_PACKAGE_SuperLU=ON
-        -DCMAKE_DISABLE_FIND_PACKAGE_PASTIX=ON -DCMAKE_DISABLE_FIND_PACKAGE_SPQR=ON -DCMAKE_DISABLE_FIND_PACKAGE_Boost=ON -DCMAKE_DISABLE_FIND_PACKAGE_CUDA=ON
-        -DCMAKE_DISABLE_FIND_PACKAGE_FFTW=ON -DCMAKE_DISABLE_FIND_PACKAGE_MPFR=ON -DCMAKE_DISABLE_FIND_PACKAGE_OpenGL=ON
-        )
-    ExternalProject_get_property(Eigen3_build INSTALL_DIR)
-    set(EIGEN3_INCLUDE_DIR ${INSTALL_DIR}/include/eigen3)
+      CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND ""
+    )
+    ExternalProject_get_property(Eigen3_build SOURCE_DIR)
+    set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR})
     list(APPEND LAMMPS_DEPS Eigen3_build)
   else()
     find_package(Eigen3)
@@ -402,26 +393,36 @@ endif()
 
 if(PKG_MSCG)
   find_package(GSL REQUIRED)
-  set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/mscg)
-  set(MSCG_TARBALL ${LAMMPS_LIB_MSCG_BIN_DIR}/MS-CG-master.zip)
-  set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_MSCG_BIN_DIR}/MSCG-release-master/src)
-  if(NOT EXISTS ${LAMMPS_LIB_MSCG_BIN_DIR})
-    if(NOT EXISTS ${MSCG_TARBALL})
-      message(STATUS "Downloading ${MSCG_TARBALL}")
-      file(DOWNLOAD
-        https://github.com/uchicago-voth/MSCG-release/archive/master.zip
-        ${MSCG_TARBALL} SHOW_PROGRESS) #EXPECTED_MD5 cannot be due due to master
+  option(DOWNLOAD_MSCG "Download latte (instead of using the system's one)" OFF)
+  if(DOWNLOAD_MSCG)
+    include(ExternalProject)
+    if(NOT LAPACK_FOUND)
+      set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a")
+    endif()
+    ExternalProject_Add(mscg_build
+      URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz
+      URL_MD5 8c45e269ee13f60b303edd7823866a91
+      SOURCE_SUBDIR src/CMake
+      CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE} ${EXTRA_MSCG_OPTS}
+      BUILD_COMMAND make mscg INSTALL_COMMAND ""
+      )
+    ExternalProject_get_property(mscg_build BINARY_DIR)
+    set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a)
+    ExternalProject_get_property(mscg_build SOURCE_DIR)
+    set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src)
+    list(APPEND LAMMPS_DEPS mscg_build)
+    if(NOT LAPACK_FOUND)
+      file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS})
+      add_dependencies(mscg_build linalg)
+    endif()
+  else()
+    find_package(MSCG)
+    if(NOT MSCG_FOUND)
+      message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
     endif()
-    message(STATUS "Unpacking ${MSCG_TARBALL}")
-    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ${MSCG_TARBALL}
-      WORKING_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/mscg)
   endif()
-  file(GLOB MSCG_SOURCES ${LAMMPS_LIB_MSCG_BIN_DIR}/*.cpp)
-  add_library(mscg STATIC ${MSCG_SOURCES})
-  list(APPEND LAMMPS_LINK_LIBS mscg)
-  target_compile_options(mscg PRIVATE -DDIMENSION=3 -D_exclude_gromacs=1)
-  target_include_directories(mscg PUBLIC ${LAMMPS_LIB_MSCG_BIN_DIR})
-  target_link_libraries(mscg ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
+  list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
+  include_directories(${MSCG_INCLUDE_DIRS})
 endif()
 
 if(PKG_COMPRESS)
@@ -800,6 +801,11 @@ include_directories(${LAMMPS_STYLE_HEADERS_DIR})
 # Actually add executable and lib to build
 ############################################
 add_library(lammps ${LIB_SOURCES})
+get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
+list (FIND LANGUAGES "Fortran" _index)
+if (${_index} GREATER -1)
+  list(APPEND LAMMPS_LINK_LIBS ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
+endif()
 list(REMOVE_DUPLICATES LAMMPS_LINK_LIBS)
 target_link_libraries(lammps ${LAMMPS_LINK_LIBS})
 if(LAMMPS_DEPS)
@@ -822,9 +828,38 @@ if(ENABLE_TESTING)
   add_test(ShowHelp lmp${LAMMPS_MACHINE} -help)
 endif()
 
-##################################
+###############################################################################
+# Testing
+#
+# Requires latest gcovr (for GCC 8.1 support):#
+# pip install git+https://github.com/gcovr/gcovr.git
+###############################################################################
+if(ENABLE_COVERAGE)
+    find_program(GCOVR_BINARY gcovr)
+    find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY)
+
+    if(GCOVR_FOUND)
+        get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE)
+
+        add_custom_target(
+            gen_coverage_xml
+            COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+            COMMENT "Generating XML Coverage Report..."
+        )
+
+        add_custom_target(
+            gen_coverage_html
+            COMMAND ${GCOVR_BINARY} -s  --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html
+            WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+            COMMENT "Generating HTML Coverage Report..."
+        )
+    endif()
+endif()
+
+###############################################################################
 # Print package summary
-##################################
+###############################################################################
 foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES} ${ACCEL_PACKAGES})
   if(PKG_${PKG})
     message(STATUS "Building package: ${PKG}")
diff --git a/cmake/Modules/FindMSCG.cmake b/cmake/Modules/FindMSCG.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..311ff7803836aa43c2c48bd4040a60686b1803db
--- /dev/null
+++ b/cmake/Modules/FindMSCG.cmake
@@ -0,0 +1,22 @@
+# - Find mscg
+# Find the native MSCG headers and libraries.
+#
+#  MSCG_INCLUDE_DIRS - where to find mscg.h, etc.
+#  MSCG_LIBRARIES    - List of libraries when using mscg.
+#  MSCG_FOUND        - True if mscg found.
+#
+
+find_path(MSCG_INCLUDE_DIR mscg.h PATH_SUFFIXES mscg)
+
+find_library(MSCG_LIBRARY NAMES mscg)
+
+set(MSCG_LIBRARIES ${MSCG_LIBRARY})
+set(MSCG_INCLUDE_DIRS ${MSCG_INCLUDE_DIR})
+
+include(FindPackageHandleStandardArgs)
+# handle the QUIETLY and REQUIRED arguments and set MSCG_FOUND to TRUE
+# if all listed variables are TRUE
+
+find_package_handle_standard_args(MSCG DEFAULT_MSG MSCG_LIBRARY MSCG_INCLUDE_DIR)
+
+mark_as_advanced(MSCG_INCLUDE_DIR MSCG_LIBRARY )
diff --git a/cmake/README.md b/cmake/README.md
index 67d5edc9f5024887f9db8d1ecdc35b656363c57a..8a582d82d95f04b33247dfb8ff982842489274e2 100644
--- a/cmake/README.md
+++ b/cmake/README.md
@@ -1,19 +1,1637 @@
-cmake-buildsystem
------------------
+# Building LAMMPS using CMake
 
-To use the cmake build system instead of the make-driven one, do:
+LAMMPS recently acquired support for building with CMake thanks to the efforts
+of Christoph Junghans (LANL) and Richard Berger (Temple U). One of the key
+strengths of CMake is that it can generate the necessary build system files of
+your own personal preference. It also enables using common development IDEs such
+as Eclipse, Visual Studio, QtCreator, Xcode and many more for LAMMPS
+development.
+
+CMake can both be used as a command-line (CLI) utility `cmake` or through one of
+its GUIs. `ccmake` is a text-based ui to configure and build CMake projects.
+`cmake-gui` is a graphical frontend for running CMake operations that works on
+Linux desktop environments, Windows and MacOS X.
+
+The following is a tutorial-style introduction in using the CMake system. It
+should give you the necessary foundation to understand how to do the most common
+tasks, act as a reference and provide examples of typical use cases.
+
+## Table of Contents
+
+   * [Quick Start for the Impatient](#quick-start-for-the-impatient)
+   * [Building LAMMPS using cmake](#building-lammps-using-cmake-1)
+      * [Prerequisites](#prerequisites)
+      * [Build directory vs. Source Directory](#build-directory-vs-source-directory)
+   * [Reference](#reference)
+      * [Common CMAKE Configuration Options](#common-cmake-configuration-options)
+      * [LAMMPS Configuration Options](#lammps-configuration-options)
+      * [Parallelization and Accelerator Packages](#parallelization-and-accelerator-packages)
+      * [Default Packages](#default-packages)
+      * [Other Packages](#other-packages)
+      * [User Packages](#user-packages)
+      * [Package-Specific Configuration Options](#package-specific-configuration-options)
+         * [KSPACE Package](#kspace-package)
+         * [MKL](#mkl)
+         * [FFTW2](#fftw2)
+         * [FFTW3](#fftw3)
+         * [LAPACK](#lapack)
+         * [PYTHON Package](#python-package)
+         * [GPU Package](#gpu-package)
+         * [VORONOI Package](#voronoi-package)
+         * [USER-SMD Package](#user-smd-package)
+      * [Optional Features](#optional-features)
+         * [zlib support](#zlib-support)
+         * [JPEG support](#jpeg-support)
+         * [PNG support](#png-support)
+         * [GZIP support](#gzip-support)
+         * [FFMPEG support](#ffmpeg-support)
+      * [Compilers](#compilers)
+         * [Building with GNU Compilers](#building-with-gnu-compilers)
+         * [Building with Intel Compilers](#building-with-intel-compilers)
+         * [Building with LLVM/Clang Compilers](#building-with-llvmclang-compilers)
+      * [Examples](#examples)
+
+
+## Quick Start for the Impatient
+If you want to skip ahead and just run the compilation using `cmake`, please
+find a minimal example below. Together with the options reference below, this
+should get you started.
+
+```bash
+git clone https://github.com/lammps/lammps.git
+mkdir lammps/build
+cd lammps/build
+cmake ../cmake [-DOPTION_A=VALUE_A -DOPTION_B=VALUE_B ...]
+make
+```
+
+# Building LAMMPS using `cmake`
+
+## Prerequisites
+This tutorial assumes you are running in a command-line environment using a
+shell like Bash.
+
+* Linux: any terminal window will work
+* MacOS X: launch the Terminal app
+* Windows 10: install and run "Bash on Windows" (aka Ubuntu on Windows)
+
+Before we start, please download the latest and greatest version of LAMMPS from
+GitHub. You can either download it as a tarball or ZIP file, or via git. When
+you start with a fresh lammps directory, the contents should look like this:
+
+```bash
+$ ls
+bench  doc       lib      potentials  README  tools
+cmake  examples  LICENSE  python      src
+```
+
+## Build directory vs. Source Directory
+
+By using CMake we separate building LAMMPS into multiple phases:
+
+1. **Configuration**: define which features we want to enable/disable and how it should be compiled
+2. **Compilation**: compile each source file and generate binaries and libraries based on the configuration
+3. **Installation** (Optional): finally we can install the generated binaries on our system
+
+In the GNU Make based build system of LAMMPS, configuration occurs by running
+special make targets like `make yes-MOLECULAR`. These targets modify the
+**source directory** (`src/`) directory by copying package files and patching
+Makefile. In some cases you are force to manually edit Makefiles to add compiler
+options and/or correct include directory and library paths.
+
+These edits and copy operations are no longer necessary when compiling with
+CMake. The source directory stays untouched, so you compile LAMMPS in many
+different variants using the same source code checkout. It enables true
+**out-of-source** builds.
+
+When using Cmake, you can compile in **any** folder outside of the source
+directory. Any working directory you choose becomes a so-called **build
+directory**. All configuration files and compilation results are stored in this
+folder. We recommend calling it something like `build/`.
+
+Let's have a look a quick example, where we get the greatest and latest version
+of LAMMPS from GitHub via git:
+```bash
+git clone https://github.com/lammps/lammps.git
 ```
-cmake /path/to/lammps/source/cmake
+
+We then create a new `build` folder and make it our current working directory:
+```
+mkdir lammps/build
+cd lammps/build
 ```
-(please note the cmake directory as the very end)
 
-To enable package, e.g. GPU do
+To configure LAMMPS we run `cmake` inside of this folder. However it requires at
+least one argument. `cmake` needs to read the LAMMPS `CMakeLists.txt` file to
+know what to do.  This file is located in the `cmake/` subdirectory of the
+lammps checkout. To run `cmake` add the relative or absolute path to the `cmake/`
+directory  as first argument.
+
+E.g., if the current working directory is `lammps/build` you can specify the
+relative path to `lammps/cmake` as follows:
 ```
-cmake /path/to/lammps/source/cmake -DPKG_GPU=ON
+cmake ../cmake
+```
+
+You could also specify the absolute path:
+```
+cmake /path/to/my/lammps/folder/cmake
+```
+
+Please note: **This does NOT compile the code!** Running cmake only configures
+the next build. It generates the necessary files to compile the code. On
+Unix/Linux it defaults to generating Makefiles. You can also choose other output
+formats to generate files for Eclipse, Xcode or Visual Studio which are
+supported on other platorms.
+
+To compile LAMMPS once the Makefiles are generated, simply type `make` in the
+build directory.
+
+```
+make
+```
+
+# Reference
+
+## Common CMAKE Configuration Options
+
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>CMAKE_INSTALL_PREFIX</code></td>
+  <td>Install location where LAMMPS files will be copied to. In the Unix/Linux case with Makefiles this controls what `make install` will do.</td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>CMAKE_BUILD_TYPE</code></td>
+  <td>Controls if debugging symbols are added to the generated binaries</td>
+  <td>
+  <dl>
+  <dt><code>Release</code> (default)</dt>
+  <dt><code>Debug</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+
+## LAMMPS Configuration Options
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>LAMMPS_SIZE_LIMIT</code></td>
+  <td>Controls the integer sizes used by LAMMPS internally</td>
+  <td>
+  <dl>
+    <dt><code>LAMMPS_SMALLBIG</code> (default)</dt>
+    <dd>32bit , 64bit</dd>
+    <dt><code>LAMMPS_SMALLSMALL</code></dt>
+    <dd>32bit , 32bit</dd>
+    <dt><code>LAMMPS_BIGBIG</code></dt>
+    <dd>64bit , 64bit</dd>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>LAMMPS_MEMALIGN</code></td>
+  <td>controls the alignment of blocks of memory allocated by LAMMPS</td>
+  <td>
+  <dl>
+    <dt><code>64</code> (default)</dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>LAMMPS_EXCEPTIONS</code></td>
+  <td>controls whether LAMMPS dies after an error or throws a C++ exception. This is particularly useful when running through the C library interface, since an error
+  in LAMMPS then doesn't kill the parent process</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>LAMMPS_MACHINE</code></td>
+  <td>allows appending a machine suffix to the generate LAMMPS binary</td>
+  <td>
+  <dl>
+    <dt>*none*  (default)</dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>BUILD_SHARED_LIBS</code></td>
+  <td>control whether to build LAMMPS as a shared-library</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>LAMMPS_LONGLONG_TO_LONG</code></td>
+  <td>Workaround if your system or MPI version does not recognize <code>long long</code> data types</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## Parallelization and Accelerator Packages
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>BUILD_MPI</code></td>
+  <td>control whether to build LAMMPS with MPI support. This will look for
+  `mpicxx` in your path and use this MPI implementation.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>BUILD_OMP</code></td>
+  <td>control whether to build LAMMPS with OpenMP support.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_OPT</code></td>
+  <td>
+  A handful of pair styles which are optimized for improved CPU performance on
+  single or multiple cores. These include EAM, LJ, CHARMM, and Morse potentials.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-OMP</code></td>
+  <td>
+  Hundreds of pair, fix, compute, bond, angle, dihedral, improper, and kspace
+  styles which are altered to enable threading on many-core CPUs via OpenMP
+  directives.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-INTEL</code></td>
+  <td>
+  Dozens of pair, fix, bond, angle, dihedral, improper, and kspace styles which
+  are optimized for Intel CPUs and KNLs (Knights Landing).
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_GPU</code></td>
+  <td>
+  Dozens of pair styles and a version of the PPPM long-range Coulombic solver
+  optimized for GPUs. All such styles have a â€œgpuâ€ as a suffix in their style
+  name. The GPU code can be compiled with either CUDA or OpenCL, however the
+  OpenCL variants are no longer actively maintained and only the CUDA versions
+  are regularly tested.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_KOKKOS</code></td>
+  <td>Dozens of atom, pair, bond, angle, dihedral, improper, fix, compute styles adapted to compile using the Kokkos library which can convert them to OpenMP or CUDA code so that they run efficiently on multicore CPUs, KNLs, or GPUs.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## Default Packages
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>ENABLE_ALL</code></td>
+  <td>Enable all default packages</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_ASPHERE</code></td>
+  <td>Computes, time-integration fixes, and pair styles for aspherical particle models including ellipsoids, 2d lines, and 3d triangles.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_BODY</code></td>
+  <td>Body-style particles with internal structure. Computes, time-integration fixes, pair styles, as well as the body styles themselves.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_CLASS2</code></td>
+  <td>Bond, angle, dihedral, improper, and pair styles for the COMPASS CLASS2 molecular force field.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_COLLOID</code></td>
+  <td>Coarse-grained finite-size colloidal particles. Pair styles and fix wall styles for colloidal interactions. Includes the Fast Lubrication Dynamics (FLD) method for hydrodynamic interactions, which is a simplified approximation to Stokesian dynamics.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_COMPRESS</code></td>
+  <td>Compressed output of dump files via the zlib compression library, using dump styles with a â€œgzâ€ in their style name.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_CORESHELL</code></td>
+  <td>Compute and pair styles that implement the adiabatic core/shell model for polarizability. The pair styles augment Born, Buckingham, and Lennard-Jones styles with core/shell capabilities. The compute temp/cs command calculates the temperature of a system with core/shell particles.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_DIPOLE</code></td>
+  <td>An atom style and several pair styles for point dipole models with short-range or long-range interactions.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_GRANULAR</code></td>
+  <td>Pair styles and fixes for finite-size granular particles, which interact with each other and boundaries via frictional and dissipative potentials.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_KSPACE</code></td>
+  <td>A variety of long-range Coulombic solvers, as well as pair styles which compute the corresponding short-range pairwise Coulombic interactions. These include Ewald, particle-particle particle-mesh (PPPM), and multilevel summation method (MSM) solvers.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MANYBODY</code></td>
+  <td>
+  A variety of manybody and bond-order potentials. These include (AI)REBO, BOP,
+  EAM, EIM, Stillinger-Weber, and Tersoff potentials.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MC</code></td>
+  <td>
+  Several fixes and a pair style that have Monte Carlo (MC) or MC-like
+  attributes. These include fixes for creating, breaking, and swapping bonds,
+  for performing atomic swaps, and performing grand-canonical MC (GCMC) in
+  conjuction with dynamics.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MEAM</code></td>
+  <td>
+<p>A pair style for the modified embedded atom (MEAM) potential.</p>
+
+<p><strong>Please note that the MEAM package has been superseded by the USER-MEAMC package,
+which is a direct translation of the MEAM package to C++. USER-MEAMC contains
+additional optimizations making it run faster than MEAM on most machines, while
+providing the identical features and USER interface.</strong></p>
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MISC</code></td>
+  <td>
+  A variety of compute, fix, pair, dump styles with specialized capabilities that
+  donâ€™t align with other packages.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MOLECULE</code></td>
+  <td>
+  A large number of atom, pair, bond, angle, dihedral, improper styles that are
+  used to model molecular systems with fixed covalent bonds. The pair styles
+  include the Dreiding (hydrogen-bonding) and CHARMM force fields, and a TIP4P
+  water model.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_PERI</code></td>
+  <td>
+  An atom style, several pair styles which implement different Peridynamics
+  materials models, and several computes which calculate diagnostics.
+  Peridynamics is a a particle-based meshless continuum model.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_QEQ</code></td>
+  <td>
+  Several fixes for performing charge equilibration (QEq) via different
+  algorithms. These can be used with pair styles that perform QEq as part of
+  their formulation.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_REAX</code></td>
+  <td>
+  A pair style which wraps a Fortran library which implements the ReaxFF
+  potential, which is a universal reactive force field. See the USER-REAXC
+  package for an alternate implementation in C/C++. Also a fix reax/bonds
+  command for monitoring molecules as bonds are created and destroyed.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_REPLICA</code></td>
+  <td>
+  A collection of multi-replica methods which can be used when running multiple
+  LAMMPS simulations (replicas). See Section 6.5 for an overview of how to run
+  multi-replica simulations in LAMMPS. Methods in the package include nudged
+  elastic band (NEB), parallel replica dynamics (PRD), temperature accelerated
+  dynamics (TAD), parallel tempering, and a verlet/split algorithm for
+  performing long-range Coulombics on one set of processors, and the remainder
+  of the force field calcalation on another set.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_RIGID</code></td>
+  <td>
+  Fixes which enforce rigid constraints on collections of atoms or particles.
+  This includes SHAKE and RATTLE, as well as varous rigid-body integrators for a
+  few large bodies or many small bodies. Also several computes which calculate
+  properties of rigid bodies.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_SHOCK</code></td>
+  <td>
+  Fixes for running impact simulations where a shock-wave passes through a
+  material.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_SNAP</code></td>
+  <td>
+  A pair style for the spectral neighbor analysis potential (SNAP). SNAP is
+  methodology for deriving a highly accurate classical potential fit to a large
+  archive of quantum mechanical (DFT) data. Also several computes which analyze
+  attributes of the potential.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_SRD</code></td>
+  <td>
+  A pair of fixes which implement the Stochastic Rotation Dynamics (SRD) method
+  for coarse-graining of a solvent, typically around large colloidal particles.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## Other Packages
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>PKG_KIM</code></td>
+  <td>A <code>pair_style kim</code> command which is a wrapper on the Knowledge Base for Interatomic Models (KIM) repository of interatomic potentials, enabling any of them to be used in LAMMPS simulations.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_PYTHON</code></td>
+  <td>Enable support for Python scripting inside of LAMMPS.</td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MSCG</code></td>
+  <td>
+  A fix mscg command which can parameterize a Multi-Scale Coarse-Graining (MSCG)
+  model using the open-source MS-CG library.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_MPIIO</code></td>
+  <td>
+  Support for parallel output/input of dump and restart files via the MPIIO library.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_POEMS</code></td>
+  <td>
+  A fix that wraps the Parallelizable Open source Efficient Multibody Software
+  (POEMS) library, which is able to simulate the dynamics of articulated body
+  systems. These are systems with multiple rigid bodies (collections of
+  particles) whose motion is coupled by connections at hinge points.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_LATTE</code></td>
+  <td>
+  A fix command which wraps the LATTE DFTB code, so that molecular dynamics can
+  be run with LAMMPS using density-functional tight-binding quantum forces
+  calculated by LATTE.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## User Packages
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>PKG_USER-ATC</code></td>
+  <td>
+  ATC stands for atoms-to-continuum. This package implements a fix atc command
+  to either couple molecular dynamics with continuum finite element equations or
+  perform on-the-fly conversion of atomic information to continuum fields.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-AWPMD</code></td>
+  <td>
+  AWPMD stands for Antisymmetrized Wave Packet Molecular Dynamics. This package
+  implements an atom, pair, and fix style which allows electrons to be treated
+  as explicit particles in a classical molecular dynamics model.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-CGDNA</code></td>
+  <td>
+  Several pair styles, a bond style, and integration fixes for coarse-grained
+  models of single- and double-stranded DNA based on the oxDNA model of Doye,
+  Louis and Ouldridge at the University of Oxford. This includes Langevin-type
+  rigid-body integrators with improved stability.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-CGSDK</code></td>
+  <td>
+  Several pair styles and an angle style which implement the coarse-grained SDK
+  model of Shinoda, DeVane, and Klein which enables simulation of ionic liquids,
+  electrolytes, lipids and charged amino acids.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-COLVARS</code></td>
+  <td>
+  COLVARS stands for collective variables, which can be used to implement
+  various enhanced sampling methods, including Adaptive Biasing Force,
+  Metadynamics, Steered MD, Umbrella Sampling and Restraints. A fix colvars
+  command is implemented which wraps a COLVARS library, which implements these
+  methods. simulations.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-DIFFRACTION</code></td>
+  <td>
+  Two computes and a fix for calculating x-ray and electron diffraction
+  intensities based on kinematic diffraction theory.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-DPD</code></td>
+  <td>
+  DPD stands for dissipative particle dynamics. This package implements
+  coarse-grained DPD-based models for energetic, reactive molecular crystalline
+  materials. It includes many pair styles specific to these systems, including
+  for reactive DPD, where each particle has internal state for multiple species
+  and a coupled set of chemical reaction ODEs are integrated each timestep.
+  Highly accurate time integrators for isothermal, isoenergetic, isobaric and
+  isenthalpic conditions are included. These enable long timesteps via the
+  Shardlow splitting algorithm.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-DRUDE</code></td>
+  <td>
+  Fixes, pair styles, and a compute to simulate thermalized Drude oscillators as
+  a model of polarization.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-EFF</code></td>
+  <td>
+  EFF stands for electron force field which allows a classical MD code to model
+  electrons as particles of variable radius. This package contains atom, pair,
+  fix and compute styles which implement the eFF as described in A.
+  Jaramillo-Botero, J. Su, Q. An, and W.A. Goddard III, JCC, 2010. The eFF
+  potential was first introduced by Su and Goddard, in 2007.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-FEP</code></td>
+  <td>
+  FEP stands for free energy perturbation. This package provides methods for
+  performing FEP simulations by using a fix adapt/fep command with soft-core
+  pair potentials, which have a â€œsoftâ€ in their style name.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-H5MD</code></td>
+  <td>
+  H5MD stands for HDF5 for MD. HDF5 is a portable, binary, self-describing file
+  format, used by many scientific simulations. H5MD is a format for molecular
+  simulations, built on top of HDF5. This package implements a dump h5md command
+  to output LAMMPS snapshots in this format.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-LB</code></td>
+  <td>
+  Fixes which implement a background Lattice-Boltzmann (LB) fluid, which can be
+  used to model MD particles influenced by hydrodynamic forces.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MANIFOLD</code></td>
+  <td>
+  Several fixes and a â€œmanifoldâ€ class which enable simulations of particles
+  constrained to a manifold (a 2D surface within the 3D simulation box). This is
+  done by applying the RATTLE constraint algorithm to formulate single-particle
+  constraint functions g(xi,yi,zi) = 0 and their derivative (i.e. the normal of
+  the manifold) n = grad(g).
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MEAMC</code></td>
+  <td>
+  A pair style for the modified embedded atom (MEAM) potential translated from
+  the Fortran version in the MEAM package to plain C++. In contrast to the MEAM
+  package, no library needs to be compiled and the pair style can be
+  instantiated multiple times.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MESO</code></td>
+  <td>
+  Several extensions of the the dissipative particle dynamics (DPD) method.
+  Specifically, energy-conserving DPD (eDPD) that can model non-isothermal
+  processes, many-body DPD (mDPD) for simulating vapor-liquid coexistence, and
+  transport DPD (tDPD) for modeling advection-diffusion-reaction systems. The
+  equations of motion of these DPD extensions are integrated through a modified
+  velocity-Verlet (MVV) algorithm.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MGPT</code></td>
+  <td>
+  A pair style which provides a fast implementation of the quantum-based MGPT
+  multi-ion potentials. The MGPT or model GPT method derives from
+  first-principles DFT-based generalized pseudopotential theory (GPT) through a
+  series of systematic approximations valid for mid-period transition metals
+  with nearly half-filled d bands. The MGPT method was originally developed by
+  John Moriarty at LLNL. The pair style in this package calculates forces and
+  energies using an optimized matrix-MGPT algorithm due to Tomas Oppelstrup at
+  LLNL.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MISC</code></td>
+  <td>
+  A potpourri of (mostly) unrelated features contributed to LAMMPS by users.
+  Each feature is a single fix, compute, pair, bond, angle, dihedral, improper,
+  or command style.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MOFFF</code></td>
+  <td>
+  Pair, angle and improper styles needed to employ the MOF-FF force field by
+  Schmid and coworkers with LAMMPS. MOF-FF is a first principles derived force
+  field with the primary aim to simulate MOFs and related porous framework
+  materials, using spherical Gaussian charges. It is described in S. Bureekaew
+  et al., Phys. Stat. Sol. B 2013, 250, 1128-1141. For the usage of MOF-FF see
+  the example in the example directory as well as the MOF+ website.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-MOLFILE</code></td>
+  <td>
+  A dump molfile command which uses molfile plugins that are bundled with the
+  VMD molecular visualization and analysis program, to enable LAMMPS to dump
+  snapshots in formats compatible with various molecular simulation tools.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-NETCDF</code></td>
+  <td>
+  Dump styles for writing NetCDF formatted dump files. NetCDF is a portable,
+  binary, self-describing file format developed on top of HDF5. The file
+  contents follow the AMBER NetCDF trajectory conventions
+  (http://ambermd.org/netcdf/nctraj.xhtml), but include extensions.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-PHONON</code></td>
+  <td>
+  A fix phonon command that calculates dynamical matrices, which can then be
+  used to compute phonon dispersion relations, directly from molecular dynamics
+  simulations.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-QTB</code></td>
+  <td>
+  Two fixes which provide a self-consistent quantum treatment of vibrational modes in a classical molecular dynamics simulation. By coupling the MD simulation to a colored thermostat, it introduces zero point energy into the system, altering the energy power spectrum and the heat capacity to account for their quantum nature. This is useful when modeling systems at temperatures lower than their classical limits or when temperatures ramp across the classical limits in a simulation.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-QUIP</code></td>
+  <td>
+  A pair_style quip command which wraps the QUIP libAtoms library, which
+  includes a variety of interatomic potentials, including Gaussian Approximation
+  Potential (GAP) models developed by the Cambridge University group.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-QMMM</code></td>
+  <td>
+  A fix qmmm command which allows LAMMPS to be used in a QM/MM simulation,
+  currently only in combination with the Quantum ESPRESSO package.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-REAXC</code></td>
+  <td>
+  A pair style which implements the ReaxFF potential in C/C++ (in contrast to
+  the REAX package and its Fortran library). ReaxFF is universal reactive force
+  field. See the src/USER-REAXC/README file for more info on differences between
+  the two packages. Also two fixes for monitoring molecules as bonds are created
+  and destroyed.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-SMD</code></td>
+  <td>
+  An atom style, fixes, computes, and several pair styles which implements
+  smoothed Mach dynamics (SMD) for solids, which is a model related to smoothed
+  particle hydrodynamics (SPH) for liquids (see the USER-SPH package).
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-SMTBQ</code></td>
+  <td>
+  A pair style which implements a Second Moment Tight Binding model with QEq
+  charge equilibration (SMTBQ) potential for the description of ionocovalent
+  bonds in oxides.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-SPH</code></td>
+  <td>
+  An atom style, fixes, computes, and several pair styles which implements
+  smoothed particle hydrodynamics (SPH) for liquids. See the related USER-SMD
+  package package for smooth Mach dynamics (SMD) for solids.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-TALLY</code></td>
+  <td>
+  Several compute styles that can be called when pairwise interactions are
+  calculated to tally information (forces, heat flux, energy, stress, etc) about
+  individual interactions.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-UEF</code></td>
+  <td>
+  A fix style for the integration of the equations of motion under extensional
+  flow with proper boundary conditions, as well as several supporting compute
+  styles and an output option.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PKG_USER-VTK</code></td>
+  <td>
+  A dump vtk command which outputs snapshot info in the VTK format, enabling
+  visualization by Paraview or other visualization packages.
+  </td>
+  <td>
+  <dl>
+    <dt><code>off</code> (default)</dt>
+    <dt><code>on</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## Package-Specific Configuration Options
+
+### KSPACE Package
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>FFT</code></td>
+  <td>
+    <p>FFT library for KSPACE package</p>
+    <p>If either MKL or FFTW is selected <code>cmake</code> will try to locate these libraries automatically. To control which one should be used please see the options below for each FFT library.</p>
+  </td>
+  <td>
+  <dl>
+    <dt><code>KISSFFT</code></dt>
+    <dt><code>FFTW3</code></dt>
+    <dt><code>FFTW2</code></dt>
+    <dt><code>MKL</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>PACK_ARRAY</code></td>
+  <td>Optimization for FFT</td>
+  <td>
+  <dl>
+    <dt><code>PACK_ARRAY</code></dt>
+    <dt><code>PACK_POINTER</code></dt>
+    <dt><code>PACK_MEMCPY</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### MKL
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>MKL_INCLUDE_DIRS</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>MKL_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+TODO static vs dynamic linking
+
+### FFTW2
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>FFTW2_INCLUDE_DIRS</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>FFTW2_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### FFTW3
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>FFTW3_INCLUDE_DIRS</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>FFTW3_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### LAPACK
+TODO
+
+### PYTHON Package
+
+
+### GPU Package
+The GPU package builds a support library which can either use OpenCL or CUDA as
+target API.
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>GPU_API</code></td>
+  <td>API used by GPU package</td>
+  <td>
+  <dl>
+    <dt><code>OpenCL</code> (default)</dt>
+    <dt><code>CUDA</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>GPU_PREC</code></td>
+  <td>Precision size used by GPU package kernels</td>
+  <td>
+  <dl>
+    <dt><code>SINGLE_DOUBLE</code></dt>
+    <dt><code>SINGLE_SINGLE</code></dt>
+    <dt><code>DOUBLE_DOUBLE</code></dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>OCL_TUNE</code> (OpenCL only)</td>
+  <td>Tuning target for OpenCL driver code</td>
+  <td>
+  <dl>
+    <dt><code>GENERIC</code> (default)</dt>
+    <dt><code>INTEL</code> (Intel CPU)</dt>
+    <dt><code>PHI</code> (Intel Xeon Phi)</dt>
+    <dt><code>FERMI</code> (NVIDIA)</dt>
+    <dt><code>KEPLER</code> (NVIDIA)</dt>
+    <dt><code>CYPRESS</code> (AMD)</dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>GPU_ARCH</code> (CUDA only)</td>
+  <td>CUDA SM architecture targeted by GPU package</td>
+  <td>
+  <dl>
+    <dt><code>sm20</code> (Fermi)</dt>
+    <dt><code>sm30</code> (Kepler)</dt>
+    <dt><code>sm50</code> (Maxwell)</dt>
+    <dt><code>sm60</code> (Pascal)</dt>
+    <dt><code>sm70</code> (Volta)</dt>
+  </dl>
+  </td>
+</tr>
+<tr>
+  <td><code>CUDPP_OPT</code> (CUDA only)</td>
+  <td>Enable CUDA Performance Primitives Optimizations</td>
+  <td>
+  <dl>
+    <dt><code>on</code> (default)</dt>
+    <dt><code>off</code></dt>
+  </dl>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### VORONOI Package
+
+TODO
+
+### USER-SMD Package
+
+Requires a Eigen3 installation
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>EIGEN3_INCLUDE_DIR</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+## Optional Features
+
+### zlib support
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>ZLIB_INCLUDE_DIR</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>ZLIB_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### JPEG support
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>JPEG_INCLUDE_DIR</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>JPEG_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### PNG support
+(requires zlib support)
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>PNG_INCLUDE_DIR</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+<tr>
+  <td><code>PNG_LIBRARIES</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### GZIP support
+
+requires `gzip` to be in your `PATH`
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>GZIP_EXECUTABLE</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+### FFMPEG support
+
+requires `ffmpeg` to be in your `PATH`
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Values</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>FFMPEG_EXECUTABLE</code></td>
+  <td></td>
+  <td>
+  </td>
+</tr>
+</tbody>
+</table>
+
+
+## Compilers
+
+By default, `cmake` will use your environment C/C++/Fortran compilers for a build. It uses the `CC`, `CXX` and `FC` environment variables to detect which compilers should be used. However, these values
+will be cached after the first run of `cmake`. Subsequent runs of `cmake` will ignore changes in these environment variables. To ensure the correct values are used you avoid the cache by setting the `CMAKE_C_COMPILER`, `CMAKE_CXX_COMPILER`, `CMAKE_Fortran_COMPILER` options directly.
+
+<table>
+<thead>
+<tr>
+  <th>Option</th>
+  <th>Description</th>
+  <th>Default</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><code>CMAKE_C_COMPILER</code></td>
+  <td>C Compiler which should be used by CMake</td>
+  <td>value of `CC` environment variable at first `cmake` run</td>
+</tr>
+<tr>
+  <td><code>CMAKE_CXX_COMPILER</code></td>
+  <td>C++ compiler which should be used by CMake</td>
+  <td>
+  value of `CXX` environment variable at first `cmake` run
+  </td>
+</tr>
+<tr>
+  <td><code>CMAKE_Fortran_COMPILER</code></td>
+  <td>C++ compiler which should be used by CMake</td>
+  <td>
+  value of `FC` environment variable at first `cmake` run
+  </td>
+</tr>
+</tbody>
+</table>
+
+### Building with GNU Compilers
+
+```bash
+cmake ../cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_Fortran_COMPILER=gfortran
 ```
 
-cmake has many many options, do get an overview use the curses-based cmake interface, ccmake:
+### Building with Intel Compilers
+
+```bash
+cmake ../cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort
 ```
-ccmake /path/to/lammps/source/cmake
+
+
+### Building with LLVM/Clang Compilers
+
+```bash
+cmake ../cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_Fortran_COMPILER=flang
 ```
-(Don't forget to press "g" for generate once you are done with configuring)
+
+
+## Examples
diff --git a/doc/Makefile b/doc/Makefile
index 0a5dbe1e7d64be21628f6e85e0e72a7468752d57..c8cc8bc1bd027947165d30d79df3a0a8f3c0d73b 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -9,6 +9,7 @@ TXT2RST       = $(VENV)/bin/txt2rst
 ANCHORCHECK   = $(VENV)/bin/doc_anchor_check
 
 PYTHON        = $(shell which python3)
+VIRTUALENV     = virtualenv
 HAS_PYTHON3    = NO
 HAS_VIRTUALENV = NO
 
@@ -16,7 +17,13 @@ ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
 HAS_PYTHON3 = YES
 endif
 
+ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
+VIRTUALENV     = virtualenv-3
+HAS_VIRTUALENV = YES
+endif
+
 ifeq ($(shell which virtualenv >/dev/null 2>&1; echo $$?), 0)
+VIRTUALENV     = virtualenv
 HAS_VIRTUALENV = YES
 endif
 
@@ -158,7 +165,7 @@ $(VENV):
 	@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
 	@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
 	@( \
-		virtualenv -p $(PYTHON) $(VENV); \
+		$(VIRTUALENV) -p $(PYTHON) $(VENV); \
 		. $(VENV)/bin/activate; \
 		pip install Sphinx; \
 		pip install sphinxcontrib-images; \
diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index 6b2c7daac7d682c05469e44db60b9e0aad02f5c5..e302b061e3c2ad6d67661e1e9fe6feec712520f1 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="11 May 2018 version">
+<META NAME="docnumber" CONTENT="22 Jun 2018 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@@ -19,7 +19,7 @@
 :line
 
 LAMMPS Documentation :c,h1
-11 May 2018 version :c,h2
+22 Jun 2018 version :c,h2
 
 Version info: :h3
 
diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt
index 3dabdbeaa1e1091db35b798f19cf9f3b2b74848f..891abc24280b7ee5d46b74be0a2cda5a48189976 100644
--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@@ -129,6 +129,17 @@ region 1 block $((xlo+xhi)/2+sqrt(v_area)) 2 INF INF EDGE EDGE :pre
 
 so that you do not have to define (or discard) a temporary variable X.
 
+Additionally, the "immediate" variable expression may be followed by a
+colon, followed by a C-style format string, e.g. ":%f" or ":%.10g".
+The format string must be appropriate for a double-precision
+floating-point value.  The format string is used to output the result
+of the variable expression evaluation.  If a format string is not
+specified a high-precision "%.20g" is used as the default.
+
+This can be useful for formatting print output to a desired precion:
+
+print "Final energy per atom: $(pe/atoms:%10.3f) eV/atom" :pre
+
 Note that neither the curly-bracket or immediate form of variables can
 contain nested $ characters for other variables to substitute for.
 Thus you cannot do this:
diff --git a/doc/src/Section_errors.txt b/doc/src/Section_errors.txt
index 2821f1ff5d046d9b6ef94d8dc4a1e3eb8cb70b58..95482b06dc5bc1d768a0e4c355d8aad5e5e08821 100644
--- a/doc/src/Section_errors.txt
+++ b/doc/src/Section_errors.txt
@@ -803,6 +803,13 @@ lo value must be less than the hi value for all 3 dimensions. :dd
 The box command cannot be used after a read_data, read_restart, or
 create_box command. :dd
 
+{BUG: restartinfo=1 but no restart support in pair style} :dt
+
+The pair style has a bug, where it does not support reading
+and writing information to a restart file, but does not set
+the member variable restartinfo to 0 as required in that case. :dd
+
+
 {CPU neighbor lists must be used for ellipsoid/sphere mix.} :dt
 
 When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
diff --git a/doc/src/create_bonds.txt b/doc/src/create_bonds.txt
index 6af69214d3c4523318945c3bf70ac0547621dd64..6700ed29d3f3f6d9b3c7d5094b182a1bf6235a82 100644
--- a/doc/src/create_bonds.txt
+++ b/doc/src/create_bonds.txt
@@ -37,8 +37,8 @@ keyword = {special} :l
 
 create_bonds many all all 1 1.0 1.2
 create_bonds many surf solvent 3 2.0 2.4
-create_bond single/bond 1 1 2
-create_bond single/angle 5 52 98 107 special no :pre
+create_bonds single/bond 1 1 2
+create_bonds single/angle 5 52 98 107 special no :pre
 
 [Description:]
 
diff --git a/doc/src/fix_adapt.txt b/doc/src/fix_adapt.txt
index 19d1009b8a838829c0dfee1adde7526134d29053..7a34f2ff4480004c3ff249957b2b36204a087035 100644
--- a/doc/src/fix_adapt.txt
+++ b/doc/src/fix_adapt.txt
@@ -205,6 +205,14 @@ a bond coefficient over time, very similar to how the {pair} keyword
 operates. The only difference is that now a bond coefficient for a
 given bond type is adapted.
 
+A wild-card asterisk can be used in place of or in conjunction with
+the bond type argument to set the coefficients for multiple bond types.
+This takes the form "*" or "*n" or "n*" or "m*n".  If N = the number of
+atom types, then an asterisk with no numeric values means all types
+from 1 to N.  A leading asterisk means all types from 1 to n (inclusive).
+A trailing asterisk means all types from n to N (inclusive).  A middle
+asterisk means all types from m to n (inclusive).
+
 Currently {bond} does not support bond_style hybrid nor bond_style
 hybrid/overlay as bond styles. The only bonds that currently are
 working with fix_adapt are
diff --git a/doc/src/fix_dt_reset.txt b/doc/src/fix_dt_reset.txt
index 1bed4f9e89271f367dd68257f78f33b76c42f11f..7605395ca05122bbde2a2600e2349b9026be4b56 100644
--- a/doc/src/fix_dt_reset.txt
+++ b/doc/src/fix_dt_reset.txt
@@ -19,7 +19,9 @@ Tmin = minimum dt allowed which can be NULL (time units)
 Tmax = maximum dt allowed which can be NULL (time units)
 Xmax = maximum distance for an atom to move in one timestep (distance units)
 zero or more keyword/value pairs may be appended
-keyword = {units} :ul
+keyword = {emax} or {units} :ul
+  {emax} value = Emax
+    Emax = maximum kinetic energy change for an atom in one timestep (energy units)
   {units} value = {lattice} or {box}
     lattice = Xmax is defined in lattice units
     box = Xmax is defined in simulation box units :pre
@@ -27,12 +29,17 @@ keyword = {units} :ul
 [Examples:]
 
 fix 5 all dt/reset 10 1.0e-5 0.01 0.1
-fix 5 all dt/reset 10 0.01 2.0 0.2 units box :pre
+fix 5 all dt/reset 10 0.01 2.0 0.2 units box
+fix 5 all dt/reset 5 NULL 0.001 0.5 emax 30 units box :pre
 
 [Description:]
 
 Reset the timestep size every N steps during a run, so that no atom
-moves further than Xmax, based on current atom velocities and forces.
+moves further than the specified {Xmax} distance, based on current
+atom velocities and forces.  Optionally an additional criterion is
+imposed by the {emax} keyword, so that no atom's kinetic energy
+changes by more than the specified {Emax}.
+
 This can be useful when starting from a configuration with overlapping
 atoms, where forces will be large.  Or it can be useful when running
 an impact simulation where one or more high-energy atoms collide with
@@ -48,7 +55,12 @@ current velocity and force.  Since performing this calculation exactly
 would require the solution to a quartic equation, a cheaper estimate
 is generated.  The estimate is conservative in that the atom's
 displacement is guaranteed not to exceed {Xmax}, though it may be
-smaller.
+smaller. 
+
+In addition if the {emax} keyword is used, the specified {Emax} value
+is enforced as a limit on how much an atom's kinetic energy can
+change.  If the timestep required is even smaller than for the {Xmax}
+displacement, then the smaller timestep is used.
 
 Given this putative timestep for each atom, the minimum timestep value
 across all atoms is computed.  Then the {Tmin} and {Tmax} bounds are
@@ -87,4 +99,5 @@ minimization"_minimize.html.
 
 [Default:]
 
-The option defaults is units = lattice.
+The option defaults are units = lattice, and no emax kinetic energy
+limit.
diff --git a/doc/src/molecule.txt b/doc/src/molecule.txt
index ecec62256a82a3b2e67f9fe64a2af6f796f0c66e..cd9ecce42c8b8750e52389e6b48c08885a7cae3f 100644
--- a/doc/src/molecule.txt
+++ b/doc/src/molecule.txt
@@ -98,19 +98,20 @@ molecule (header keyword = inertia).
 NOTE: The molecule command can be used to define molecules with bonds,
 angles, dihedrals, imporopers, or special bond lists of neighbors
 within a molecular topology, so that you can later add the molecules
-to your simulation, via one or more of the commands listed above.  If
-such molecules do not already exist when LAMMPS creates the simulation
-box, via the "create_box"_create_box.html or
-"read_data"_read_data.html command, when you later add them you may
-overflow the pre-allocated data structures which store molecular
-topology information with each atom, and an error will be generated.
-Both the "create_box"_create_box.html command and the data files read
-by the "read_data"_read_data.html command have "extra" options which
+to your simulation, via one or more of the commands listed above.
+Since this topology-related information requires that suitable storage
+is reserved when LAMMPS creates the simulation box (e.g. when using
+the "create_box"_create_box.html command or the
+"read_data"_read_data.html command) suitable space has to be reserved
+so you do not overflow those pre-allocated data structures when adding
+molecules later.  Both the "create_box"_create_box.html command and
+the "read_data"_read_data.html command have "extra" options which
 insure space is allocated for storing topology info for molecules that
 are added later.
 
-The format of an individual molecule file is similar to the data file
-read by the "read_data"_read_data.html commands, and is as follows.
+The format of an individual molecule file is similar but
+(not identical) to the data file read by the "read_data"_read_data.html
+commands, and is as follows.
 
 A molecule file has a header and a body.  The header appears first.
 The first line of the header is always skipped; it typically contains
@@ -455,7 +456,11 @@ of SHAKE clusters.
 
 :line
 
-[Restrictions:] none
+[Restrictions:]
+
+This command must come after the simulation box is define by a
+"read_data"_read_data.html, "read_restart"_read_restart.html, or
+"create_box"_create_box.html command.
 
 [Related commands:]
 
diff --git a/doc/src/pair_born.txt b/doc/src/pair_born.txt
index a016f77fa3ba6c96ef18fbb7d1094e79a0475aab..f867107426b04b7c4f92ddafafe75ed3687b2c16 100644
--- a/doc/src/pair_born.txt
+++ b/doc/src/pair_born.txt
@@ -12,12 +12,14 @@ pair_style born/omp command :h3
 pair_style born/gpu command :h3
 pair_style born/coul/long command :h3
 pair_style born/coul/long/cs command :h3
+pair_style born/coul/long/cs/gpu command :h3
 pair_style born/coul/long/gpu command :h3
 pair_style born/coul/long/omp command :h3
 pair_style born/coul/msm command :h3
 pair_style born/coul/msm/omp command :h3
 pair_style born/coul/wolf command :h3
 pair_style born/coul/wolf/cs command :h3
+pair_style born/coul/wolf/cs/gpu command :h3
 pair_style born/coul/wolf/gpu command :h3
 pair_style born/coul/wolf/omp command :h3
 pair_style born/coul/dsf command :h3
diff --git a/doc/src/pair_coul.txt b/doc/src/pair_coul.txt
index 4cca5ee0d78a529162e782121855150daa162e06..aa3a008bd36f2e4b4c0f51da19390d1c8a5bb80a 100644
--- a/doc/src/pair_coul.txt
+++ b/doc/src/pair_coul.txt
@@ -20,6 +20,7 @@ pair_style coul/dsf/kk command :h3
 pair_style coul/dsf/omp command :h3
 pair_style coul/long command :h3
 pair_style coul/long/cs command :h3
+pair_style coul/long/cs/gpu command :h3
 pair_style coul/long/omp command :h3
 pair_style coul/long/gpu command :h3
 pair_style coul/long/kk command :h3
diff --git a/doc/src/variable.txt b/doc/src/variable.txt
index c0851464c3f14e11629b8e65ee58a171499a4e45..717c77c07908099a76f4f32991697bfcb8294bc6 100644
--- a/doc/src/variable.txt
+++ b/doc/src/variable.txt
@@ -296,17 +296,24 @@ list of runs (e.g. 1000) without having to list N strings in the input
 script.
 
 For the {string} style, a single string is assigned to the variable.
-The only difference between this and using the {index} style with a
-single string is that a variable with {string} style can be redefined.
-E.g. by another command later in the input script, or if the script is
-read again in a loop.
+Two differences between this this and using the {index} style exist:
+a variable with {string} style can be redefined, e.g. by another command later
+in the input script, or if the script is read again in a loop. The other
+difference is that {string} performs variable substitution even if the
+string parameter is quoted.
 
 For the {format} style, an equal-style variable is specified along
 with a C-style format string, e.g. "%f" or "%.10g", which must be
 appropriate for formatting a double-precision floating-point value.
-This allows an equal-style variable to be formatted specifically for
-output as a string, e.g. by the "print"_print.html command, if the
-default format "%.15g" has too much precision.
+The default format is "%.15g".  This variable style allows an
+equal-style variable to be formatted precisely when it is evaluated.
+
+If you simply wish to print a variable value with desired precision to
+the screen or logfile via the "print"_print.html or "fix
+print"_fix_print.html commands, you can also do this by specifying an
+"immediate" variable with a trailing colon and format string, as part
+of the string argument of those commands.  This is explained in
+"Section 3.2"_Section_commands.html#cmd_2.
 
 For the {getenv} style, a single string is assigned to the variable
 which should be the name of an environment variable.  When the
diff --git a/examples/latte/data.ch4 b/examples/latte/data.ch4
new file mode 100644
index 0000000000000000000000000000000000000000..fc8d48d244305c0b4f64c523bb7021418ea4c439
--- /dev/null
+++ b/examples/latte/data.ch4
@@ -0,0 +1,23 @@
+ LAMMPS Description
+ 
+           5 atoms
+ 
+           2 atom types
+ 
+   0.0000000000000000        19.523000000000000      xlo xhi
+   0.0000000000000000        12.757999999999999      ylo yhi
+   0.0000000000000000        11.692000000000000      zlo zhi
+   0.0000000000000000        0.0000000000000000        0.0000000000000000      xy xz yz
+ 
+ Masses
+ 
+              1   12.010000000000000     
+              2   1.0078250169754028     
+ 
+ Atoms
+ 
+    1    1    1   0.0  -9.16600   2.05200   0.00000
+    2    1    2   0.0  -8.09600   2.05200   0.00000
+    3    1    2   0.0  -9.52300   2.75800  -0.72000
+    4    1    2   0.0  -9.52300   2.32200   0.97200
+    5    1    2   0.0  -9.52300   1.07500  -0.25200
diff --git a/examples/latte/data.graphene b/examples/latte/data.graphene
new file mode 100644
index 0000000000000000000000000000000000000000..5455a2eeb6cc1756e6c5ad5af01069aa5f01d866
--- /dev/null
+++ b/examples/latte/data.graphene
@@ -0,0 +1,49 @@
+ LAMMPS Description
+ 
+          32 atoms
+ 
+           1 atom types
+ 
+   0.0000000000000000        10.000000000000000      xlo xhi
+   0.0000000000000000        8.0000000000000000      ylo yhi
+   0.0000000000000000        20.000000000000000      zlo zhi
+   4.8985871965894128E-016   1.2246467991473533E-015   1.2246467991473533E-015 xy xz yz
+ 
+ Masses
+ 
+              1   12.010000000000000     
+ 
+ Atoms
+ 
+    1    1    1   0.0   4.93100   4.25000   0.00500
+    2    1    1   0.0   8.62100   2.12100   0.14000
+    3    1    1   0.0   3.70700   2.12600   0.14700
+    4    1    1   0.0   7.38200   4.25400   0.07800
+    5    1    1   0.0   2.47900   4.25400   0.08000
+    6    1    1   0.0   6.15800   6.37400  -0.01000
+    7    1    1   0.0   1.23700   6.38300   0.06600
+    8    1    1   0.0   1.24000   2.12100   0.14600
+    9    1    1   0.0   6.15500   2.12600   0.12900
+   10    1    1   0.0   0.00700   4.25200   0.12200
+   11    1    1   0.0   8.62100   6.38500   0.04100
+   12    1    1   0.0   3.70000   6.37400  -0.01000
+   13    1    1   0.0   0.00600   1.41600   0.13000
+   14    1    1   0.0   4.93000   1.40800   0.14700
+   15    1    1   0.0   8.61800   3.54600   0.11500
+   16    1    1   0.0   3.70800   3.55300   0.08400
+   17    1    1   0.0   7.39400   5.68000   0.03500
+   18    1    1   0.0   2.46500   5.68000   0.03500
+   19    1    1   0.0   6.16000   7.80500   0.02700
+   20    1    1   0.0   1.23800   7.81100   0.06000
+   21    1    1   0.0   2.47300   1.41800   0.16100
+   22    1    1   0.0   7.38900   1.41700   0.14800
+   23    1    1   0.0   1.24200   3.54700   0.12600
+   24    1    1   0.0   6.15300   3.55300   0.07400
+   25    1    1   0.0   0.00700   5.67800   0.09700
+   26    1    1   0.0   4.93100   5.66800  -0.03100
+   27    1    1   0.0   8.62000   7.81300   0.03900
+   28    1    1   0.0   3.70100   7.80200   0.03700
+   29    1    1   0.0   0.00700  -0.01000   0.08900
+   30    1    1   0.0   4.93100  -0.01500   0.16100
+   31    1    1   0.0   2.47300  -0.01200   0.14400
+   32    1    1   0.0   7.38900  -0.01300   0.14800
diff --git a/examples/latte/in.graphene.boxrel b/examples/latte/in.graphene.boxrel
new file mode 100644
index 0000000000000000000000000000000000000000..721b7f014f0981bdc8672e2d1bdc2f7bb9f6932d
--- /dev/null
+++ b/examples/latte/in.graphene.boxrel
@@ -0,0 +1,44 @@
+# Simple water model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.graphene
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *  
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes 
+
+timestep        0.00025
+
+fix 1 all box/relax iso 0.0 vmax 0.001
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom   etotal
+
+# minimization
+
+thermo          1
+fix 3 all print 1 "Total Energy ="
+min_style cg
+min_modify dmax 0.1
+min_modify line quadratic
+minimize        1.0e-4 1.0e-4 10000 10000
diff --git a/examples/latte/in.latte.sucrose.md b/examples/latte/in.latte.sucrose.md
new file mode 100644
index 0000000000000000000000000000000000000000..a10dae5c5ea9451d7733de60d695d559cd340b78
--- /dev/null
+++ b/examples/latte/in.latte.sucrose.md
@@ -0,0 +1,40 @@
+# simple sucrose model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.sucrose
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *  
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes 
+
+timestep        0.00025
+
+fix		1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run		100
diff --git a/examples/latte/in.latte.water.ch4.consecutive.md b/examples/latte/in.latte.water.ch4.consecutive.md
new file mode 100644
index 0000000000000000000000000000000000000000..4888d14ebc6fe862b8d98e2520879cdd11488f01
--- /dev/null
+++ b/examples/latte/in.latte.water.ch4.consecutive.md
@@ -0,0 +1,65 @@
+units           metal
+atom_style      full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.water
+
+# initialize system
+
+velocity        all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff      * * 
+
+neighbor        1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix             1 all nve
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run             10
+
+# Clear up previus calculation
+
+clear
+
+# simple CH4 molecule with LATTE
+
+units           metal
+atom_style      full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.ch4
+
+# initialize system
+
+velocity        all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff      * * 
+
+neighbor        1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix             1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run             10
+
diff --git a/examples/latte/in.latte.water.md b/examples/latte/in.latte.water.md
new file mode 100644
index 0000000000000000000000000000000000000000..e1185602b43ec5990e27f904cb97cfc2f725a3df
--- /dev/null
+++ b/examples/latte/in.latte.water.md
@@ -0,0 +1,40 @@
+# simple water model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.water
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *  
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes 
+
+timestep        0.00025
+
+fix		1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run		100
diff --git a/examples/latte/in.latte.water.min b/examples/latte/in.latte.water.min
index 503e45a300b25802bde6ae646d317c15774dd355..173afee96a8b8d5987a7bd92ebda8ddb24996408 100644
--- a/examples/latte/in.latte.water.min
+++ b/examples/latte/in.latte.water.min
@@ -37,5 +37,6 @@ thermo_style    custom step temp pe etotal press
 # minimization
 
 thermo          10
-min_style fire
-minimize        1.0e-9 1.0e-9 500 500
+
+min_style       fire
+minimize        1.0e-4 1.0e-4 500 500
diff --git a/examples/latte/latte.in b/examples/latte/latte.in
index b8b214b78b4847ee4a58c0a03e6f8a94adc78e4b..de905f411e552a9639c7fb774e6b254a5baa7a19 100644
--- a/examples/latte/latte.in
+++ b/examples/latte/latte.in
@@ -11,7 +11,6 @@ LATTE INPUT FILE
 CONTROL{
   xControl= 1
   BASISTYPE= NONORTHO
-  COORDSFILE= "./coords.dat"
   PARAMPATH= "./TBparam"
   KBT= 0.0
   ENTROPYKIND= 1
@@ -22,9 +21,8 @@ CONTROL{
   BREAKTOL= 1.0E-6 MINSP2ITER= 22 SP2CONV= REL
   FULLQCONV= 1 QITER= 3
   QMIX= 0.25 SPINMIX= 0.25 MDMIX= 0.25
-  SPARSEON= 1 THRESHOLDON= 1 NUMTHRESH= 1.0e-6 FILLINSTOP= 100 BLKSZ= 4
+  SPARSEON= 0 THRESHOLDON= 1 NUMTHRESH= 1.0e-6 FILLINSTOP= 100 BLKSZ= 4
   MSPARSE= 1500
-  RELAX= 0 RELAXTYPE= SD MAXITER= 100000 RLXFTOL= 0.0000001
   SKIN= 1.0
   CHARGE= 0
   XBO= 1
@@ -32,9 +30,3 @@ CONTROL{
   XBODISORDER= 5
   KON= 0
 }
-
-#Controls for QMD (if using lammps MAXITER must be set to -1)
-MDCONTROL{
-  MAXITER= -1
-}
-
diff --git a/examples/latte/log.19Sep17.latte.sucrose.g++.1 b/examples/latte/log.19Sep17.latte.sucrose.md.g++.1
similarity index 100%
rename from examples/latte/log.19Sep17.latte.sucrose.g++.1
rename to examples/latte/log.19Sep17.latte.sucrose.md.g++.1
diff --git a/examples/latte/log.19Sep17.latte.water.g++.1 b/examples/latte/log.19Sep17.latte.water.md.g++.1
similarity index 100%
rename from examples/latte/log.19Sep17.latte.water.g++.1
rename to examples/latte/log.19Sep17.latte.water.md.g++.1
diff --git a/examples/latte/log.19Sep17.latte.water.min.g++.1 b/examples/latte/log.19Sep17.latte.water.min.g++.1
deleted file mode 100644
index 4b96bd26682bcb3162250bc48ecd8cd3eb60387d..0000000000000000000000000000000000000000
--- a/examples/latte/log.19Sep17.latte.water.min.g++.1
+++ /dev/null
@@ -1,152 +0,0 @@
-LAMMPS (1 Sep 2017)
-# simple water model with LATTE
-
-units		metal
-atom_style	full
-atom_modify     sort 0 0.0    # turn off sorting of the coordinates
-
-read_data       data.water
-  orthogonal box = (0 0 0) to (6.267 6.267 6.267)
-  1 by 1 by 1 MPI processor grid
-  reading atoms ...
-  24 atoms
-  0 = max # of 1-2 neighbors
-  0 = max # of 1-3 neighbors
-  0 = max # of 1-4 neighbors
-  1 = max # of special neighbors
-
-# replicate system if requested
-
-variable	x index 1
-variable	y index 1
-variable	z index 1
-
-variable        nrep equal v_x*v_y*v_z
-if              "${nrep} > 1" then "replicate $x $y $z"
-
-# initialize system
-
-velocity	all create 0.0 87287 loop geom
-
-pair_style      zero 1.0
-pair_coeff	* *
-
-neighbor	1.0 bin
-neigh_modify    every 1 delay 0 check yes
-
-timestep        0.00025
-
-fix		1 all nve
-
-fix             2 all latte NULL
-fix_modify      2 energy yes
-
-thermo_style    custom step temp pe etotal press
-
-# minimization
-
-thermo          10
-min_style fire
-minimize        1.0e-9 1.0e-9 500 500
-Neighbor list info ...
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 2
-  ghost atom cutoff = 2
-  binsize = 1, bins = 7 7 7
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair zero, perpetual
-      attributes: half, newton on
-      pair build: half/bin/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
-Step Temp PotEng TotEng Press 
-       0            0   -104.95614   -104.95614    48229.712 
-      10    349.44219   -105.50971   -104.47083    62149.591 
-      20    1253.6752   -107.00898   -103.28182    116444.44 
-      30    134.63588   -107.56184   -107.16157    59854.143 
-      40    2.4043703   -108.15301   -108.14586     32685.77 
-      50    162.13426   -108.40551   -107.92349    62104.273 
-      60    134.03149   -108.70118   -108.30271    49400.525 
-      70    64.159014   -108.78034    -108.5896    37243.303 
-      80    240.49926   -109.10766   -108.39266    42158.884 
-      90   0.60467192   -109.61818   -109.61639    14107.515 
-     100    1.4691163   -109.65556   -109.65119    21596.775 
-     110    30.500628   -109.69267     -109.602    16104.639 
-     120    120.62379   -109.83749   -109.47888     9474.971 
-     130    8.4742975   -109.99986   -109.97467    10104.102 
-     140    3.4732679   -110.01209   -110.00176    11990.442 
-     150    24.749482   -110.04313   -109.96955    10851.569 
-     160    4.1106505   -110.13288   -110.12066    8257.3969 
-     170 0.0065628716   -110.18061   -110.18059    7876.8748 
-     180    2.0542078    -110.1837   -110.17759    7996.0533 
-     190    20.134782   -110.21071   -110.15085    7556.1811 
-     200    2.3397267    -110.3244   -110.31745     3767.062 
-     210    4.3544709   -110.34438   -110.33143     4889.145 
-     220    1.1872367   -110.37457   -110.37104    4162.6543 
-     230    2.2798399   -110.38081   -110.37403    4321.0943 
-     240    11.835907   -110.39611   -110.36092    4187.5757 
-     250   0.13741849   -110.41453   -110.41412    3720.7527 
-     260    4.2283185   -110.42036   -110.40779    3743.3494 
-     270   0.47243724   -110.44349   -110.44208    3172.1866 
-     280   0.06090137   -110.45428    -110.4541    3065.9348 
-     290    5.3413962   -110.46285   -110.44697    3121.2924 
-     300    8.2032986   -110.48519    -110.4608    2705.5001 
-     310    2.0783529   -110.48807   -110.48189    2740.7989 
-     320    16.629185   -110.51002   -110.46058    2581.7434 
-     330   0.19723065   -110.53444   -110.53385    1942.0228 
-     340    6.2758334   -110.54361   -110.52495    1924.0965 
-     350    1.4539052   -110.59108   -110.58676   -449.41056 
-     360    0.0514233   -110.60143   -110.60128    1284.8259 
-     370    1.7240145   -110.60394   -110.59881    1468.0004 
-     380     13.28516   -110.62337   -110.58387    1573.4714 
-     390    1.2247432   -110.63525   -110.63161    1113.4557 
-     400    0.3946985   -110.63694   -110.63576    1083.0801 
-     410    2.9831433     -110.641   -110.63213     1112.419 
-     420  0.068550589   -110.66029   -110.66009    897.09211 
-     430   0.83976182   -110.66259   -110.66009    918.69832 
-     440    4.4760907   -110.66844   -110.65513    915.24435 
-     450    1.2841241   -110.67482     -110.671    953.30422 
-     460    2.5707455   -110.68509   -110.67745    775.21273 
-     470   0.99721544   -110.68646    -110.6835    812.74984 
-     480    6.8379261   -110.69468   -110.67435     787.9705 
-     490   0.18134438   -110.69628   -110.69574    675.52792 
-     500    2.0946523   -110.69918   -110.69295    696.82065 
-Loop time of 31.775 on 1 procs for 500 steps with 24 atoms
-
-884.8% CPU use with 1 MPI tasks x no OpenMP threads
-
-Minimization stats:
-  Stopping criterion = max iterations
-  Energy initial, next-to-last, final = 
-         -104.95614332     -110.698546127     -110.699182193
-  Force two-norm initial, final = 19.119 0.234621
-  Force max component initial, final = 11.7759 0.0903198
-  Final line search alpha, max atom move = 0 0
-  Iterations, force evaluations = 500 500
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.00016952 | 0.00016952 | 0.00016952 |   0.0 |  0.00
-Bond    | 2.8372e-05 | 2.8372e-05 | 2.8372e-05 |   0.0 |  0.00
-Neigh   | 3.0994e-05 | 3.0994e-05 | 3.0994e-05 |   0.0 |  0.00
-Comm    | 0.00060034 | 0.00060034 | 0.00060034 |   0.0 |  0.00
-Output  | 0.00057817 | 0.00057817 | 0.00057817 |   0.0 |  0.00
-Modify  | 31.771     | 31.771     | 31.771     |   0.0 | 99.99
-Other   |            | 0.002469   |            |       |  0.01
-
-Nlocal:    24 ave 24 max 24 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    71 ave 71 max 71 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    27 ave 27 max 27 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 27
-Ave neighs/atom = 1.125
-Ave special neighs/atom = 0
-Neighbor list builds = 2
-Dangerous builds = 0
-Total wall time: 0:00:31
diff --git a/examples/latte/log.21Jun18.latte.graphene.boxrelax.g++.1 b/examples/latte/log.21Jun18.latte.graphene.boxrelax.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..3a37136fd36595826ab3e5d51b5ef31bca1f3b96
--- /dev/null
+++ b/examples/latte/log.21Jun18.latte.graphene.boxrelax.g++.1
@@ -0,0 +1,170 @@
+LAMMPS (11 May 2018)
+# Simple water model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.graphene.boxrel
+  triclinic box = (0 0 0) to (10 8 20) with tilt (4.89859e-16 1.22465e-15 1.22465e-15)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  32 atoms
+  0 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix 1 all box/relax iso 0.0 vmax 0.001
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom   etotal
+
+# minimization
+
+thermo          1
+fix 3 all print 1 "Total Energy ="
+min_style cg
+min_modify dmax 0.1
+min_modify line quadratic
+minimize        1.0e-4 1.0e-4 10000 10000
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 11 9 20
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/newton/tri
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.779 | 6.779 | 6.779 Mbytes
+TotEng 
+  -247.46002 
+  -247.67224 
+  -247.87937 
+  -248.08148 
+  -248.27865 
+  -248.47096 
+  -248.65851 
+  -248.84137 
+  -249.01964 
+  -249.19342 
+  -249.36281 
+  -249.52791 
+  -249.68883 
+   -249.8457 
+  -249.99865 
+   -250.1478 
+  -250.29332 
+  -250.43535 
+  -250.57409 
+  -250.70972 
+  -250.84247 
+  -250.97258 
+  -251.10035 
+   -251.2261 
+  -251.35021 
+  -251.47314 
+  -251.59543 
+  -251.71776 
+  -251.84096 
+   -251.9661 
+  -252.09459 
+  -252.22833 
+  -252.37003 
+  -252.52371 
+  -252.69578 
+  -252.89752 
+  -253.15197 
+  -253.52044 
+  -254.31418 
+   -255.6175 
+   -256.8162 
+   -258.1227 
+  -259.38401 
+  -260.74831 
+  -262.03991 
+   -263.5463 
+  -264.70486 
+  -267.69144 
+  -267.88682 
+  -269.03519 
+  -270.60187 
+  -270.65382 
+  -270.74279 
+  -271.55883 
+  -271.81248 
+  -271.87529 
+  -273.01494 
+  -273.23948 
+  -273.28719 
+  -273.35272 
+  -273.41591 
+  -273.46274 
+  -273.54755 
+  -273.58318 
+  -273.73111 
+  -273.75754 
+Loop time of 39.4155 on 1 procs for 65 steps with 32 atoms
+
+1582.4% CPU use with 1 MPI tasks x no OpenMP threads
+
+Minimization stats:
+  Stopping criterion = energy tolerance
+  Energy initial, next-to-last, final = 
+        -247.460020579     -273.731112592     -273.757543461
+  Force two-norm initial, final = 201.608 9.43485
+  Force max component initial, final = 188.924 2.41297
+  Final line search alpha, max atom move = 0.000223273 0.00053875
+  Iterations, force evaluations = 65 65
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00012159 | 0.00012159 | 0.00012159 |   0.0 |  0.00
+Bond    | 5.1975e-05 | 5.1975e-05 | 5.1975e-05 |   0.0 |  0.00
+Neigh   | 4.1962e-05 | 4.1962e-05 | 4.1962e-05 |   0.0 |  0.00
+Comm    | 0.00026107 | 0.00026107 | 0.00026107 |   0.0 |  0.00
+Output  | 0.0013342  | 0.0013342  | 0.0013342  |   0.0 |  0.00
+Modify  | 39.412     | 39.412     | 39.412     |   0.0 | 99.99
+Other   |            | 0.00127    |            |       |  0.00
+
+Nlocal:    32 ave 32 max 32 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    100 ave 100 max 100 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    48 ave 48 max 48 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 48
+Ave neighs/atom = 1.5
+Ave special neighs/atom = 0
+Neighbor list builds = 1
+Dangerous builds = 0
+Total wall time: 0:00:40
diff --git a/examples/latte/log.21Jun18.latte.sucrose.g++.1 b/examples/latte/log.21Jun18.latte.sucrose.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..cb4526587cccc40da331b1dceed633550e87158f
--- /dev/null
+++ b/examples/latte/log.21Jun18.latte.sucrose.g++.1
@@ -0,0 +1,103 @@
+LAMMPS (11 May 2018)
+# simple sucrose model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.sucrose
+  orthogonal box = (0 0 0) to (17.203 18.009 21.643)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  45 atoms
+  0 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix		1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run		100
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 18 19 22
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 0.5064 | 0.5064 | 0.5064 Mbytes
+Step Temp PotEng TotEng Press 
+       0            0   -251.26617   -251.26617    16.617234 
+      10  0.025263709   -251.26631   -251.26617    8.0576708 
+      20  0.034232467   -251.26636   -251.26617    1.6673442 
+      30  0.059079556    -251.2665   -251.26617    11.058458 
+      40  0.055499766   -251.26648   -251.26617    14.837775 
+      50  0.058499509    -251.2665   -251.26617    6.7183113 
+      60  0.071094535   -251.26657   -251.26617    6.6133687 
+      70  0.084309439   -251.26665   -251.26617    12.372721 
+      80    0.1089929   -251.26679   -251.26617    8.8355516 
+      90   0.11378257   -251.26681   -251.26617    5.1177922 
+     100   0.13003966   -251.26691   -251.26617    8.2431185 
+Loop time of 27.8386 on 1 procs for 100 steps with 45 atoms
+
+Performance: 0.078 ns/day, 309.318 hours/ns, 3.592 timesteps/s
+1799.6% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 8.3685e-05 | 8.3685e-05 | 8.3685e-05 |   0.0 |  0.00
+Bond    | 7.4148e-05 | 7.4148e-05 | 7.4148e-05 |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.00016689 | 0.00016689 | 0.00016689 |   0.0 |  0.00
+Output  | 0.00032401 | 0.00032401 | 0.00032401 |   0.0 |  0.00
+Modify  | 27.837     | 27.837     | 27.837     |   0.0 |100.00
+Other   |            | 0.0005403  |            |       |  0.00
+
+Nlocal:    45 ave 45 max 45 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    59 ave 59 max 59 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 59
+Ave neighs/atom = 1.31111
+Ave special neighs/atom = 0
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:28
diff --git a/examples/latte/log.21Jun18.latte.water.g++.1 b/examples/latte/log.21Jun18.latte.water.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..0decce1f985aa32cd18220ae15f9ca07373748ef
--- /dev/null
+++ b/examples/latte/log.21Jun18.latte.water.g++.1
@@ -0,0 +1,103 @@
+LAMMPS (11 May 2018)
+# simple water model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.water
+  orthogonal box = (0 0 0) to (6.267 6.267 6.267)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  24 atoms
+  0 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix		1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# dynamics
+
+thermo          10
+run		100
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 7 7 7
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
+Step Temp PotEng TotEng Press 
+       0            0   -104.95594   -104.95594    48236.006 
+      10     336.5303   -105.96026   -104.95976    97997.303 
+      20    529.06385   -106.53021   -104.95731    131520.49 
+      30    753.62616    -107.1995   -104.95898    49297.371 
+      40     716.6565   -107.08802   -104.95741    28307.272 
+      50    824.04417   -107.40822   -104.95835    102167.48 
+      60    933.56056   -107.73478   -104.95932    92508.792 
+      70    851.18518   -107.48766   -104.95711     13993.28 
+      80    999.80265   -107.93146   -104.95906    36700.417 
+      90    998.77707   -107.92569   -104.95634     107233.7 
+     100    1281.4446   -108.76961   -104.95989    49703.193 
+Loop time of 10.6388 on 1 procs for 100 steps with 24 atoms
+
+Performance: 0.203 ns/day, 118.209 hours/ns, 9.400 timesteps/s
+6459.7% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 7.6771e-05 | 7.6771e-05 | 7.6771e-05 |   0.0 |  0.00
+Bond    | 7.5817e-05 | 7.5817e-05 | 7.5817e-05 |   0.0 |  0.00
+Neigh   | 4.6015e-05 | 4.6015e-05 | 4.6015e-05 |   0.0 |  0.00
+Comm    | 0.00031829 | 0.00031829 | 0.00031829 |   0.0 |  0.00
+Output  | 0.00032401 | 0.00032401 | 0.00032401 |   0.0 |  0.00
+Modify  | 10.637     | 10.637     | 10.637     |   0.0 | 99.99
+Other   |            | 0.00052    |            |       |  0.00
+
+Nlocal:    24 ave 24 max 24 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    77 ave 77 max 77 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    31 ave 31 max 31 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 31
+Ave neighs/atom = 1.29167
+Ave special neighs/atom = 0
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:10
diff --git a/examples/latte/log.21Jun18.latte.water.min.g++.1 b/examples/latte/log.21Jun18.latte.water.min.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..1c8921fd60cdb482ffb3650e14bb3250ee83e011
--- /dev/null
+++ b/examples/latte/log.21Jun18.latte.water.min.g++.1
@@ -0,0 +1,108 @@
+LAMMPS (11 May 2018)
+# simple water model with LATTE
+
+units		metal
+atom_style	full
+atom_modify     sort 0 0.0    # turn off sorting of the coordinates
+
+read_data       data.water
+  orthogonal box = (0 0 0) to (6.267 6.267 6.267)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  24 atoms
+  0 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+# replicate system if requested
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable        nrep equal v_x*v_y*v_z
+if              "${nrep} > 1" then "replicate $x $y $z"
+
+# initialize system
+
+velocity	all create 0.0 87287 loop geom
+
+pair_style      zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 0 check yes
+
+timestep        0.00025
+
+fix		1 all nve
+
+fix             2 all latte NULL
+fix_modify      2 energy yes
+
+thermo_style    custom step temp pe etotal press
+
+# minimization
+
+thermo          10
+
+min_style       fire
+minimize        1.0e-4 1.0e-4 500 500
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 7 7 7
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
+Step Temp PotEng TotEng Press 
+       0            0   -104.95594   -104.95594    48236.006 
+      10     349.4534   -105.50948   -104.47056    62157.729 
+      20    1253.6636   -107.00863   -103.28151    116456.71 
+      30    134.64051   -107.56155   -107.16127    59864.196 
+      40    2.4044989    -108.1527   -108.14556    32695.648 
+      47    137.26885   -108.30413   -107.89603    60177.442 
+Loop time of 6.42677 on 1 procs for 47 steps with 24 atoms
+
+6481.9% CPU use with 1 MPI tasks x no OpenMP threads
+
+Minimization stats:
+  Stopping criterion = energy tolerance
+  Energy initial, next-to-last, final = 
+        -104.955944301     -108.302982895     -108.304126127
+  Force two-norm initial, final = 19.119 3.44609
+  Force max component initial, final = 11.7758 1.3408
+  Final line search alpha, max atom move = 0 0
+  Iterations, force evaluations = 47 47
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 4.6253e-05 | 4.6253e-05 | 4.6253e-05 |   0.0 |  0.00
+Bond    | 3.1948e-05 | 3.1948e-05 | 3.1948e-05 |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.00014353 | 0.00014353 | 0.00014353 |   0.0 |  0.00
+Output  | 0.00012302 | 0.00012302 | 0.00012302 |   0.0 |  0.00
+Modify  | 6.426      | 6.426      | 6.426      |   0.0 | 99.99
+Other   |            | 0.0004699  |            |       |  0.01
+
+Nlocal:    24 ave 24 max 24 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    71 ave 71 max 71 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    37 ave 37 max 37 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 37
+Ave neighs/atom = 1.54167
+Ave special neighs/atom = 0
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:06
diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index 21663ad85cb161c5acf97ee0463f295beaf58c17..0d7214260da804c79a26413b51458623f71c078a 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -79,7 +79,10 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
        $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
        $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
        $(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
-       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o
+       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
+       $(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
+       $(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
+       $(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
 
 CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
        $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@@ -137,7 +140,10 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
        $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
        $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
        $(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
-       $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h
+       $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
+       $(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
+       $(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
+       $(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
 
 all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
 
@@ -837,6 +843,42 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
 $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
 
+$(OBJ_DIR)/coul_long_cs.cubin: lal_coul_long_cs.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_long_cs.cu
+
+$(OBJ_DIR)/coul_long_cs_cubin.h: $(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs.cubin
+	$(BIN2C) -c -n coul_long_cs $(OBJ_DIR)/coul_long_cs.cubin > $(OBJ_DIR)/coul_long_cs_cubin.h
+
+$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp $(OBJ_DIR)/coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
+	$(CUDR) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
+	$(CUDR) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/born_coul_long_cs.cubin: lal_born_coul_long_cs.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_long_cs.cu
+
+$(OBJ_DIR)/born_coul_long_cs_cubin.h: $(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs.cubin
+	$(BIN2C) -c -n born_coul_long_cs $(OBJ_DIR)/born_coul_long_cs.cubin > $(OBJ_DIR)/born_coul_long_cs_cubin.h
+
+$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp $(OBJ_DIR)/born_coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
+	$(CUDR) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
+	$(CUDR) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/born_coul_wolf_cs.cubin: lal_born_coul_wolf_cs.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_wolf_cs.cu
+
+$(OBJ_DIR)/born_coul_wolf_cs_cubin.h: $(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs.cubin
+	$(BIN2C) -c -n born_coul_wolf_cs $(OBJ_DIR)/born_coul_wolf_cs.cubin > $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
+
+$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp $(OBJ_DIR)/born_coul_wolf_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
+	$(CUDR) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
+	$(CUDR) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
+
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
 	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
 
diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index 3e73e6256c7b998c449a60e32e2a3974f636b227..86991a1e98f09ef30217e0c003239b9fcff8f11c 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -68,7 +68,10 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
        $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
        $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
        $(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
-       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o
+       $(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
+       $(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
+       $(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
+       $(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
 
 KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
        $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
@@ -98,7 +101,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
        $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
        $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/vashishta_cl.h \
        $(OBJ_DIR)/ufm_cl.h  $(OBJ_DIR)/dipole_long_lj_cl.h \
-       $(OBJ_DIR)/lj_expand_coul_long_cl.h
+       $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/coul_long_cs_cl.h \
+       $(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/born_coul_wolf_cs_cl.h
 
 
 OCL_EXECS = $(BIN_DIR)/ocl_get_devices
@@ -609,6 +613,33 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
 $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
 	$(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
 
+$(OBJ_DIR)/coul_long_cs_cl.h: lal_coul_long_cs.cu $(PRE1_H)
+	$(BSH) ./geryon/file_to_cstr.sh coul_long_cs $(PRE1_H) lal_coul_long_cs.cu $(OBJ_DIR)/coul_long_cs_cl.h;
+
+$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp  $(OBJ_DIR)/coul_long_cs_cl.h $(OBJ_DIR)/coul_long_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
+	$(OCL) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
+	$(OCL) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/born_coul_long_cs_cl.h: lal_born_coul_long_cs.cu $(PRE1_H)
+	$(BSH) ./geryon/file_to_cstr.sh born_coul_long_cs $(PRE1_H) lal_born_coul_long_cs.cu $(OBJ_DIR)/born_coul_long_cs_cl.h;
+
+$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp  $(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
+	$(OCL) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
+	$(OCL) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/born_coul_wolf_cs_cl.h: lal_born_coul_wolf_cs.cu $(PRE1_H)
+	$(BSH) ./geryon/file_to_cstr.sh born_coul_wolf_cs $(PRE1_H) lal_born_coul_wolf_cs.cu $(OBJ_DIR)/born_coul_wolf_cs_cl.h;
+
+$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp  $(OBJ_DIR)/born_coul_wolf_cs_cl.h $(OBJ_DIR)/born_coul_wolf_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
+	$(OCL) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
+	$(OCL) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
+
 $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
 	$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) 
 
diff --git a/lib/gpu/lal_born_coul_long.cpp b/lib/gpu/lal_born_coul_long.cpp
index 68695c49380ea9a0810a98ca0273686c6d3eb61c..116d44d58fd340e13b75439dab4d380dd13de89b 100644
--- a/lib/gpu/lal_born_coul_long.cpp
+++ b/lib/gpu/lal_born_coul_long.cpp
@@ -57,7 +57,7 @@ int BornCoulLongT::init(const int ntypes, double **host_cutsq, double **host_rho
                        const double g_ewald) {
   int success;
   success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
-                            _screen,born_coul_long,"k_born_long");
+                            _screen,born_coul_long,"k_born_coul_long");
   if (success!=0)
     return success;
 
diff --git a/lib/gpu/lal_born_coul_long.cu b/lib/gpu/lal_born_coul_long.cu
index 4cb4ea448fbf74374ce8b7ca7bd95b7cfda6505a..71e5e0ae502e8847bc006788deaf82da9477868d 100644
--- a/lib/gpu/lal_born_coul_long.cu
+++ b/lib/gpu/lal_born_coul_long.cu
@@ -29,7 +29,7 @@ texture<int2> q_tex;
 #define q_tex q_
 #endif
 
-__kernel void k_born_long(const __global numtyp4 *restrict x_,
+__kernel void k_born_coul_long(const __global numtyp4 *restrict x_,
                           const __global numtyp4 *restrict coeff1,
                           const __global numtyp4 *restrict coeff2,
                           const int lj_types,
@@ -110,7 +110,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
           forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
         } else forcecoul = (numtyp)0.0;
 
-        if (rsq < cutsq_sigma[mtype].y) {
+        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
           numtyp r = ucl_sqrt(rsq);
           rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
           r6inv = r2inv*r2inv*r2inv;
@@ -127,7 +127,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
         if (eflag>0) {
           if (rsq < cut_coulsq)
             e_coul += prefactor*(_erfc-factor_coul);
-          if (rsq < coeff1[mtype].w) {
+          if (rsq < cutsq_sigma[mtype].y) {
             numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
               + coeff2[mtype].z*r2inv*r6inv;
             energy+=factor_lj*(e-coeff2[mtype].w);
@@ -149,7 +149,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
   } // if ii
 }
 
-__kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
+__kernel void k_born_coul_long_fast(const __global numtyp4 *restrict x_,
                                const __global numtyp4 *restrict coeff1_in,
                                const __global numtyp4 *restrict coeff2_in,
                                const __global numtyp *restrict sp_lj_in,
@@ -232,7 +232,7 @@ __kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
           forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
         } else forcecoul = (numtyp)0.0;
 
-        if (rsq < cutsq_sigma[mtype].y) {
+        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
           numtyp r = ucl_sqrt(rsq);
           rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
           r6inv = r2inv*r2inv*r2inv;
@@ -249,7 +249,7 @@ __kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
         if (eflag>0) {
           if (rsq < cut_coulsq)
             e_coul += prefactor*(_erfc-factor_coul);
-          if (rsq < coeff1[mtype].w) {
+          if (rsq < cutsq_sigma[mtype].y) {
             numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
               + coeff2[mtype].z*r2inv*r6inv;
             energy+=factor_lj*(e-coeff2[mtype].w);
diff --git a/lib/gpu/lal_born_coul_long.h b/lib/gpu/lal_born_coul_long.h
index e0de27c71cc19091887d0c162af5861e37759df0..d236b7b57feca914beb028aae304c91eddda1ff3 100644
--- a/lib/gpu/lal_born_coul_long.h
+++ b/lib/gpu/lal_born_coul_long.h
@@ -78,7 +78,7 @@ class BornCoulLong : public BaseCharge<numtyp, acctyp> {
 
   numtyp _cut_coulsq, _qqrd2e, _g_ewald;
 
- private:
+ protected:
   bool _allocated;
   void loop(const bool _eflag, const bool _vflag);
 };
diff --git a/lib/gpu/lal_born_coul_long_cs.cpp b/lib/gpu/lal_born_coul_long_cs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e7fb946f148949f3b4d50dc71e662458f32977fd
--- /dev/null
+++ b/lib/gpu/lal_born_coul_long_cs.cpp
@@ -0,0 +1,95 @@
+/***************************************************************************
+                            born_coul_long_cs.cpp
+                             -------------------
+                           Trung Dac Nguyen (Northwestern)
+
+  Class for acceleration of the born/coul/long/cs pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifdef USE_OPENCL
+#include "born_coul_long_cs_cl.h"
+#elif defined(USE_CUDART)
+const char *born_coul_long_cs=0;
+#else
+#include "born_coul_long_cs_cubin.h"
+#endif
+
+#include "lal_born_coul_long_cs.h"
+#include <cassert>
+using namespace LAMMPS_AL;
+#define BornCoulLongCST BornCoulLongCS<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+int BornCoulLongCST::init(const int ntypes, double **host_cutsq, double **host_rhoinv,
+                       double **host_born1, double **host_born2, double **host_born3,
+                       double **host_a, double **host_c, double **host_d,
+                       double **host_sigma, double **host_offset,
+                       double *host_special_lj, const int nlocal,
+                       const int nall, const int max_nbors,
+                       const int maxspecial, const double cell_size,
+                       const double gpu_split, FILE *_screen,
+                       double **host_cut_ljsq, const double host_cut_coulsq,
+                       double *host_special_coul, const double qqrd2e,
+                       const double g_ewald) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
+                            _screen,born_coul_long_cs,"k_born_coul_long_cs");
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  this->shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    this->shared_types=true;
+  }
+  this->_lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  this->coeff1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->coeff1,host_write,host_rhoinv,
+                         host_born1,host_born2,host_born3);
+
+  this->coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->coeff2,host_write,host_a,host_c,
+                         host_d,host_offset);
+
+  this->cutsq_sigma.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->cutsq_sigma,host_write,host_cutsq,
+             host_cut_ljsq,host_sigma);
+
+  this->sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_lj[i];
+    host_write[i+4]=host_special_coul[i];
+  }
+  ucl_copy(this->sp_lj,host_write,8,false);
+
+  this->_cut_coulsq=host_cut_coulsq;
+  this->_qqrd2e=qqrd2e;
+  this->_g_ewald=g_ewald;
+
+  this->_allocated=true;
+  this->_max_bytes=this->coeff1.row_bytes()+this->coeff2.row_bytes()
+      +this->cutsq_sigma.row_bytes()+this->sp_lj.row_bytes();
+  return 0;
+}
+
+template class BornCoulLongCS<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b3e79d9ec841afd0d31cf64b5af1c74f9469a811
--- /dev/null
+++ b/lib/gpu/lal_born_coul_long_cs.cu
@@ -0,0 +1,325 @@
+// **************************************************************************
+//                            born_coul_long_cs.cu
+//                             -------------------
+//                           Trung Dac Nguyen (Northwestern)
+//
+//  Device code for acceleration of the born/coul/long/cs pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : June 2018
+//    email                : ndactrung@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+texture<float4> pos_tex;
+texture<float> q_tex;
+#else
+texture<int4,1> pos_tex;
+texture<int2> q_tex;
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+// Note: EWALD_P is different from that in lal_preprocessor.h
+//       acctyp is needed for these parameters
+#define CS_EWALD_P (acctyp)9.95473818e-1
+#define B0        (acctyp)-0.1335096380159268
+#define B1        (acctyp)-2.57839507e-1
+#define B2        (acctyp)-1.37203639e-1
+#define B3        (acctyp)-8.88822059e-3
+#define B4        (acctyp)-5.80844129e-3
+#define B5        (acctyp)1.14652755e-1
+
+#define EPSILON (acctyp)(1.0e-20)
+#define EPS_EWALD (acctyp)(1.0e-6)
+#define EPS_EWALD_SQR (acctyp)(1.0e-12)
+
+__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
+                          const __global numtyp4 *restrict coeff1,
+                          const __global numtyp4 *restrict coeff2,
+                          const int lj_types,
+                          const __global numtyp *restrict sp_lj_in,
+                          const __global int *dev_nbor,
+                          const __global int *dev_packed,
+                          __global acctyp4 *restrict ans,
+                          __global acctyp *restrict engv,
+                          const int eflag, const int vflag, const int inum,
+                          const int nbor_pitch,
+                          const __global numtyp *restrict q_,
+                          const __global numtyp4 *restrict cutsq_sigma,
+                          const numtyp cut_coulsq, const numtyp qqrd2e,
+                          const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_lj[8];
+  sp_lj[0]=sp_lj_in[0];
+  sp_lj[1]=sp_lj_in[1];
+  sp_lj[2]=sp_lj_in[2];
+  sp_lj[3]=sp_lj_in[3];
+  sp_lj[4]=sp_lj_in[4];
+  sp_lj[5]=sp_lj_in[5];
+  sp_lj[6]=sp_lj_in[6];
+  sp_lj[7]=sp_lj_in[7];
+
+  acctyp energy=(acctyp)0;
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int itype=ix.w;
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<cutsq_sigma[mtype].x) { // cutsq 
+        numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
+
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+        numtyp r2inv = ucl_recip(rsq);
+
+        if (rsq < cut_coulsq) {
+          numtyp r = ucl_sqrt(rsq);
+          fetch(prefactor,j,q_tex);
+          prefactor *= qqrd2e * qtmp;
+          if (factor_coul<(numtyp)1.0) {
+            numtyp grij = g_ewald * (r+EPS_EWALD);
+            numtyp expm2 = ucl_exp(-grij*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            numtyp u = (numtyp)1.0 - t;
+            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= (r+EPS_EWALD);
+            forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
+            // Additionally r2inv needs to be accordingly modified since the later
+            // scaling of the overall force shall be consistent
+            r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
+          } else {
+            numtyp grij = g_ewald * r;
+            numtyp expm2 = ucl_exp(-grij*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            numtyp u = (numtyp)1.0 - t;
+            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= r;
+            forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
+          }
+        } else forcecoul = (numtyp)0.0;
+
+        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
+          numtyp r = ucl_sqrt(rsq);
+          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
+          r6inv = r2inv*r2inv*r2inv;
+          forceborn = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
+            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
+        } else forceborn = (numtyp)0.0;
+
+        force = (forcecoul + forceborn) * r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          if (rsq < cut_coulsq) {
+            numtyp e = prefactor*_erfc;
+            if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+            e_coul += e;
+          }
+          if (rsq < cutsq_sigma[mtype].y) {
+            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
+              + coeff2[mtype].z*r2inv*r6inv;
+            energy+=factor_lj*(e-coeff2[mtype].w);
+          }
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                    vflag,ans,engv);
+  } // if ii
+}
+
+__kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
+                               const __global numtyp4 *restrict coeff1_in,
+                               const __global numtyp4 *restrict coeff2_in,
+                               const __global numtyp *restrict sp_lj_in,
+                               const __global int *dev_nbor,
+                               const __global int *dev_packed,
+                               __global acctyp4 *restrict ans,
+                               __global acctyp *restrict engv,
+                               const int eflag, const int vflag, const int inum,
+                               const int nbor_pitch,
+                               const __global numtyp *restrict q_,
+                               const __global numtyp4 *restrict cutsq_sigma,
+                               const numtyp cut_coulsq, const numtyp qqrd2e,
+                               const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp4 coeff1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[8];
+  if (tid<8)
+    sp_lj[tid]=sp_lj_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff1[tid]=coeff1_in[tid];
+    if (eflag>0)
+      coeff2[tid]=coeff2_in[tid];
+  }
+
+  acctyp energy=(acctyp)0;
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_sigma[mtype].x) { // cutsq 
+        numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
+
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+        numtyp r2inv = ucl_recip(rsq);
+
+        if (rsq < cut_coulsq) {
+          numtyp r = ucl_sqrt(rsq);
+          fetch(prefactor,j,q_tex);
+          prefactor *= qqrd2e * qtmp;
+          if (factor_coul<(numtyp)1.0) {
+            numtyp grij = g_ewald * (r+EPS_EWALD);
+            numtyp expm2 = ucl_exp(-grij*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            numtyp u = (numtyp)1.0 - t;
+            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= (r+EPS_EWALD);
+            forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
+            // Additionally r2inv needs to be accordingly modified since the later
+            // scaling of the overall force shall be consistent
+            r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
+          } else {
+            numtyp grij = g_ewald * r;
+            numtyp expm2 = ucl_exp(-grij*grij);
+            acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+            numtyp u = (numtyp)1.0 - t;
+            _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= r;
+            forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
+          }
+        } else forcecoul = (numtyp)0.0;
+
+        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
+          numtyp r = ucl_sqrt(rsq);
+          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
+          r6inv = r2inv*r2inv*r2inv;
+          forceborn = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
+            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
+        } else forceborn = (numtyp)0.0;
+
+        force = (forcecoul + forceborn) * r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          if (rsq < cut_coulsq) {
+            numtyp e = prefactor*_erfc;
+            if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+            e_coul += e;
+          }
+          if (rsq < cutsq_sigma[mtype].y) {
+            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
+              + coeff2[mtype].z*r2inv*r6inv;
+            energy+=factor_lj*(e-coeff2[mtype].w);
+          }
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                    vflag,ans,engv);
+  } // if ii
+}
+
diff --git a/lib/gpu/lal_born_coul_long_cs.h b/lib/gpu/lal_born_coul_long_cs.h
new file mode 100644
index 0000000000000000000000000000000000000000..df0fa0fcd41c790932fd9f4bedd2eb8e982c4865
--- /dev/null
+++ b/lib/gpu/lal_born_coul_long_cs.h
@@ -0,0 +1,53 @@
+/***************************************************************************
+                             born_coul_long_cs.h
+                             -------------------
+                         Trung Dac Nguyen (Northwestern)
+
+  Class for acceleration of the born/coul/long/cs pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_BORN_COUL_LONG_CS_H
+#define LAL_BORN_COUL_LONG_CS_H
+
+#include "lal_born_coul_long.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class BornCoulLongCS : public BornCoulLong<numtyp, acctyp> {
+ public:
+  BornCoulLongCS() {}
+  ~BornCoulLongCS() {}
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successfull
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq, double **host_rhoinv,
+           double **host_born1, double **host_born2, double **host_born3,
+           double **host_a, double **host_c, double **host_d,
+           double **host_sigma, double **host_offset, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen, double **host_cut_ljsq,
+           const double host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e, const double g_ewald);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_born_coul_long_cs_ext.cpp b/lib/gpu/lal_born_coul_long_cs_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..badc8b0808488569685a7d11c5224beb720ce534
--- /dev/null
+++ b/lib/gpu/lal_born_coul_long_cs_ext.cpp
@@ -0,0 +1,132 @@
+/***************************************************************************
+                           born_coul_long_cs_ext.cpp
+                             -------------------
+                           Trung Dac Nguyen (ORNL)
+
+  Functions for LAMMPS access to born/coul/long/cs acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_born_coul_long_cs.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static BornCoulLongCS<PRECISION,ACC_PRECISION> BCLCSMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
+                    double **host_born1, double **host_born2, double **host_born3,
+                    double **host_a, double **host_c, double **host_d,
+                    double **sigma, double **offset, double *special_lj,
+                    const int inum, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size, int &gpu_mode,
+                    FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
+                    double *host_special_coul, const double qqrd2e,
+                    const double g_ewald) {
+  BCLCSMF.clear();
+  gpu_mode=BCLCSMF.device->gpu_mode();
+  double gpu_split=BCLCSMF.device->particle_split();
+  int first_gpu=BCLCSMF.device->first_device();
+  int last_gpu=BCLCSMF.device->last_device();
+  int world_me=BCLCSMF.device->world_me();
+  int gpu_rank=BCLCSMF.device->gpu_rank();
+  int procs_per_gpu=BCLCSMF.device->procs_per_gpu();
+
+  BCLCSMF.device->init_message(screen,"born/coul/long/cs",first_gpu,last_gpu);
+
+  bool message=false;
+  if (BCLCSMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=BCLCSMF.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
+                          host_born3, host_a, host_c, host_d, sigma, offset,
+                          special_lj, inum, nall, 300, maxspecial, cell_size,
+                          gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
+                          host_special_coul, qqrd2e, g_ewald);
+
+  BCLCSMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=BCLCSMF.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
+                            host_born3, host_a, host_c, host_d, sigma, offset,
+                            special_lj, inum, nall, 300, maxspecial, cell_size,
+                            gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
+                            host_special_coul, qqrd2e, g_ewald);
+
+    BCLCSMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    BCLCSMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void bornclcs_gpu_clear() {
+  BCLCSMF.clear();
+}
+
+int** bornclcs_gpu_compute_n(const int ago, const int inum_full,
+                           const int nall, double **host_x, int *host_type,
+                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start,
+                           int **ilist, int **jnum,  const double cpu_time,
+                           bool &success, double *host_q, double *boxlo,
+                           double *prd) {
+  return BCLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                          subhi, tag, nspecial, special, eflag, vflag, eatom,
+                          vatom, host_start, ilist, jnum, cpu_time, success,
+                          host_q, boxlo, prd);
+}
+
+void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q,
+                        const int nlocal, double *boxlo, double *prd) {
+  BCLCSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+                   firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success,
+                   host_q,nlocal,boxlo,prd);
+}
+
+double bornclcs_gpu_bytes() {
+  return BCLCSMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_born_coul_wolf.cpp b/lib/gpu/lal_born_coul_wolf.cpp
index 7ebd7b744fd6501a2710c3ad7143112aa9cf520e..c44b8414635abcb6a872b46544e52c1fa5a738da 100644
--- a/lib/gpu/lal_born_coul_wolf.cpp
+++ b/lib/gpu/lal_born_coul_wolf.cpp
@@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
                         const double alf, const double e_shift, const double f_shift) {
   int success;
   success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
-                            _screen,born_coul_wolf,"k_born_wolf");
+                            _screen,born_coul_wolf,"k_born_coul_wolf");
   if (success!=0)
     return success;
 
diff --git a/lib/gpu/lal_born_coul_wolf.cu b/lib/gpu/lal_born_coul_wolf.cu
index 0dc7d08c638d93bd9d0a5f0c1e37cd26e963e593..2c2249feeb7a34bdc899195aad60284aef51551a 100644
--- a/lib/gpu/lal_born_coul_wolf.cu
+++ b/lib/gpu/lal_born_coul_wolf.cu
@@ -31,7 +31,7 @@ texture<int2> q_tex;
 
 #define MY_PIS (acctyp)1.77245385090551602729
 
-__kernel void k_born_wolf(const __global numtyp4 *restrict x_,
+__kernel void k_born_coul_wolf(const __global numtyp4 *restrict x_,
                           const __global numtyp4 *restrict coeff1,
                           const __global numtyp4 *restrict coeff2,
                           const int lj_types,
@@ -165,7 +165,7 @@ __kernel void k_born_wolf(const __global numtyp4 *restrict x_,
   } // if ii
 }
 
-__kernel void k_born_wolf_fast(const __global numtyp4 *restrict x_,
+__kernel void k_born_coul_wolf_fast(const __global numtyp4 *restrict x_,
                                const __global numtyp4 *restrict coeff1_in,
                                const __global numtyp4 *restrict coeff2_in,
                                const __global numtyp *restrict sp_lj_in,
diff --git a/lib/gpu/lal_born_coul_wolf.h b/lib/gpu/lal_born_coul_wolf.h
index 4b2406b989be7d41d57c3b6cb66c035d491d937d..4b817ee0ced7df7d975c14de7cf20fa2410cdea3 100644
--- a/lib/gpu/lal_born_coul_wolf.h
+++ b/lib/gpu/lal_born_coul_wolf.h
@@ -13,8 +13,8 @@
     email                : nguyentd@ornl.gov
  ***************************************************************************/
 
-#ifndef LAL_BORN_COUL_LONG_H
-#define LAL_BORN_COUL_LONG_H
+#ifndef LAL_BORN_COUL_WOLF_H
+#define LAL_BORN_COUL_WOLF_H
 
 #include "lal_base_charge.h"
 
@@ -79,7 +79,7 @@ class BornCoulWolf : public BaseCharge<numtyp, acctyp> {
 
   numtyp _cut_coulsq,_qqrd2e,_alf,_e_shift,_f_shift;
 
- private:
+ protected:
   bool _allocated;
   void loop(const bool _eflag, const bool _vflag);
 };
diff --git a/lib/gpu/lal_born_coul_wolf_cs.cpp b/lib/gpu/lal_born_coul_wolf_cs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bdb1c31e557b9f91f9308036cdf31ee46d87271f
--- /dev/null
+++ b/lib/gpu/lal_born_coul_wolf_cs.cpp
@@ -0,0 +1,97 @@
+/***************************************************************************
+                            born_coul_wolf_cs.cpp
+                             -------------------
+                        Trung Dac Nguyen (Northwestern)
+
+  Class for acceleration of the born/coul/wolf/cs pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifdef USE_OPENCL
+#include "born_coul_wolf_cs_cl.h"
+#elif defined(USE_CUDART)
+const char *born_coul_wolf_cs=0;
+#else
+#include "born_coul_wolf_cs_cubin.h"
+#endif
+
+#include "lal_born_coul_wolf_cs.h"
+#include <cassert>
+using namespace LAMMPS_AL;
+#define BornCoulWolfCST BornCoulWolfCS<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_rhoinv,
+                        double **host_born1, double **host_born2, double **host_born3,
+                        double **host_a, double **host_c, double **host_d,
+                        double **host_sigma, double **host_offset,
+                        double *host_special_lj, const int nlocal,
+                        const int nall, const int max_nbors,
+                        const int maxspecial, const double cell_size,
+                        const double gpu_split, FILE *_screen,
+                        double **host_cut_ljsq, const double host_cut_coulsq,
+                        double *host_special_coul, const double qqrd2e,
+                        const double alf, const double e_shift, const double f_shift) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
+                            _screen,born_coul_wolf_cs,"k_born_coul_wolf_cs");
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  this->shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    this->shared_types=true;
+  }
+  this->_lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  this->coeff1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->coeff1,host_write,host_rhoinv,
+                         host_born1,host_born2,host_born3);
+
+  this->coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->coeff2,host_write,host_a,host_c,
+                                     host_d,host_offset);
+
+  this->cutsq_sigma.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,this->cutsq_sigma,host_write,host_cutsq,
+                         host_cut_ljsq,host_sigma);
+
+  this->sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_lj[i];
+    host_write[i+4]=host_special_coul[i];
+  }
+  ucl_copy(this->sp_lj,host_write,8,false);
+
+  this->_cut_coulsq=host_cut_coulsq;
+  this->_qqrd2e=qqrd2e;
+  this->_alf=alf;
+  this->_e_shift=e_shift;
+  this->_f_shift=f_shift;
+
+  this->_allocated=true;
+  this->_max_bytes=this->coeff1.row_bytes()+this->coeff2.row_bytes()
+      +this->cutsq_sigma.row_bytes()+this->sp_lj.row_bytes();
+  return 0;
+}
+
+template class BornCoulWolfCS<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/lal_born_coul_wolf_cs.cu b/lib/gpu/lal_born_coul_wolf_cs.cu
new file mode 100644
index 0000000000000000000000000000000000000000..847387bfe8c1adbe2e1f3f2a3ed21964d08c5989
--- /dev/null
+++ b/lib/gpu/lal_born_coul_wolf_cs.cu
@@ -0,0 +1,306 @@
+// **************************************************************************
+//                            born_coul_wolf_cs.cu
+//                             -------------------
+//                         Trung Dac Nguyen (Northwestern)
+//
+//  Device code for acceleration of the born/coul/wolf/cs pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                :
+//    email                : ndactrung@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+texture<float4> pos_tex;
+texture<float> q_tex;
+#else
+texture<int4,1> pos_tex;
+texture<int2> q_tex;
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+#define EPSILON (acctyp)(1.0e-20)
+#define MY_PIS (acctyp)1.77245385090551602729
+
+__kernel void k_born_coul_wolf_cs(const __global numtyp4 *restrict x_,
+                          const __global numtyp4 *restrict coeff1,
+                          const __global numtyp4 *restrict coeff2,
+                          const int lj_types,
+                          const __global numtyp *restrict sp_lj_in,
+                          const __global int *dev_nbor,
+                          const __global int *dev_packed,
+                          __global acctyp4 *restrict ans,
+                          __global acctyp *restrict engv,
+                          const int eflag, const int vflag, const int inum,
+                          const int nbor_pitch,
+                          const __global numtyp *restrict q_,
+                          const __global numtyp4 *restrict cutsq_sigma,
+                          const numtyp cut_coulsq, const numtyp qqrd2e,
+                          const numtyp alf, const numtyp e_shift,
+                          const numtyp f_shift, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_lj[8];
+  sp_lj[0]=sp_lj_in[0];
+  sp_lj[1]=sp_lj_in[1];
+  sp_lj[2]=sp_lj_in[2];
+  sp_lj[3]=sp_lj_in[3];
+  sp_lj[4]=sp_lj_in[4];
+  sp_lj[5]=sp_lj_in[5];
+  sp_lj[6]=sp_lj_in[6];
+  sp_lj[7]=sp_lj_in[7];
+
+  acctyp energy=(acctyp)0;
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int itype=ix.w;
+
+    if (eflag>0) {
+      acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
+        qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
+      e_coul += (acctyp)2.0*e_self;
+    }
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<cutsq_sigma[mtype].x) { // cutsq
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+        acctyp r2inv = ucl_recip(rsq);
+
+        numtyp forcecoul,forceborn,force,prefactor,rexp;
+        acctyp v_sh,r6inv;
+
+        if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
+          numtyp r = ucl_sqrt(rsq);
+          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
+          r6inv = r2inv*r2inv*r2inv;
+          forceborn = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
+            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
+        } else forceborn = (numtyp)0.0;
+
+        if (rsq < cut_coulsq) {
+          numtyp r = ucl_rsqrt(r2inv);
+          acctyp arij = alf * r;
+          acctyp erfcd = ucl_exp(-arij*arij);
+          fetch(prefactor,j,q_tex);
+          prefactor *= qqrd2e * qtmp/r;
+
+          const acctyp erfcc = erfc(arij);
+          v_sh = (erfcc - e_shift*r)*prefactor;
+          acctyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
+          forcecoul = prefactor * dvdrr*rsq;
+          if (factor_coul < (numtyp)1.0) forcecoul -= ((numtyp)1.0-factor_coul)*prefactor;
+        } else forcecoul = (numtyp)0.0;
+
+        force = (forceborn + forcecoul) * r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          if (rsq < cut_coulsq) {
+            acctyp e=v_sh;
+            if (factor_coul < (numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+            e_coul += e;
+          }
+          if (rsq < cutsq_sigma[mtype].y) {
+            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
+              + coeff2[mtype].z*r2inv*r6inv;
+            energy+=factor_lj*(e-coeff2[mtype].w);
+          }
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                    vflag,ans,engv);
+  } // if ii
+}
+
+__kernel void k_born_coul_wolf_cs_fast(const __global numtyp4 *restrict x_,
+                               const __global numtyp4 *restrict coeff1_in,
+                               const __global numtyp4 *restrict coeff2_in,
+                               const __global numtyp *restrict sp_lj_in,
+                               const __global int *dev_nbor,
+                               const __global int *dev_packed,
+                               __global acctyp4 *restrict ans,
+                               __global acctyp *restrict engv,
+                               const int eflag, const int vflag, const int inum,
+                               const int nbor_pitch,
+                               const __global numtyp *restrict q_,
+                               const __global numtyp4 *restrict cutsq_sigma,
+                               const numtyp cut_coulsq, const numtyp qqrd2e,
+                               const numtyp alf, const numtyp e_shift,
+                               const numtyp f_shift, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp4 coeff1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[8];
+  if (tid<8)
+    sp_lj[tid]=sp_lj_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff1[tid]=coeff1_in[tid];
+    if (eflag>0)
+      coeff2[tid]=coeff2_in[tid];
+  }
+
+  acctyp energy=(acctyp)0;
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    if (eflag>0) {
+      acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
+        qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
+      e_coul += (acctyp)2.0*e_self;
+    }
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      acctyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_sigma[mtype].x) {
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+        acctyp r2inv = ucl_recip(rsq);
+
+        numtyp forcecoul,forceborn,force,prefactor,rexp;
+        acctyp v_sh,r6inv;
+
+        if (rsq < cutsq_sigma[mtype].y) {
+          r6inv = r2inv*r2inv*r2inv;
+          numtyp r = ucl_sqrt(rsq);
+          rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
+          forceborn = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
+            + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
+        } else forceborn = (numtyp)0.0;
+
+       if (rsq < cut_coulsq) {
+          numtyp r = ucl_sqrt(rsq);
+          acctyp arij = alf * r;
+          acctyp erfcd = ucl_exp(-arij*arij);
+          fetch(prefactor,j,q_tex);
+          prefactor *= qqrd2e * qtmp/r;
+
+          const acctyp erfcc = erfc(arij);
+          v_sh = (erfcc - e_shift*r)*prefactor;
+          acctyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
+          forcecoul = prefactor * dvdrr*rsq;
+          if (factor_coul < (numtyp)1.0) forcecoul -= ((numtyp)1.0-factor_coul)*prefactor;
+        } else forcecoul = (numtyp)0.0;
+
+        force = (forceborn + forcecoul) * r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          if (rsq < cut_coulsq) {
+            acctyp e=v_sh;
+            if (factor_coul < (numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+            e_coul += e;
+          }
+          if (rsq < cutsq_sigma[mtype].y) {
+            numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
+              + coeff2[mtype].z*r2inv*r6inv;
+            energy+=factor_lj*(e-coeff2[mtype].w);
+          }
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                    vflag,ans,engv);
+  } // if ii
+}
+
diff --git a/lib/gpu/lal_born_coul_wolf_cs.h b/lib/gpu/lal_born_coul_wolf_cs.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d9de0a457f04027118aebe5e9f4c1c6c6561ad3
--- /dev/null
+++ b/lib/gpu/lal_born_coul_wolf_cs.h
@@ -0,0 +1,54 @@
+/***************************************************************************
+                              born_coul_wolf_cs.h
+                             -------------------
+                           Trung Dac Nguyen (Northwestern)
+
+  Class for acceleration of the born/coul/wolf/cs pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_BORN_COUL_WOLF_CS_H
+#define LAL_BORN_COUL_WOLF_CS_H
+
+#include "lal_born_coul_wolf.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class BornCoulWolfCS : public BornCoulWolf<numtyp, acctyp> {
+ public:
+  BornCoulWolfCS() {}
+  ~BornCoulWolfCS() {}
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successfull
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq, double **host_rhoinv,
+           double **host_born1, double **host_born2, double **host_born3,
+           double **host_a, double **host_c, double **host_d,
+           double **host_sigma, double **host_offset, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen, double **host_cut_ljsq,
+           const double host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e, const double alf, const double e_shift,
+           const double f_shift);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2211644af1f5b662a4750686ae5afa28216441c
--- /dev/null
+++ b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp
@@ -0,0 +1,134 @@
+/***************************************************************************
+                           born_coul_wolf_cs_ext.cpp
+                             -------------------
+                           Trung Dac Nguyen (Northwestern)
+
+  Functions for LAMMPS access to born/coul/wolf/cs acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_born_coul_wolf_cs.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static BornCoulWolfCS<PRECISION,ACC_PRECISION> BornCWCST;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
+                    double **host_born1, double **host_born2, double **host_born3,
+                    double **host_a, double **host_c, double **host_d,
+                    double **sigma, double **offset, double *special_lj, const int inum,
+                    const int nall, const int max_nbors, const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen,
+                    double **host_cut_ljsq, double host_cut_coulsq,
+                    double *host_special_coul, const double qqrd2e,
+                    const double alf, const double e_shift, const double f_shift) {
+  BornCWCST.clear();
+  gpu_mode=BornCWCST.device->gpu_mode();
+  double gpu_split=BornCWCST.device->particle_split();
+  int first_gpu=BornCWCST.device->first_device();
+  int last_gpu=BornCWCST.device->last_device();
+  int world_me=BornCWCST.device->world_me();
+  int gpu_rank=BornCWCST.device->gpu_rank();
+  int procs_per_gpu=BornCWCST.device->procs_per_gpu();
+
+  BornCWCST.device->init_message(screen,"born/coul/wolf/cs",first_gpu,last_gpu);
+
+  bool message=false;
+  if (BornCWCST.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=BornCWCST.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
+                          host_born3, host_a, host_c, host_d, sigma,
+                          offset, special_lj, inum, nall, 300,
+                          maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
+                          host_cut_coulsq, host_special_coul, qqrd2e,
+                          alf, e_shift, f_shift);
+
+  BornCWCST.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=BornCWCST.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
+                            host_born3, host_a, host_c, host_d, sigma,
+                            offset, special_lj, inum, nall, 300,
+                            maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
+                            host_cut_coulsq, host_special_coul, qqrd2e,
+                            alf, e_shift, f_shift);
+
+    BornCWCST.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    BornCWCST.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void borncwcs_gpu_clear() {
+  BornCWCST.clear();
+}
+
+int** borncwcs_gpu_compute_n(const int ago, const int inum_full,
+                           const int nall, double **host_x, int *host_type,
+                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start,
+                           int **ilist, int **jnum,  const double cpu_time,
+                           bool &success, double *host_q, double *boxlo,
+                           double *prd) {
+  return BornCWCST.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                          subhi, tag, nspecial, special, eflag, vflag, eatom,
+                          vatom, host_start, ilist, jnum, cpu_time, success,
+                          host_q, boxlo, prd);
+}
+
+void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q,
+                        const int nlocal, double *boxlo, double *prd) {
+  BornCWCST.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+                   firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success,
+                   host_q,nlocal,boxlo,prd);
+}
+
+double borncwcs_gpu_bytes() {
+  return BornCWCST.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_coul_long.h b/lib/gpu/lal_coul_long.h
index d12198fccc48fe37e262d1dfa63cdfea0b7ae844..ae25b91dc77a9322449fbcc5c1f96d994710838c 100644
--- a/lib/gpu/lal_coul_long.h
+++ b/lib/gpu/lal_coul_long.h
@@ -72,7 +72,7 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
 
   numtyp _cut_coulsq, _qqrd2e, _g_ewald;
 
- private:
+ protected:
   bool _allocated;
   void loop(const bool _eflag, const bool _vflag);
 };
diff --git a/lib/gpu/lal_coul_long_cs.cpp b/lib/gpu/lal_coul_long_cs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7afa0ae5d21bfb95bbf5c18315907250e7155318
--- /dev/null
+++ b/lib/gpu/lal_coul_long_cs.cpp
@@ -0,0 +1,78 @@
+/***************************************************************************
+                              coul_long_cs.cpp
+                             -------------------
+                           Trung Nguyen (Northwestern)
+
+  Class for acceleration of the coul/long pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : June 2018
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "coul_long_cs_cl.h"
+#elif defined(USE_CUDART)
+const char *coul_long_cs=0;
+#else
+#include "coul_long_cs_cubin.h"
+#endif
+
+#include "lal_coul_long_cs.h"
+#include <cassert>
+using namespace LAMMPS_AL;
+#define CoulLongCST CoulLongCS<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> pair_gpu_device;
+
+template <class numtyp, class acctyp>
+int CoulLongCST::init(const int ntypes, double **host_scale,
+                    const int nlocal, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size,
+                    const double gpu_split, FILE *_screen,
+                    const double host_cut_coulsq, double *host_special_coul,
+                    const double qqrd2e, const double g_ewald) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,coul_long_cs,"k_coul_long_cs");
+  if (success!=0)
+    return success;
+
+  int lj_types=ntypes;
+  this->shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    this->shared_types=true;
+  }
+  this->_lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  this->scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,this->scale,host_write,host_scale);
+
+  this->sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_coul[i];
+  }
+  ucl_copy(this->sp_cl,host_write,4,false);
+
+  this->_cut_coulsq=host_cut_coulsq;
+  this->_qqrd2e=qqrd2e;
+  this->_g_ewald=g_ewald;
+
+  this->_allocated=true;
+  this->_max_bytes=this->scale.row_bytes()+this->sp_cl.row_bytes();
+  return 0;
+}
+
+template class CoulLongCS<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/lal_coul_long_cs.cu b/lib/gpu/lal_coul_long_cs.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3c34666131d2d363c03bc6f684bd9b573b54c043
--- /dev/null
+++ b/lib/gpu/lal_coul_long_cs.cu
@@ -0,0 +1,367 @@
+// **************************************************************************
+//                               coul_long_cs.cu
+//                             -------------------
+//                           Trung Nguyen (Northwestern)
+//
+//  Device code for acceleration of the coul/long/cs pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : June 2018
+//    email                : ndactrung@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+texture<float4> pos_tex;
+texture<float> q_tex;
+#else
+texture<int4,1> pos_tex;
+texture<int2> q_tex;
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+// Note: EWALD_P is different from that in lal_preprocessor.h
+//       acctyp is needed for these parameters
+#define CS_EWALD_P (acctyp)9.95473818e-1
+#define B0        (acctyp)-0.1335096380159268
+#define B1        (acctyp)-2.57839507e-1
+#define B2        (acctyp)-1.37203639e-1
+#define B3        (acctyp)-8.88822059e-3
+#define B4        (acctyp)-5.80844129e-3
+#define B5        (acctyp)1.14652755e-1
+
+#define EPSILON (acctyp)(1.0e-20)
+#define EPS_EWALD (acctyp)(1.0e-6)
+#define EPS_EWALD_SQR (acctyp)(1.0e-12)
+
+#if (ARCH < 300)
+
+#define store_answers_lq(f, e_coul, virial, ii, inum, tid,                  \
+                        t_per_atom, offset, eflag, vflag, ans, engv)        \
+  if (t_per_atom>1) {                                                       \
+    __local acctyp red_acc[6][BLOCK_PAIR];                                  \
+                                                                            \
+    red_acc[0][tid]=f.x;                                                    \
+    red_acc[1][tid]=f.y;                                                    \
+    red_acc[2][tid]=f.z;                                                    \
+    red_acc[3][tid]=e_coul;                                                 \
+                                                                            \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+      if (offset < s) {                                                     \
+        for (int r=0; r<4; r++)                                             \
+          red_acc[r][tid] += red_acc[r][tid+s];                             \
+      }                                                                     \
+    }                                                                       \
+                                                                            \
+    f.x=red_acc[0][tid];                                                    \
+    f.y=red_acc[1][tid];                                                    \
+    f.z=red_acc[2][tid];                                                    \
+    e_coul=red_acc[3][tid];                                                 \
+                                                                            \
+    if (vflag>0) {                                                          \
+      for (int r=0; r<6; r++)                                               \
+        red_acc[r][tid]=virial[r];                                          \
+                                                                            \
+      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
+        if (offset < s) {                                                   \
+          for (int r=0; r<6; r++)                                           \
+            red_acc[r][tid] += red_acc[r][tid+s];                           \
+        }                                                                   \
+      }                                                                     \
+                                                                            \
+      for (int r=0; r<6; r++)                                               \
+        virial[r]=red_acc[r][tid];                                          \
+    }                                                                       \
+  }                                                                         \
+                                                                            \
+  if (offset==0) {                                                          \
+    __global acctyp *ap1=engv+ii;                                           \
+    if (eflag>0) {                                                          \
+      *ap1=(acctyp)0;                                                       \
+      ap1+=inum;                                                            \
+      *ap1=e_coul*(acctyp)0.5;                                              \
+      ap1+=inum;                                                            \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (int i=0; i<6; i++) {                                             \
+        *ap1=virial[i]*(acctyp)0.5;                                         \
+        ap1+=inum;                                                          \
+      }                                                                     \
+    }                                                                       \
+    ans[ii]=f;                                                              \
+  }
+
+#else
+
+#define store_answers_lq(f, e_coul, virial, ii, inum, tid,                  \
+                         t_per_atom, offset, eflag, vflag, ans, engv)       \
+  if (t_per_atom>1) {                                                       \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+        f.x += shfl_xor(f.x, s, t_per_atom);                                \
+        f.y += shfl_xor(f.y, s, t_per_atom);                                \
+        f.z += shfl_xor(f.z, s, t_per_atom);                                \
+        e_coul += shfl_xor(e_coul, s, t_per_atom);                          \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
+          for (int r=0; r<6; r++)                                           \
+            virial[r] += shfl_xor(virial[r], s, t_per_atom);                \
+      }                                                                     \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0) {                                                          \
+    __global acctyp *ap1=engv+ii;                                           \
+    if (eflag>0) {                                                          \
+      *ap1=(acctyp)0;                                                       \
+      ap1+=inum;                                                            \
+      *ap1=e_coul*(acctyp)0.5;                                              \
+      ap1+=inum;                                                            \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (int i=0; i<6; i++) {                                             \
+        *ap1=virial[i]*(acctyp)0.5;                                         \
+        ap1+=inum;                                                          \
+      }                                                                     \
+    }                                                                       \
+    ans[ii]=f;                                                              \
+  }
+
+#endif
+
+__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
+                          const __global numtyp *restrict scale,
+                          const int lj_types,
+                          const __global numtyp *restrict sp_cl_in,
+                          const __global int *dev_nbor,
+                          const __global int *dev_packed,
+                          __global acctyp4 *restrict ans,
+                          __global acctyp *restrict engv,
+                          const int eflag, const int vflag, const int inum,
+                          const int nbor_pitch,
+                          const __global numtyp *restrict q_,
+                          const numtyp cut_coulsq, const numtyp qqrd2e,
+                          const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_cl[4];
+  sp_cl[0]=sp_cl_in[0];
+  sp_cl[1]=sp_cl_in[1];
+  sp_cl[2]=sp_cl_in[2];
+  sp_cl[3]=sp_cl_in[3];
+
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_coul;
+      factor_coul = sp_cl[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq < cut_coulsq) {
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+
+        numtyp force,prefactor,_erfc;
+        numtyp r2inv = ucl_recip(rsq);
+        numtyp r = ucl_rsqrt(r2inv);
+        fetch(prefactor,j,q_tex);
+        prefactor *= qqrd2e * scale[mtype] * qtmp;
+        if (factor_coul<(numtyp)1.0) {
+          numtyp grij = g_ewald * (r+EPS_EWALD);
+          numtyp expm2 = ucl_exp(-grij*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          numtyp u = (numtyp)1.0 - t;
+          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+          prefactor /= (r+EPS_EWALD);
+          force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
+          // Additionally r2inv needs to be accordingly modified since the later
+          // scaling of the overall force shall be consistent
+          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
+          force *= r2inv;
+        } else {
+          numtyp grij = g_ewald * r;
+          numtyp expm2 = ucl_exp(-grij*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          numtyp u = (numtyp)1.0 - t;
+          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+          prefactor /= r;
+          force = prefactor*(_erfc + EWALD_F*grij*expm2);
+          force *= r2inv;
+        }
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          numtyp e = prefactor*_erfc;
+          if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+          e_coul += e;
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                     vflag,ans,engv);
+  } // if ii
+}
+
+__kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
+                               const __global numtyp *restrict scale_in,
+                               const __global numtyp *restrict sp_cl_in,
+                               const __global int *dev_nbor,
+                               const __global int *dev_packed,
+                               __global acctyp4 *restrict ans,
+                               __global acctyp *restrict engv,
+                               const int eflag, const int vflag, const int inum,
+                               const int nbor_pitch,
+                               const __global numtyp *restrict q_,
+                               const numtyp cut_coulsq, const numtyp qqrd2e,
+                               const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_cl[4];
+  if (tid<4)
+    sp_cl[tid]=sp_cl_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES)
+    scale[tid]=scale_in[tid];
+
+  acctyp e_coul=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_coul;
+      factor_coul = sp_cl[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq < cut_coulsq) {
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+
+        numtyp force,prefactor,_erfc;
+        numtyp r2inv = ucl_recip(rsq);
+        numtyp r = ucl_rsqrt(r2inv);
+        fetch(prefactor,j,q_tex);
+        prefactor *= qqrd2e * scale[mtype] * qtmp;
+        if (factor_coul<(numtyp)1.0) {
+          numtyp grij = g_ewald * (r+EPS_EWALD);
+          numtyp expm2 = ucl_exp(-grij*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          numtyp u = (numtyp)1.0 - t;
+          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+          prefactor /= (r+EPS_EWALD);
+          force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
+          // Additionally r2inv needs to be accordingly modified since the later
+          // scaling of the overall force shall be consistent
+          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
+        } else {
+          numtyp grij = g_ewald * r;
+          numtyp expm2 = ucl_exp(-grij*grij);
+          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
+          numtyp u = (numtyp)1.0 - t;
+          _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+          prefactor /= r;
+          force = prefactor * (_erfc + EWALD_F*grij*expm2);
+        }
+
+        force *= r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (eflag>0) {
+          numtyp e = prefactor*_erfc;
+          if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
+          e_coul += e;
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                     vflag,ans,engv);
+  } // if ii
+}
+
diff --git a/lib/gpu/lal_coul_long_cs.h b/lib/gpu/lal_coul_long_cs.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cfcb80fc86c9ab3f3066e086533cad795a378be
--- /dev/null
+++ b/lib/gpu/lal_coul_long_cs.h
@@ -0,0 +1,50 @@
+/***************************************************************************
+                               coul_long_cs.h
+                             -------------------
+                           Trung Nguyen (Northwestern)
+
+  Class for acceleration of the coul/long/cs pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : June 2018
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_COUL_LONG_CS_H
+#define LAL_COUL_LONG_CS_H
+
+#include "lal_coul_long.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class CoulLongCS : public CoulLong<numtyp, acctyp> {
+ public:
+  CoulLongCS() {}
+  ~CoulLongCS() {}
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successfull
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **scale,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen,
+           const double host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e, const double g_ewald);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_coul_long_cs_ext.cpp b/lib/gpu/lal_coul_long_cs_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae57eb20382a0e47e1d987921e521d84a169dfb3
--- /dev/null
+++ b/lib/gpu/lal_coul_long_cs_ext.cpp
@@ -0,0 +1,145 @@
+/***************************************************************************
+                             coul_long_cs_ext.cpp
+                             -------------------
+                           Trung Nguyen (Northwestern)
+
+  Functions for LAMMPS access to coul/long/cs acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : June 2018
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_coul_long_cs.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static CoulLongCS<PRECISION,ACC_PRECISION> CLCSMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int clcs_gpu_init(const int ntypes, double **host_scale,
+                const int inum, const int nall, const int max_nbors,
+                const int maxspecial, const double cell_size, int &gpu_mode,
+                FILE *screen, double host_cut_coulsq, double *host_special_coul,
+                const double qqrd2e, const double g_ewald) {
+  CLCSMF.clear();
+  gpu_mode=CLCSMF.device->gpu_mode();
+  double gpu_split=CLCSMF.device->particle_split();
+  int first_gpu=CLCSMF.device->first_device();
+  int last_gpu=CLCSMF.device->last_device();
+  int world_me=CLCSMF.device->world_me();
+  int gpu_rank=CLCSMF.device->gpu_rank();
+  int procs_per_gpu=CLCSMF.device->procs_per_gpu();
+
+  CLCSMF.device->init_message(screen,"coul/long/cs",first_gpu,last_gpu);
+
+  bool message=false;
+  if (CLCSMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=CLCSMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
+                      cell_size, gpu_split, screen, host_cut_coulsq,
+                      host_special_coul, qqrd2e, g_ewald);
+
+  CLCSMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=CLCSMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
+                        cell_size, gpu_split, screen, host_cut_coulsq,
+                        host_special_coul, qqrd2e, g_ewald);
+
+    CLCSMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    CLCSMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void clcs_gpu_reinit(const int ntypes, double **host_scale) {
+  int world_me=CLCSMF.device->world_me();
+  int gpu_rank=CLCSMF.device->gpu_rank();
+  int procs_per_gpu=CLCSMF.device->procs_per_gpu();
+
+  if (world_me==0)
+    CLCSMF.reinit(ntypes, host_scale);
+
+  CLCSMF.device->world_barrier();
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      CLCSMF.reinit(ntypes, host_scale);
+
+    CLCSMF.device->gpu_barrier();
+  }
+}
+
+void clcs_gpu_clear() {
+  CLCSMF.clear();
+}
+
+int** clcs_gpu_compute_n(const int ago, const int inum_full,
+                       const int nall, double **host_x, int *host_type,
+                       double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag,
+                       const bool eatom, const bool vatom, int &host_start,
+                       int **ilist, int **jnum,  const double cpu_time,
+                       bool &success, double *host_q, double *boxlo,
+                       double *prd) {
+  return CLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                      subhi, tag, nspecial, special, eflag, vflag, eatom,
+                      vatom, host_start, ilist, jnum, cpu_time, success,
+                      host_q, boxlo, prd);
+}
+
+void clcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                    double **host_x, int *host_type, int *ilist, int *numj,
+                    int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start,
+                    const double cpu_time, bool &success, double *host_q,
+                    const int nlocal, double *boxlo, double *prd) {
+  CLCSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+               firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success,
+               host_q,nlocal,boxlo,prd);
+}
+
+double clcs_gpu_bytes() {
+  return CLCSMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_lj_expand_coul_long.cpp b/lib/gpu/lal_lj_expand_coul_long.cpp
index 6a3da9b68604ed9e6d4f5ddf936c141b14b90aae..32a4f8120e2e42d40ae6f67bcf960515182b3ca0 100644
--- a/lib/gpu/lal_lj_expand_coul_long.cpp
+++ b/lib/gpu/lal_lj_expand_coul_long.cpp
@@ -10,7 +10,7 @@
  __________________________________________________________________________
 
     begin                :
-    email                : trung.nguyen@northwestern.edu
+    email                : ndactrung@gmail.com
  ***************************************************************************/
 
 #if defined(USE_OPENCL)
diff --git a/lib/gpu/lal_lj_expand_coul_long.cu b/lib/gpu/lal_lj_expand_coul_long.cu
index eaea5b0e12aeada9e58288d3f33b08e61c150b46..aa8f02be8c521134f80201e873ad5636b33783e4 100644
--- a/lib/gpu/lal_lj_expand_coul_long.cu
+++ b/lib/gpu/lal_lj_expand_coul_long.cu
@@ -10,7 +10,7 @@
 // __________________________________________________________________________
 //
 //    begin                :
-//    email                : trung.nguyen@northwestern.edu
+//    email                : ndactrung@gmail.com
 // ***************************************************************************/
 
 #ifdef NV_KERNEL
@@ -94,17 +94,21 @@ __kernel void k_lj_expand_coul_long(const __global numtyp4 *restrict x_,
 
       int mtype=itype*lj_types+jtype;
       if (rsq<lj1[mtype].z) {
-        numtyp r2inv=ucl_recip(rsq);
         numtyp forcecoul, force_lj, force, r6inv, prefactor, _erfc;
+        numtyp r2inv=ucl_recip(rsq);
+        numtyp r = ucl_sqrt(rsq);
 
         if (rsq < lj1[mtype].w) {
-          r6inv = r2inv*r2inv*r2inv;
-          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
+          numtyp rshift = r - lj3[mtype].w;
+          numtyp rshiftsq = rshift*rshift;
+          numtyp rshift2inv = ucl_recip(rshiftsq);
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
+          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
+          force_lj *= factor_lj/rshift/r;
         } else
           force_lj = (numtyp)0.0;
 
         if (rsq < cut_coulsq) {
-          numtyp r = ucl_rsqrt(r2inv);
           numtyp grij = g_ewald * r;
           numtyp expm2 = ucl_exp(-grij*grij);
           numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
@@ -115,7 +119,7 @@ __kernel void k_lj_expand_coul_long(const __global numtyp4 *restrict x_,
         } else
           forcecoul = (numtyp)0.0;
 
-        force = (force_lj + forcecoul) * r2inv;
+        force = force_lj + forcecoul*r2inv;
 
         f.x+=delx*force;
         f.y+=dely*force;
@@ -168,8 +172,7 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
     sp_lj[tid]=sp_lj_in[tid];
   if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
     lj1[tid]=lj1_in[tid];
-    if (eflag>0)
-      lj3[tid]=lj3_in[tid];
+    lj3[tid]=lj3_in[tid];
   }
 
   acctyp energy=(acctyp)0;
@@ -212,17 +215,21 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
       numtyp rsq = delx*delx+dely*dely+delz*delz;
 
       if (rsq<lj1[mtype].z) {
-        numtyp r2inv=ucl_recip(rsq);
         numtyp forcecoul, force_lj, force, r6inv, prefactor, _erfc;
+        numtyp r2inv=ucl_recip(rsq);
+        numtyp r = ucl_sqrt(rsq);
 
         if (rsq < lj1[mtype].w) {
-          r6inv = r2inv*r2inv*r2inv;
-          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
+          numtyp rshift = r - lj3[mtype].w;
+          numtyp rshiftsq = rshift*rshift;
+          numtyp rshift2inv = ucl_recip(rshiftsq);
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
+          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
+          force_lj *= factor_lj/rshift/r;
         } else
           force_lj = (numtyp)0.0;
 
         if (rsq < cut_coulsq) {
-          numtyp r = ucl_rsqrt(r2inv);
           numtyp grij = g_ewald * r;
           numtyp expm2 = ucl_exp(-grij*grij);
           numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
@@ -233,7 +240,7 @@ __kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
         } else
           forcecoul = (numtyp)0.0;
 
-        force = (force_lj + forcecoul) * r2inv;
+        force = force_lj + forcecoul*r2inv;
 
         f.x+=delx*force;
         f.y+=dely*force;
diff --git a/lib/gpu/lal_lj_expand_coul_long.h b/lib/gpu/lal_lj_expand_coul_long.h
index c5c7a468a98d68f979bb098f49ad7aa5a70a85b4..64fd9df1abdcb88725e021db9a8bfb5dd96d94d3 100644
--- a/lib/gpu/lal_lj_expand_coul_long.h
+++ b/lib/gpu/lal_lj_expand_coul_long.h
@@ -10,7 +10,7 @@
  __________________________________________________________________________
 
     begin                :
-    email                : trung.nguyen@northwestern.edu
+    email                : ndactrung@gmail.com
  ***************************************************************************/
 
 #ifndef LAL_LJ_EXPAND_COUL_LONG_H
diff --git a/lib/gpu/lal_lj_expand_coul_long_ext.cpp b/lib/gpu/lal_lj_expand_coul_long_ext.cpp
index e4b53bbf20710681c2049fc1dea520f049a6e4b2..3ff1bef70128c20122c1d3c28e800a43e98c56fd 100644
--- a/lib/gpu/lal_lj_expand_coul_long_ext.cpp
+++ b/lib/gpu/lal_lj_expand_coul_long_ext.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                            lj_coul_long_ext.cpp
+                           lj_expand_coul_long_ext.cpp
                             ------------------------
                             Trung Nguyen (Northwestern)
 
@@ -10,7 +10,7 @@
  __________________________________________________________________________
 
     begin                :
-    email                : trung.nguyen@northwestern.edu
+    email                : ndactrung@gmail.com
  ***************************************************************************/
 
 #include <iostream>
diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu
index b0b3cfb90c958c5c7e7f0c84816be3b03ba73d04..83692a24e4c04bfd9d1441c2a49b5da01fc9813e 100644
--- a/lib/gpu/lal_neighbor_gpu.cu
+++ b/lib/gpu/lal_neighbor_gpu.cu
@@ -118,24 +118,24 @@ __kernel void transpose(__global tagint *restrict out,
                         const __global tagint *restrict in,
                         int columns_in, int rows_in)
 {
-        __local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
+  __local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
 
-        unsigned ti=THREAD_ID_X;
-        unsigned tj=THREAD_ID_Y;
-        unsigned bi=BLOCK_ID_X;
-        unsigned bj=BLOCK_ID_Y;
+  unsigned ti=THREAD_ID_X;
+  unsigned tj=THREAD_ID_Y;
+  unsigned bi=BLOCK_ID_X;
+  unsigned bj=BLOCK_ID_Y;
 
-        unsigned i=bi*BLOCK_CELL_2D+ti;
-        unsigned j=bj*BLOCK_CELL_2D+tj;
-        if ((i<columns_in) && (j<rows_in))
-                block[tj][ti]=in[j*columns_in+i];
+  unsigned i=bi*BLOCK_CELL_2D+ti;
+  unsigned j=bj*BLOCK_CELL_2D+tj;
+  if ((i<columns_in) && (j<rows_in))
+    block[tj][ti]=in[j*columns_in+i];
 
-        __syncthreads();
+   __syncthreads();
 
-        i=bj*BLOCK_CELL_2D+ti;
-        j=bi*BLOCK_CELL_2D+tj;
-        if ((i<rows_in) && (j<columns_in))
-                out[j*rows_in+i] = block[ti][tj];
+  i=bj*BLOCK_CELL_2D+ti;
+  j=bi*BLOCK_CELL_2D+tj;
+  if ((i<rows_in) && (j<columns_in))
+    out[j*rows_in+i] = block[ti][tj];
 }
 
 __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
@@ -191,7 +191,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
       nbor_list[pid_i]=pid_i;
     } else {
       stride=0;
-            neigh_counts=host_numj+pid_i-inum;
+      neigh_counts=host_numj+pid_i-inum;
       neigh_list=host_nbor_list+(pid_i-inum)*neigh_bin_size;
     }
 
@@ -232,7 +232,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
                 diff.z = atom_i.z - pos_sh[j].z;
 
                 r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
-                if (r2 < cell_size*cell_size && r2 > 1e-5) {
+                if (r2 < cell_size*cell_size && pid_j != pid_i) { //  && r2 > 1e-5
                   cnt++;
                   if (cnt <= neigh_bin_size) {
                     *neigh_list = pid_j;
@@ -243,8 +243,8 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
                 }
               }
             }
-                  __syncthreads();
-                } // for (k)
+            __syncthreads();
+          } // for (k)
         }
       }
     }
diff --git a/lib/latte/Install.py b/lib/latte/Install.py
index 4b94ac3cba4381e5cbabbff902ee837b5b053138..82936ecda4b248bfafc81c116d5eedcb7effec0b 100644
--- a/lib/latte/Install.py
+++ b/lib/latte/Install.py
@@ -12,12 +12,12 @@ help = """
 Syntax from src dir: make lib-latte args="-b"
                  or: make lib-latte args="-p /usr/local/latte"
                  or: make lib-latte args="-m gfortran"
-                 or: make lib-latte args="-b -v 1.1.1"
+                 or: make lib-latte args="-b -v 1.2.1"
 
 Syntax from lib dir: python Install.py -b
                  or: python Install.py -p /usr/local/latte
                  or: python Install.py -m gfortran
-                 or: python Install.py -v 1.1.1 -b
+                 or: python Install.py -v 1.2.1 -b
 
 specify one or more options, order does not matter
 
@@ -34,7 +34,7 @@ make lib-latte args="-p $HOME/latte"   # use existing LATTE installation
 
 # settings
 
-version = '1.1.1'
+version = '1.2.1'
 
 # print error message or help
 
diff --git a/lib/linalg/Makefile.gfortran b/lib/linalg/Makefile.gfortran
index 7e1d97a5bce8bc2982fba617541d9f48bee4e0dd..2a777099e98a2d92687fcaaa58154bdaf397bab7 100644
--- a/lib/linalg/Makefile.gfortran
+++ b/lib/linalg/Makefile.gfortran
@@ -7,13 +7,14 @@ SHELL = /bin/sh
 # ------ FILES ------
 
 SRC =	$(wildcard *.f)
+SRC1 =	$(wildcard *.F)
 
-FILES = $(SRC) Makefile.* README
+FILES = $(SRC) $(SRC1) Makefile.* README
 
 # ------ DEFINITIONS ------
 
 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o)
+OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
 
 # ------ SETTINGS ------
 
@@ -34,7 +35,7 @@ lib: 	$(OBJ)
 # ------ COMPILE RULES ------
 
 %.o:%.F
-	$(F90) $(F90FLAGS) -c $<
+	$(FC) $(FFLAGS) -c $<
 
 %.o:%.f
 	$(FC) $(FFLAGS) -c $<
diff --git a/lib/linalg/Makefile.mpi b/lib/linalg/Makefile.mpi
index dd22ff134caf02e6350304125af4a0420da1cf25..26bfab4c80857355a055c5322f27ea4fdac55b82 100644
--- a/lib/linalg/Makefile.mpi
+++ b/lib/linalg/Makefile.mpi
@@ -7,13 +7,14 @@ SHELL = /bin/sh
 # ------ FILES ------
 
 SRC =	$(wildcard *.f)
+SRC1 =	$(wildcard *.F)
 
-FILES = $(SRC) Makefile.* README
+FILES = $(SRC) $(SRC1) Makefile.* README
 
 # ------ DEFINITIONS ------
 
 LIB = liblinalg.a
-OBJ =   $(SRC:.f=.o)
+OBJ =   $(SRC:.f=.o) $(SRC1:.F=.o)
 
 # ------ SETTINGS ------
 
@@ -34,7 +35,7 @@ lib: 	$(OBJ)
 # ------ COMPILE RULES ------
 
 %.o:%.F
-	$(F90) $(F90FLAGS) -c $<
+	$(FC) $(FFLAGS) -c $<
 
 %.o:%.f
 	$(FC) $(FFLAGS) -c $<
diff --git a/lib/linalg/dasum.f b/lib/linalg/dasum.f
index c1bd78ac815d18d79f8218011d74f780ce8f7073..cc5977f77044edc96caf5a94295ee834d4fe4025 100644
--- a/lib/linalg/dasum.f
+++ b/lib/linalg/dasum.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -26,15 +26,35 @@
 *>    DASUM takes the sum of the absolute values.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -51,10 +71,10 @@
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/daxpy.f b/lib/linalg/daxpy.f
index 64a02d68bc8d1eefd1e8838606fc2abd9e5d0456..cb94fc1e0ab5e99df8d71893be0ecc0e88eefdeb 100644
--- a/lib/linalg/daxpy.f
+++ b/lib/linalg/daxpy.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION DA
 *       INTEGER INCX,INCY,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -28,15 +28,52 @@
 *>    uses unrolled loops for increments equal to one.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DA
+*> \verbatim
+*>          DA is DOUBLE PRECISION
+*>           On entry, DA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*>
+*> \param[in,out] DY
+*> \verbatim
+*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of DY
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -52,10 +89,10 @@
 *  =====================================================================
       SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION DA
diff --git a/lib/linalg/dbdsqr.f b/lib/linalg/dbdsqr.f
index 007e99779b8b836a407524c52f43993529103d2f..93db95e7a8604fbbd50d9878f47c74ad0972c39d 100644
--- a/lib/linalg/dbdsqr.f
+++ b/lib/linalg/dbdsqr.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DBDSQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f"> 
+*> Download DBDSQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
 *                          LDU, C, LDC, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
 *      $                   VT( LDVT, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -40,9 +40,9 @@
 *> left singular vectors from the singular value decomposition (SVD) of
 *> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
 *> zero-shift QR algorithm.  The SVD of B has the form
-*> 
+*>
 *>    B = Q * S * P**T
-*> 
+*>
 *> where S is the diagonal matrix of singular values, Q is an orthogonal
 *> matrix of left singular vectors, and P is an orthogonal matrix of
 *> right singular vectors.  If left singular vectors are requested, this
@@ -113,7 +113,7 @@
 *> \verbatim
 *>          E is DOUBLE PRECISION array, dimension (N-1)
 *>          On entry, the N-1 offdiagonal elements of the bidiagonal
-*>          matrix B. 
+*>          matrix B.
 *>          On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
 *>          will contain the diagonal and superdiagonal elements of a
 *>          bidiagonal matrix orthogonally equivalent to the one given
@@ -179,7 +179,7 @@
 *>                = 1, a split was marked by a positive value in E
 *>                = 2, current block of Z not diagonalized after 30*N
 *>                     iterations (in inner while loop)
-*>                = 3, termination criterion of outer while loop not met 
+*>                = 3, termination criterion of outer while loop not met
 *>                     (program created more than N unreduced blocks)
 *>             else NCVT = NRU = NCC = 0,
 *>                   the algorithm did not converge; D and E contain the
@@ -212,17 +212,28 @@
 *>          algorithm through its inner loop. The algorithms stops
 *>          (and so fails to converge) if the number of passes
 *>          through the inner loop exceeds MAXITR*N**2.
+*>
+*> \endverbatim
+*
+*> \par Note:
+*  ===========
+*>
+*> \verbatim
+*>  Bug report from Cezary Dendek.
+*>  On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is
+*>  removed since it can overflow pretty easily (for N larger or equal
+*>  than 18,919). We instead use MAXITDIVN = MAXITR*N.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date June 2017
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -230,10 +241,10 @@
       SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
      $                   LDU, C, LDC, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     June 2017
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
@@ -266,8 +277,8 @@
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LOWER, ROTATE
-      INTEGER            I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
-     $                   NM12, NM13, OLDLL, OLDM
+      INTEGER            I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M,
+     $                   MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM
       DOUBLE PRECISION   ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
      $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
      $                   SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
@@ -329,7 +340,7 @@
          CALL DLASQ1( N, D, E, WORK, INFO )
 *
 *     If INFO equals 2, dqds didn't finish, try to finish
-*         
+*
          IF( INFO .NE. 2 ) RETURN
          INFO = 0
       END IF
@@ -400,20 +411,21 @@
    40    CONTINUE
    50    CONTINUE
          SMINOA = SMINOA / SQRT( DBLE( N ) )
-         THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
+         THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) )
       ELSE
 *
 *        Absolute accuracy desired
 *
-         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
+         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) )
       END IF
 *
 *     Prepare for main iteration loop for the singular values
 *     (MAXIT is the maximum number of passes through the inner
 *     loop permitted before nonconvergence signalled.)
 *
-      MAXIT = MAXITR*N*N
-      ITER = 0
+      MAXITDIVN = MAXITR*N
+      ITERDIVN = 0
+      ITER = -1
       OLDLL = -1
       OLDM = -1
 *
@@ -429,8 +441,13 @@
 *
       IF( M.LE.1 )
      $   GO TO 160
-      IF( ITER.GT.MAXIT )
-     $   GO TO 200
+*
+      IF( ITER.GE.N ) THEN
+         ITER = ITER - N
+         ITERDIVN = ITERDIVN + 1
+         IF( ITERDIVN.GE.MAXITDIVN )
+     $      GO TO 200
+      END IF
 *
 *     Find diagonal block of matrix to work on
 *
diff --git a/lib/linalg/dcabs1.f b/lib/linalg/dcabs1.f
index f6debb9ac261ffd2987feec1ef8bad9b2ec964bf..d6d850ed0fcb993a561e97a407f08cdb35fb6fa3 100644
--- a/lib/linalg/dcabs1.f
+++ b/lib/linalg/dcabs1.f
@@ -2,47 +2,55 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DCABS1(Z)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 Z
 *       ..
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
-*> DCABS1 computes absolute value of a double complex number 
+*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] Z
+*> \verbatim
+*>          Z is COMPLEX*16
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DCABS1(Z)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 Z
diff --git a/lib/linalg/dcopy.f b/lib/linalg/dcopy.f
index d9d5ac7aa2823e2f1919f80b1f360ef06fc96dc1..27bc08582b39d179563931dd05564eae9e455b69 100644
--- a/lib/linalg/dcopy.f
+++ b/lib/linalg/dcopy.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -24,18 +24,49 @@
 *> \verbatim
 *>
 *>    DCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to one.
+*>    uses unrolled loops for increments equal to 1.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*>
+*> \param[out] DY
+*> \verbatim
+*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of DY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -51,10 +82,10 @@
 *  =====================================================================
       SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
@@ -85,7 +116,7 @@
                DY(I) = DX(I)
             END DO
             IF (N.LT.7) RETURN
-         END IF   
+         END IF
          MP1 = M + 1
          DO I = MP1,N,7
             DY(I) = DX(I)
@@ -96,7 +127,7 @@
             DY(I+5) = DX(I+5)
             DY(I+6) = DX(I+6)
          END DO
-      ELSE      
+      ELSE
 *
 *        code for unequal increments or equal increments
 *          not equal to 1
diff --git a/lib/linalg/ddot.f b/lib/linalg/ddot.f
index cc0c1b7a43e0712322650c6856307d65cdfd13bb..3d18695aab0827181c83cb50ecf4aaca365f9ecd 100644
--- a/lib/linalg/ddot.f
+++ b/lib/linalg/ddot.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -27,15 +27,46 @@
 *>    uses unrolled loops for increments equal to one.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*>
+*> \param[in] DY
+*> \verbatim
+*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of DY
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -51,10 +82,10 @@
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
diff --git a/lib/linalg/dgebd2.f b/lib/linalg/dgebd2.f
index 4b4dcc964159ec8bd46a65b0944f9a135de3bd06..2bec4e29c718a9bf98657cecd669de82ccadffb5 100644
--- a/lib/linalg/dgebd2.f
+++ b/lib/linalg/dgebd2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGEBD2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f"> 
+*> Download DGEBD2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
@@ -27,7 +27,7 @@
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -100,7 +100,7 @@
 *>
 *> \param[out] TAUQ
 *> \verbatim
-*>          TAUQ is DOUBLE PRECISION array dimension (min(M,N))
+*>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
@@ -127,12 +127,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
 *> \ingroup doubleGEcomputational
 *
@@ -189,10 +189,10 @@
 *  =====================================================================
       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lib/linalg/dgebrd.f b/lib/linalg/dgebrd.f
index 6cb61f002f5b9c0d9125b19cf7f9f1d04a490aa6..957cf2e53975afdca2e4d7c397ae8ee9f378d9d6 100644
--- a/lib/linalg/dgebrd.f
+++ b/lib/linalg/dgebrd.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGEBRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f"> 
+*> Download DGEBRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
 *                          INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -101,7 +101,7 @@
 *>
 *> \param[out] TAUQ
 *> \verbatim
-*>          TAUQ is DOUBLE PRECISION array dimension (min(M,N))
+*>          TAUQ is DOUBLE PRECISION array, dimension (min(M,N))
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
@@ -142,12 +142,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date June 2017
 *
 *> \ingroup doubleGEcomputational
 *
@@ -205,10 +205,10 @@
       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
      $                   INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lib/linalg/dgecon.f b/lib/linalg/dgecon.f
index df9d8e1c4030944c2d286c93b068c113ed8bbe13..be20bbcd2ad2b0651f188fb0cd1f9e5007c45e49 100644
--- a/lib/linalg/dgecon.f
+++ b/lib/linalg/dgecon.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGECON + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f"> 
+*> Download DGECON + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
 *                          INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM
 *       INTEGER            INFO, LDA, N
@@ -30,7 +30,7 @@
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -111,12 +111,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
@@ -124,10 +124,10 @@
       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
      $                   INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          NORM
diff --git a/lib/linalg/dgelq2.f b/lib/linalg/dgelq2.f
index 0d64ba52105f00f7bb7fc48830450bc656ea61ad..04aa57fc19324a4f018d1c1f791ac44a35606c7f 100644
--- a/lib/linalg/dgelq2.f
+++ b/lib/linalg/dgelq2.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGELQ2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f"> 
+*> Download DGELQ2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -91,12 +91,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
@@ -121,10 +121,10 @@
 *  =====================================================================
       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lib/linalg/dgelqf.f b/lib/linalg/dgelqf.f
index d27b04ab1d0464cd371de931d249ca453987f73c..834c47168f1ce9f1e8d6bc9cb28ce19c13ccb04d 100644
--- a/lib/linalg/dgelqf.f
+++ b/lib/linalg/dgelqf.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGELQF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f"> 
+*> Download DGELQF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -105,12 +105,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
@@ -135,10 +135,10 @@
 *  =====================================================================
       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lib/linalg/dgelsd.f b/lib/linalg/dgelsd.f
new file mode 100644
index 0000000000000000000000000000000000000000..f2cfd633761a442a6c20b3d4c344706b00f19f1f
--- /dev/null
+++ b/lib/linalg/dgelsd.f
@@ -0,0 +1,629 @@
+*> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGELSD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+*                          WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGELSD computes the minimum-norm solution to a real linear least
+*> squares problem:
+*>     minimize 2-norm(| b - A*x |)
+*> using the singular value decomposition (SVD) of A. A is an M-by-N
+*> matrix which may be rank-deficient.
+*>
+*> Several right hand side vectors b and solution vectors x can be
+*> handled in a single call; they are stored as the columns of the
+*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*> matrix X.
+*>
+*> The problem is solved in three steps:
+*> (1) Reduce the coefficient matrix A to bidiagonal form with
+*>     Householder transformations, reducing the original problem
+*>     into a "bidiagonal least squares problem" (BLS)
+*> (2) Solve the BLS using a divide and conquer approach.
+*> (3) Apply back all the Householder transformations to solve
+*>     the original least squares problem.
+*>
+*> The effective rank of A is determined by treating as zero those
+*> singular values which are less than RCOND times the largest singular
+*> value.
+*>
+*> The divide and conquer algorithm makes very mild assumptions about
+*> floating point arithmetic. It will work on machines with a guard
+*> digit in add/subtract, or on those binary machines without guard
+*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*> without guard digits, but we know of none.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>          The number of right hand sides, i.e., the number of columns
+*>          of the matrices B and X. NRHS >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*>          On exit, A has been destroyed.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>          On entry, the M-by-NRHS right hand side matrix B.
+*>          On exit, B is overwritten by the N-by-NRHS solution
+*>          matrix X.  If m >= n and RANK = n, the residual
+*>          sum-of-squares for the solution in the i-th column is given
+*>          by the sum of squares of elements n+1:m in that column.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension (min(M,N))
+*>          The singular values of A in decreasing order.
+*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>          RCOND is used to determine the effective rank of A.
+*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*>          If RCOND < 0, machine precision is used instead.
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>          The effective rank of A, i.e., the number of singular values
+*>          which are greater than RCOND*S(1).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. LWORK must be at least 1.
+*>          The exact minimum amount of workspace needed depends on M,
+*>          N and NRHS. As long as LWORK is at least
+*>              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+*>          if M is greater than or equal to N or
+*>              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+*>          if M is less than N, the code will execute correctly.
+*>          SMLSIZ is returned by ILAENV and is equal to the maximum
+*>          size of the subproblems at the bottom of the computation
+*>          tree (usually about 25), and
+*>             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*>          For good performance, LWORK should generally be larger.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the WORK array, returns
+*>          this value as the first entry of the WORK array, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
+*>          LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
+*>          where MINMN = MIN( M,N ).
+*>          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  the algorithm for computing the SVD failed to converge;
+*>                if INFO = i, i off-diagonal elements of an intermediate
+*>                bidiagonal form did not converge to zero.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup doubleGEsolve
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
+     $                   MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
+     $                   DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      LIWORK = 1
+      MINMN = MAX( 1, MINMN )
+      NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
+     $       LOG( TWO ) ) + 1, 0 )
+*
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         LIWORK = 3*MINMN*NLVL + 11*MINMN
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
+            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
+            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
+         END IF
+         IF( N.GT.M ) THEN
+            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
+!     XXX: Ensure the Path 2a case below is triggered.  The workspace
+!     calculation should use queries for all routines eventually.
+               MAXWRK = MAX( MAXWRK,
+     $              4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
+            ELSE
+*
+*              Path 2 - remaining underdetermined cases.
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
+            END IF
+            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+         IWORK( 1 ) = LIWORK
+
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure certain entries of B are zero.
+*
+      IF( M.LT.N )
+     $   CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+            END IF
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+*
+*        Bidiagonalize R in A.
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize A.
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWORK
+      RETURN
+*
+*     End of DGELSD
+*
+      END
diff --git a/lib/linalg/dgelss.f b/lib/linalg/dgelss.f
new file mode 100644
index 0000000000000000000000000000000000000000..674a7ba78400b2f19e3f2370952bae8f8f59cea8
--- /dev/null
+++ b/lib/linalg/dgelss.f
@@ -0,0 +1,747 @@
+*> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGELSS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+*                          WORK, LWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGELSS computes the minimum norm solution to a real linear least
+*> squares problem:
+*>
+*> Minimize 2-norm(| b - A*x |).
+*>
+*> using the singular value decomposition (SVD) of A. A is an M-by-N
+*> matrix which may be rank-deficient.
+*>
+*> Several right hand side vectors b and solution vectors x can be
+*> handled in a single call; they are stored as the columns of the
+*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*> X.
+*>
+*> The effective rank of A is determined by treating as zero those
+*> singular values which are less than RCOND times the largest singular
+*> value.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>          The number of right hand sides, i.e., the number of columns
+*>          of the matrices B and X. NRHS >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*>          On exit, the first min(m,n) rows of A are overwritten with
+*>          its right singular vectors, stored rowwise.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>          On entry, the M-by-NRHS right hand side matrix B.
+*>          On exit, B is overwritten by the N-by-NRHS solution
+*>          matrix X.  If m >= n and RANK = n, the residual
+*>          sum-of-squares for the solution in the i-th column is given
+*>          by the sum of squares of elements n+1:m in that column.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension (min(M,N))
+*>          The singular values of A in decreasing order.
+*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>          RCOND is used to determine the effective rank of A.
+*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*>          If RCOND < 0, machine precision is used instead.
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>          The effective rank of A, i.e., the number of singular values
+*>          which are greater than RCOND*S(1).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. LWORK >= 1, and also:
+*>          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
+*>          For good performance, LWORK should generally be larger.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the WORK array, returns
+*>          this value as the first entry of the WORK array, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  the algorithm for computing the SVD failed to converge;
+*>                if INFO = i, i off-diagonal elements of an intermediate
+*>                bidiagonal form did not converge to zero.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleGEsolve
+*
+*  =====================================================================
+      SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      INTEGER            LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
+     $                   LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
+     $                   LWORK_DGELQF
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
+     $                   DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
+     $                   DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 ) THEN
+         MINWRK = 1
+         MAXWRK = 1
+         IF( MINMN.GT.0 ) THEN
+            MM = M
+            MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
+            IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*              Path 1a - overdetermined, with many more rows than
+*                        columns
+*
+*              Compute space needed for DGEQRF
+               CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
+               LWORK_DGEQRF=DUM(1)
+*              Compute space needed for DORMQR
+               CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
+     $                   LDB, DUM(1), -1, INFO )
+               LWORK_DORMQR=DUM(1)
+               MM = N
+               MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
+               MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
+            END IF
+            IF( M.GE.N ) THEN
+*
+*              Path 1 - overdetermined or exactly determined
+*
+*              Compute workspace needed for DBDSQR
+*
+               BDSPAC = MAX( 1, 5*N )
+*              Compute space needed for DGEBRD
+               CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+               LWORK_DGEBRD=DUM(1)
+*              Compute space needed for DORMBR
+               CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
+     $                B, LDB, DUM(1), -1, INFO )
+               LWORK_DORMBR=DUM(1)
+*              Compute space needed for DORGBR
+               CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+               LWORK_DORGBR=DUM(1)
+*              Compute total workspace needed
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+               MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+            IF( N.GT.M ) THEN
+*
+*              Compute workspace needed for DBDSQR
+*
+               BDSPAC = MAX( 1, 5*M )
+               MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
+               IF( N.GE.MNTHR ) THEN
+*
+*                 Path 2a - underdetermined, with many more columns
+*                 than rows
+*
+*                 Compute space needed for DGELQF
+                  CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
+     $                -1, INFO )
+                  LWORK_DGELQF=DUM(1)
+*                 Compute space needed for DGEBRD
+                  CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+                  LWORK_DGEBRD=DUM(1)
+*                 Compute space needed for DORMBR
+                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
+     $                DUM(1), B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMBR=DUM(1)
+*                 Compute space needed for DORGBR
+                  CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+                  LWORK_DORGBR=DUM(1)
+*                 Compute space needed for DORMLQ
+                  CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
+     $                 B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMLQ=DUM(1)
+*                 Compute total workspace needed
+                  MAXWRK = M + LWORK_DGELQF
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
+                  MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
+                  IF( NRHS.GT.1 ) THEN
+                     MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
+                  ELSE
+                     MAXWRK = MAX( MAXWRK, M*M + 2*M )
+                  END IF
+                  MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
+               ELSE
+*
+*                 Path 2 - underdetermined
+*
+*                 Compute space needed for DGEBRD
+                  CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+                  LWORK_DGEBRD=DUM(1)
+*                 Compute space needed for DORMBR
+                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
+     $                DUM(1), B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMBR=DUM(1)
+*                 Compute space needed for DORGBR
+                  CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+                  LWORK_DORGBR=DUM(1)
+                  MAXWRK = 3*M + LWORK_DGEBRD
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, N*NRHS )
+               END IF
+            END IF
+            MAXWRK = MAX( MINWRK, MAXWRK )
+         END IF
+         WORK( 1 ) = MAXWRK
+*
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $      INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+         CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
+*
+         CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of L in WORK(IL) and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need M*M+M+BDSPAC)
+*
+         CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IE
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
+     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
+            CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
+               CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, ZERO, WORK( IWORK ), 1 )
+            CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+         CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGELSS
+*
+      END
diff --git a/lib/linalg/dgemm.f b/lib/linalg/dgemm.f
index 45d001b7ab935a7938324794fd91ed8bd7312432..3a60ca4e730b55d6361a963dc71f72bac9ce18b6 100644
--- a/lib/linalg/dgemm.f
+++ b/lib/linalg/dgemm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER K,LDA,LDB,LDC,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -97,7 +97,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
 *>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
@@ -116,7 +116,7 @@
 *>
 *> \param[in] B
 *> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*>          B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
 *>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
 *>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
 *>           part of the array  B  must contain the matrix  B,  otherwise
@@ -142,7 +142,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*>          C is DOUBLE PRECISION array, dimension ( LDC, N )
 *>           Before entry, the leading  m by n  part of the array  C must
 *>           contain the matrix  C,  except when  beta  is zero, in which
 *>           case C need not be set on entry.
@@ -161,12 +161,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -187,10 +187,10 @@
 *  =====================================================================
       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
@@ -311,12 +311,10 @@
    60                 CONTINUE
                   END IF
                   DO 80 L = 1,K
-                      IF (B(L,J).NE.ZERO) THEN
-                          TEMP = ALPHA*B(L,J)
-                          DO 70 I = 1,M
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                     CONTINUE
-                      END IF
+                      TEMP = ALPHA*B(L,J)
+                      DO 70 I = 1,M
+                          C(I,J) = C(I,J) + TEMP*A(I,L)
+   70                 CONTINUE
    80             CONTINUE
    90         CONTINUE
           ELSE
@@ -353,12 +351,10 @@
   140                 CONTINUE
                   END IF
                   DO 160 L = 1,K
-                      IF (B(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*B(J,L)
-                          DO 150 I = 1,M
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                     CONTINUE
-                      END IF
+                      TEMP = ALPHA*B(J,L)
+                      DO 150 I = 1,M
+                          C(I,J) = C(I,J) + TEMP*A(I,L)
+  150                 CONTINUE
   160             CONTINUE
   170         CONTINUE
           ELSE
diff --git a/lib/linalg/dgemv.f b/lib/linalg/dgemv.f
index 675257fac7e8f7438d5aa304b083ea6e765ceced..08e395b1cd2f13ef4291d0161e428475a59d6ace 100644
--- a/lib/linalg/dgemv.f
+++ b/lib/linalg/dgemv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER INCX,INCY,LDA,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -71,7 +71,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients.
 *> \endverbatim
@@ -86,7 +86,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
@@ -110,7 +110,7 @@
 *>
 *> \param[in,out] Y
 *> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
+*>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
@@ -129,12 +129,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level2
 *
@@ -156,10 +156,10 @@
 *  =====================================================================
       SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
@@ -278,24 +278,20 @@
           JX = KX
           IF (INCY.EQ.1) THEN
               DO 60 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      DO 50 I = 1,M
-                          Y(I) = Y(I) + TEMP*A(I,J)
-   50                 CONTINUE
-                  END IF
+                  TEMP = ALPHA*X(JX)
+                  DO 50 I = 1,M
+                      Y(I) = Y(I) + TEMP*A(I,J)
+   50             CONTINUE
                   JX = JX + INCX
    60         CONTINUE
           ELSE
               DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IY = KY
-                      DO 70 I = 1,M
-                          Y(IY) = Y(IY) + TEMP*A(I,J)
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
+                  TEMP = ALPHA*X(JX)
+                  IY = KY
+                  DO 70 I = 1,M
+                      Y(IY) = Y(IY) + TEMP*A(I,J)
+                      IY = IY + INCY
+   70             CONTINUE
                   JX = JX + INCX
    80         CONTINUE
           END IF
diff --git a/lib/linalg/dgeqr2.f b/lib/linalg/dgeqr2.f
index 8e63db886668c2dfd73fef5938c5b6a251a38117..c1e91e9bdeb3ce1807d8c1b5d25672b09b4c94b3 100644
--- a/lib/linalg/dgeqr2.f
+++ b/lib/linalg/dgeqr2.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGEQR2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f"> 
+*> Download DGEQR2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -91,12 +91,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
@@ -121,10 +121,10 @@
 *  =====================================================================
       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lib/linalg/dgeqrf.f b/lib/linalg/dgeqrf.f
index 299025758173b562e8e4a96fce309f024a5cf87e..83d7d8dd713d469708945721db395e11c916bc72 100644
--- a/lib/linalg/dgeqrf.f
+++ b/lib/linalg/dgeqrf.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGEQRF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f"> 
+*> Download DGEQRF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -106,12 +106,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
@@ -136,10 +136,10 @@
 *  =====================================================================
       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lib/linalg/dger.f b/lib/linalg/dger.f
index a042483703bb5a4c7d13a325e088621da59ae91a..bdc8ef4349d740e0d39a8c2aa2942165c37e9980 100644
--- a/lib/linalg/dger.f
+++ b/lib/linalg/dger.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER INCX,INCY,LDA,M,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -57,7 +57,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the m
 *>           element vector x.
@@ -72,7 +72,7 @@
 *>
 *> \param[in] Y
 *> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
+*>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y.
@@ -87,7 +87,7 @@
 *>
 *> \param[in,out] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients. On exit, A is
 *>           overwritten by the updated matrix.
@@ -104,12 +104,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level2
 *
@@ -130,10 +130,10 @@
 *  =====================================================================
       SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA
diff --git a/lib/linalg/dgesv.f b/lib/linalg/dgesv.f
index 8d47f839dce221867a940cdad64ec390f789c755..23999e167f4681c72ccd8f8227ef79dab41f52be 100644
--- a/lib/linalg/dgesv.f
+++ b/lib/linalg/dgesv.f
@@ -2,16 +2,16 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGESV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f"> 
+*> Download DGESV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
@@ -19,7 +19,7 @@
 *  ===========
 *
 *       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LDB, N, NRHS
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -110,22 +110,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEsolve
 *
 *  =====================================================================
       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.0) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LDB, N, NRHS
diff --git a/lib/linalg/dgesvd.f b/lib/linalg/dgesvd.f
index 898570b66932e7dcfe0b6fc5f7b58259fb54dd33..ddf0bd5c2d92a79c0db93fb59a43ba53455e9fbf 100644
--- a/lib/linalg/dgesvd.f
+++ b/lib/linalg/dgesvd.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGESVD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f"> 
+*> Download DGESVD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
 *                          WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBU, JOBVT
 *       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
 *      $                   VT( LDVT, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -173,9 +173,9 @@
 *>          LWORK is INTEGER
 *>          The dimension of the array WORK.
 *>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
-*>             - PATH 1  (M much larger than N, JOBU='N') 
+*>             - PATH 1  (M much larger than N, JOBU='N')
 *>             - PATH 1t (N much larger than M, JOBVT='N')
-*>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
+*>          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
 *>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
@@ -198,10 +198,10 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
 *> \date April 2012
 *
@@ -211,7 +211,7 @@
       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
      $                   VT, LDVT, WORK, LWORK, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.1) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     April 2012
@@ -314,24 +314,24 @@
             BDSPAC = 5*N
 *           Compute space needed for DGEQRF
             CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
-            LWORK_DGEQRF=DUM(1)
+            LWORK_DGEQRF = INT( DUM(1) )
 *           Compute space needed for DORGQR
             CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
-            LWORK_DORGQR_N=DUM(1)
+            LWORK_DORGQR_N = INT( DUM(1) )
             CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
-            LWORK_DORGQR_M=DUM(1)
+            LWORK_DORGQR_M = INT( DUM(1) )
 *           Compute space needed for DGEBRD
             CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
      $                   DUM(1), DUM(1), -1, IERR )
-            LWORK_DGEBRD=DUM(1)
+            LWORK_DGEBRD = INT( DUM(1) )
 *           Compute space needed for DORGBR P
             CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
      $                   DUM(1), -1, IERR )
-            LWORK_DORGBR_P=DUM(1)
+            LWORK_DORGBR_P = INT( DUM(1) )
 *           Compute space needed for DORGBR Q
             CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
      $                   DUM(1), -1, IERR )
-            LWORK_DORGBR_Q=DUM(1)
+            LWORK_DORGBR_Q = INT( DUM(1) )
 *
             IF( M.GE.MNTHR ) THEN
                IF( WNTUN ) THEN
@@ -339,9 +339,9 @@
 *                 Path 1 (M much larger than N, JOBU='N')
 *
                   MAXWRK = N + LWORK_DGEQRF
-                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DGEBRD )
+                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
                   IF( WNTVO .OR. WNTVAS )
-     $               MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
+     $               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
                   MAXWRK = MAX( MAXWRK, BDSPAC )
                   MINWRK = MAX( 4*N, BDSPAC )
                ELSE IF( WNTUO .AND. WNTVN ) THEN
@@ -349,97 +349,97 @@
 *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
-                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUO .AND. WNTVAS ) THEN
 *
 *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
 *                 'A')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
-                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUS .AND. WNTVN ) THEN
 *
 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUS .AND. WNTVO ) THEN
 *
 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = 2*N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUS .AND. WNTVAS ) THEN
 *
 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
 *                 'A')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUA .AND. WNTVN ) THEN
 *
 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUA .AND. WNTVO ) THEN
 *
 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = 2*N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                ELSE IF( WNTUA .AND. WNTVAS ) THEN
 *
 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
 *                 'A')
 *
                   WRKBL = N + LWORK_DGEQRF
-                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
-                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = N*N + WRKBL
-                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MINWRK = MAX( 3*N + M, BDSPAC )
                END IF
             ELSE
 *
@@ -447,25 +447,25 @@
 *
                CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
      $                   DUM(1), DUM(1), -1, IERR )
-               LWORK_DGEBRD=DUM(1)
+               LWORK_DGEBRD = INT( DUM(1) )
                MAXWRK = 3*N + LWORK_DGEBRD
                IF( WNTUS .OR. WNTUO ) THEN
                   CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
      $                   DUM(1), -1, IERR )
-                  LWORK_DORGBR_Q=DUM(1)
-                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
+                  LWORK_DORGBR_Q = INT( DUM(1) )
+                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
                END IF
                IF( WNTUA ) THEN
                   CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
      $                   DUM(1), -1, IERR )
-                  LWORK_DORGBR_Q=DUM(1)
-                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
+                  LWORK_DORGBR_Q = INT( DUM(1) )
+                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
                END IF
                IF( .NOT.WNTVN ) THEN
-                 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
+                 MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
                END IF
                MAXWRK = MAX( MAXWRK, BDSPAC )
-               MINWRK = MAX( 3*N+M, BDSPAC )
+               MINWRK = MAX( 3*N + M, BDSPAC )
             END IF
          ELSE IF( MINMN.GT.0 ) THEN
 *
@@ -475,33 +475,33 @@
             BDSPAC = 5*M
 *           Compute space needed for DGELQF
             CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
-            LWORK_DGELQF=DUM(1)
+            LWORK_DGELQF = INT( DUM(1) )
 *           Compute space needed for DORGLQ
             CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
-            LWORK_DORGLQ_N=DUM(1)
+            LWORK_DORGLQ_N = INT( DUM(1) )
             CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
-            LWORK_DORGLQ_M=DUM(1)
+            LWORK_DORGLQ_M = INT( DUM(1) )
 *           Compute space needed for DGEBRD
             CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
      $                   DUM(1), DUM(1), -1, IERR )
-            LWORK_DGEBRD=DUM(1)
+            LWORK_DGEBRD = INT( DUM(1) )
 *            Compute space needed for DORGBR P
             CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
      $                   DUM(1), -1, IERR )
-            LWORK_DORGBR_P=DUM(1)
+            LWORK_DORGBR_P = INT( DUM(1) )
 *           Compute space needed for DORGBR Q
             CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
      $                   DUM(1), -1, IERR )
-            LWORK_DORGBR_Q=DUM(1)
+            LWORK_DORGBR_Q = INT( DUM(1) )
             IF( N.GE.MNTHR ) THEN
                IF( WNTVN ) THEN
 *
 *                 Path 1t(N much larger than M, JOBVT='N')
 *
                   MAXWRK = M + LWORK_DGELQF
-                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_DGEBRD )
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DGEBRD )
                   IF( WNTUO .OR. WNTUAS )
-     $               MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
+     $               MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
                   MAXWRK = MAX( MAXWRK, BDSPAC )
                   MINWRK = MAX( 4*M, BDSPAC )
                ELSE IF( WNTVO .AND. WNTUN ) THEN
@@ -509,97 +509,97 @@
 *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
-                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVO .AND. WNTUAS ) THEN
 *
 *                 Path 3t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='O')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
-                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVS .AND. WNTUN ) THEN
 *
 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVS .AND. WNTUO ) THEN
 *
 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = 2*M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVS .AND. WNTUAS ) THEN
 *
 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='S')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVA .AND. WNTUN ) THEN
 *
 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVA .AND. WNTUO ) THEN
 *
 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = 2*M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                ELSE IF( WNTVA .AND. WNTUAS ) THEN
 *
 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='A')
 *
                   WRKBL = M + LWORK_DGELQF
-                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
-                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
+                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
+                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
                   WRKBL = MAX( WRKBL, BDSPAC )
                   MAXWRK = M*M + WRKBL
-                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MINWRK = MAX( 3*M + N, BDSPAC )
                END IF
             ELSE
 *
@@ -607,26 +607,26 @@
 *
                CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
      $                   DUM(1), DUM(1), -1, IERR )
-               LWORK_DGEBRD=DUM(1)
+               LWORK_DGEBRD = INT( DUM(1) )
                MAXWRK = 3*M + LWORK_DGEBRD
                IF( WNTVS .OR. WNTVO ) THEN
 *                Compute space needed for DORGBR P
                  CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
      $                   DUM(1), -1, IERR )
-                 LWORK_DORGBR_P=DUM(1)
-                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
+                 LWORK_DORGBR_P = INT( DUM(1) )
+                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
                END IF
                IF( WNTVA ) THEN
                  CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
      $                   DUM(1), -1, IERR )
-                 LWORK_DORGBR_P=DUM(1)
-                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
+                 LWORK_DORGBR_P = INT( DUM(1) )
+                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
                END IF
                IF( .NOT.WNTUN ) THEN
-                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
                END IF
                MAXWRK = MAX( MAXWRK, BDSPAC )
-               MINWRK = MAX( 3*M+N, BDSPAC )
+               MINWRK = MAX( 3*M + N, BDSPAC )
             END IF
          END IF
          MAXWRK = MAX( MAXWRK, MINWRK )
@@ -685,21 +685,24 @@
                IWORK = ITAU + N
 *
 *              Compute A=Q*R
-*              (Workspace: need 2*N, prefer N+N*NB)
+*              (Workspace: need 2*N, prefer N + N*NB)
 *
                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
      $                      LWORK-IWORK+1, IERR )
 *
 *              Zero out below R
 *
-               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IF( N .GT. 1 ) THEN
+                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                         LDA )
+               END IF
                IE = 1
                ITAUQ = IE + N
                ITAUP = ITAUQ + N
                IWORK = ITAUP + N
 *
 *              Bidiagonalize R in A
-*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*              (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
@@ -708,7 +711,7 @@
                IF( WNTVO .OR. WNTVAS ) THEN
 *
 *                 If right singular vectors desired, generate P'.
-*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                   CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -739,13 +742,13 @@
 *                 Sufficient workspace for a fast algorithm
 *
                   IR = 1
-                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
 *
 *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
 *
                      LDWRKU = LDA
                      LDWRKR = LDA
-                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
 *
 *                    WORK(IU) is LDA by N, WORK(IR) is N by N
 *
@@ -762,7 +765,7 @@
                   IWORK = ITAU + N
 *
 *                 Compute A=Q*R
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -774,7 +777,7 @@
      $                         LDWRKR )
 *
 *                 Generate Q in A
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -784,14 +787,14 @@
                   IWORK = ITAUP + N
 *
 *                 Bidiagonalize R in WORK(IR)
-*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                   CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Generate left vectors bidiagonalizing R
-*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
      $                         WORK( ITAUQ ), WORK( IWORK ),
@@ -800,7 +803,7 @@
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of R in WORK(IR)
-*                 (Workspace: need N*N+BDSPAC)
+*                 (Workspace: need N*N + BDSPAC)
 *
                   CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
      $                         WORK( IR ), LDWRKR, DUM, 1,
@@ -809,7 +812,7 @@
 *
 *                 Multiply Q in A by left singular vectors of R in
 *                 WORK(IR), storing result in WORK(IU) and copying to A
-*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
 *
                   DO 10 I = 1, M, LDWRKU
                      CHUNK = MIN( M-I+1, LDWRKU )
@@ -830,14 +833,14 @@
                   IWORK = ITAUP + N
 *
 *                 Bidiagonalize A
-*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*                 (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
 *
                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Generate left vectors bidiagonalizing A
-*                 (Workspace: need 4*N, prefer 3*N+N*NB)
+*                 (Workspace: need 4*N, prefer 3*N + N*NB)
 *
                   CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -863,13 +866,13 @@
 *                 Sufficient workspace for a fast algorithm
 *
                   IR = 1
-                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
 *
                      LDWRKU = LDA
                      LDWRKR = LDA
-                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is N by N
 *
@@ -886,7 +889,7 @@
                   IWORK = ITAU + N
 *
 *                 Compute A=Q*R
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -899,7 +902,7 @@
      $                            VT( 2, 1 ), LDVT )
 *
 *                 Generate Q in A
-*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -909,7 +912,7 @@
                   IWORK = ITAUP + N
 *
 *                 Bidiagonalize R in VT, copying result to WORK(IR)
-*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
@@ -917,14 +920,14 @@
                   CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
 *
 *                 Generate left vectors bidiagonalizing R in WORK(IR)
-*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
      $                         WORK( ITAUQ ), WORK( IWORK ),
      $                         LWORK-IWORK+1, IERR )
 *
 *                 Generate right vectors bidiagonalizing R in VT
-*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
+*                 (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
 *
                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -933,7 +936,7 @@
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of R in WORK(IR) and computing right
 *                 singular vectors of R in VT
-*                 (Workspace: need N*N+BDSPAC)
+*                 (Workspace: need N*N + BDSPAC)
 *
                   CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
      $                         WORK( IR ), LDWRKR, DUM, 1,
@@ -942,7 +945,7 @@
 *
 *                 Multiply Q in A by left singular vectors of R in
 *                 WORK(IR), storing result in WORK(IU) and copying to A
-*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
 *
                   DO 20 I = 1, M, LDWRKU
                      CHUNK = MIN( M-I+1, LDWRKU )
@@ -961,7 +964,7 @@
                   IWORK = ITAU + N
 *
 *                 Compute A=Q*R
-*                 (Workspace: need 2*N, prefer N+N*NB)
+*                 (Workspace: need 2*N, prefer N + N*NB)
 *
                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -974,7 +977,7 @@
      $                            VT( 2, 1 ), LDVT )
 *
 *                 Generate Q in A
-*                 (Workspace: need 2*N, prefer N+N*NB)
+*                 (Workspace: need 2*N, prefer N + N*NB)
 *
                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -984,21 +987,21 @@
                   IWORK = ITAUP + N
 *
 *                 Bidiagonalize R in VT
-*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                 (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Multiply Q in A by left vectors bidiagonalizing R
-*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                 (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                   CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
      $                         LWORK-IWORK+1, IERR )
 *
 *                 Generate right vectors bidiagonalizing R in VT
-*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1042,7 +1045,7 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1055,7 +1058,7 @@
      $                            WORK( IR+1 ), LDWRKR )
 *
 *                    Generate Q in A
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1065,7 +1068,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in WORK(IR)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -1073,7 +1076,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left vectors bidiagonalizing R in WORK(IR)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
      $                            WORK( ITAUQ ), WORK( IWORK ),
@@ -1082,7 +1085,7 @@
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IR)
-*                    (Workspace: need N*N+BDSPAC)
+*                    (Workspace: need N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
      $                            1, WORK( IR ), LDWRKR, DUM, 1,
@@ -1103,14 +1106,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1121,18 +1124,20 @@
 *
 *                    Zero out below R in A
 *
-                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
-     $                            LDA )
+                     IF( N .GT. 1 ) THEN
+                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                               A( 2, 1 ), LDA )
+                     END IF
 *
 *                    Bidiagonalize R in A
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Multiply Q in U by left vectors bidiagonalizing R
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
@@ -1167,7 +1172,7 @@
                         LDWRKU = LDA
                         IR = IU + LDWRKU*N
                         LDWRKR = LDA
-                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
 *
@@ -1186,7 +1191,7 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R
-*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1199,7 +1204,7 @@
      $                            WORK( IU+1 ), LDWRKU )
 *
 *                    Generate Q in A
-*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
 *
                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1210,7 +1215,7 @@
 *
 *                    Bidiagonalize R in WORK(IU), copying result to
 *                    WORK(IR)
-*                    (Workspace: need 2*N*N+4*N,
+*                    (Workspace: need 2*N*N + 4*N,
 *                                prefer 2*N*N+3*N+2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
@@ -1221,14 +1226,14 @@
      $                            WORK( IR ), LDWRKR )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
      $                            WORK( ITAUQ ), WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need 2*N*N+4*N-1,
+*                    (Workspace: need 2*N*N + 4*N-1,
 *                                prefer 2*N*N+3*N+(N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
@@ -1239,7 +1244,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in WORK(IR)
-*                    (Workspace: need 2*N*N+BDSPAC)
+*                    (Workspace: need 2*N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
      $                            WORK( IR ), LDWRKR, WORK( IU ),
@@ -1266,14 +1271,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1284,25 +1289,27 @@
 *
 *                    Zero out below R in A
 *
-                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
-     $                            LDA )
+                     IF( N .GT. 1 ) THEN
+                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                               A( 2, 1 ), LDA )
+                     END IF
 *
 *                    Bidiagonalize R in A
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Multiply Q in U by left vectors bidiagonalizing R
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right vectors bidiagonalizing R in A
-*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1346,7 +1353,7 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1359,7 +1366,7 @@
      $                            WORK( IU+1 ), LDWRKU )
 *
 *                    Generate Q in A
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1369,7 +1376,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in WORK(IU), copying result to VT
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -1379,14 +1386,14 @@
      $                            LDVT )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
      $                            WORK( ITAUQ ), WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in VT
-*                    (Workspace: need N*N+4*N-1,
+*                    (Workspace: need N*N + 4*N-1,
 *                                prefer N*N+3*N+(N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
@@ -1396,7 +1403,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in VT
-*                    (Workspace: need N*N+BDSPAC)
+*                    (Workspace: need N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
@@ -1417,14 +1424,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1441,7 +1448,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in VT
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -1449,14 +1456,14 @@
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in VT
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in VT
-*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1503,7 +1510,7 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1517,7 +1524,7 @@
      $                            WORK( IR+1 ), LDWRKR )
 *
 *                    Generate Q in U
-*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1527,7 +1534,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in WORK(IR)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -1535,7 +1542,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
      $                            WORK( ITAUQ ), WORK( IWORK ),
@@ -1544,7 +1551,7 @@
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IR)
-*                    (Workspace: need N*N+BDSPAC)
+*                    (Workspace: need N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
      $                            1, WORK( IR ), LDWRKR, DUM, 1,
@@ -1569,14 +1576,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need N+M, prefer N+M*NB)
+*                    (Workspace: need N + M, prefer N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1587,11 +1594,13 @@
 *
 *                    Zero out below R in A
 *
-                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
-     $                            LDA )
+                     IF( N .GT. 1 ) THEN
+                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                                A( 2, 1 ), LDA )
+                     END IF
 *
 *                    Bidiagonalize R in A
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -1599,7 +1608,7 @@
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in A
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
@@ -1634,7 +1643,7 @@
                         LDWRKU = LDA
                         IR = IU + LDWRKU*N
                         LDWRKR = LDA
-                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
 *
@@ -1653,14 +1662,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
+*                    (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1678,7 +1687,7 @@
 *
 *                    Bidiagonalize R in WORK(IU), copying result to
 *                    WORK(IR)
-*                    (Workspace: need 2*N*N+4*N,
+*                    (Workspace: need 2*N*N + 4*N,
 *                                prefer 2*N*N+3*N+2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
@@ -1689,14 +1698,14 @@
      $                            WORK( IR ), LDWRKR )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
      $                            WORK( ITAUQ ), WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need 2*N*N+4*N-1,
+*                    (Workspace: need 2*N*N + 4*N-1,
 *                                prefer 2*N*N+3*N+(N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
@@ -1707,7 +1716,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in WORK(IR)
-*                    (Workspace: need 2*N*N+BDSPAC)
+*                    (Workspace: need 2*N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
      $                            WORK( IR ), LDWRKR, WORK( IU ),
@@ -1737,14 +1746,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need N+M, prefer N+M*NB)
+*                    (Workspace: need N + M, prefer N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1755,11 +1764,13 @@
 *
 *                    Zero out below R in A
 *
-                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
-     $                            LDA )
+                     IF( N .GT. 1 ) THEN
+                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                                A( 2, 1 ), LDA )
+                     END IF
 *
 *                    Bidiagonalize R in A
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -1767,14 +1778,14 @@
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in A
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in A
-*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1818,14 +1829,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1842,7 +1853,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in WORK(IU), copying result to VT
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -1852,14 +1863,14 @@
      $                            LDVT )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
 *
                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
      $                            WORK( ITAUQ ), WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in VT
-*                    (Workspace: need N*N+4*N-1,
+*                    (Workspace: need N*N + 4*N-1,
 *                                prefer N*N+3*N+(N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
@@ -1869,7 +1880,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in VT
-*                    (Workspace: need N*N+BDSPAC)
+*                    (Workspace: need N*N + BDSPAC)
 *
                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
@@ -1894,14 +1905,14 @@
                      IWORK = ITAU + N
 *
 *                    Compute A=Q*R, copying result to U
-*                    (Workspace: need 2*N, prefer N+N*NB)
+*                    (Workspace: need 2*N, prefer N + N*NB)
 *
                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
 *
 *                    Generate Q in U
-*                    (Workspace: need N+M, prefer N+M*NB)
+*                    (Workspace: need N + M, prefer N + M*NB)
 *
                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1918,7 +1929,7 @@
                      IWORK = ITAUP + N
 *
 *                    Bidiagonalize R in VT
-*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
 *
                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -1926,14 +1937,14 @@
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in VT
-*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
 *
                      CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in VT
-*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -1967,7 +1978,7 @@
             IWORK = ITAUP + N
 *
 *           Bidiagonalize A
-*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*           (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
 *
             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
@@ -1976,7 +1987,7 @@
 *
 *              If left singular vectors desired in U, copy result to U
 *              and generate left bidiagonalizing vectors in U
-*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
+*              (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
 *
                CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
                IF( WNTUS )
@@ -1990,7 +2001,7 @@
 *
 *              If right singular vectors desired in VT, copy result to
 *              VT and generate right bidiagonalizing vectors in VT
-*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
                CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
@@ -2000,7 +2011,7 @@
 *
 *              If left singular vectors desired in A, generate left
 *              bidiagonalizing vectors in A
-*              (Workspace: need 4*N, prefer 3*N+N*NB)
+*              (Workspace: need 4*N, prefer 3*N + N*NB)
 *
                CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2009,7 +2020,7 @@
 *
 *              If right singular vectors desired in A, generate right
 *              bidiagonalizing vectors in A
-*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
 *
                CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2071,7 +2082,7 @@
                IWORK = ITAU + M
 *
 *              Compute A=L*Q
-*              (Workspace: need 2*M, prefer M+M*NB)
+*              (Workspace: need 2*M, prefer M + M*NB)
 *
                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
      $                      LWORK-IWORK+1, IERR )
@@ -2085,7 +2096,7 @@
                IWORK = ITAUP + M
 *
 *              Bidiagonalize L in A
-*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*              (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
@@ -2093,7 +2104,7 @@
                IF( WNTUO .OR. WNTUAS ) THEN
 *
 *                 If left singular vectors desired, generate Q
-*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*                 (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                   CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2126,14 +2137,14 @@
 *                 Sufficient workspace for a fast algorithm
 *
                   IR = 1
-                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
 *
                      LDWRKU = LDA
                      CHUNK = N
                      LDWRKR = LDA
-                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
 *
@@ -2152,7 +2163,7 @@
                   IWORK = ITAU + M
 *
 *                 Compute A=L*Q
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2164,7 +2175,7 @@
      $                         WORK( IR+LDWRKR ), LDWRKR )
 *
 *                 Generate Q in A
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2174,14 +2185,14 @@
                   IWORK = ITAUP + M
 *
 *                 Bidiagonalize L in WORK(IR)
-*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                   CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Generate right vectors bidiagonalizing L
-*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
 *
                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
      $                         WORK( ITAUP ), WORK( IWORK ),
@@ -2190,7 +2201,7 @@
 *
 *                 Perform bidiagonal QR iteration, computing right
 *                 singular vectors of L in WORK(IR)
-*                 (Workspace: need M*M+BDSPAC)
+*                 (Workspace: need M*M + BDSPAC)
 *
                   CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
@@ -2199,7 +2210,7 @@
 *
 *                 Multiply right singular vectors of L in WORK(IR) by Q
 *                 in A, storing result in WORK(IU) and copying to A
-*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
+*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
 *
                   DO 30 I = 1, N, CHUNK
                      BLK = MIN( N-I+1, CHUNK )
@@ -2220,14 +2231,14 @@
                   IWORK = ITAUP + M
 *
 *                 Bidiagonalize A
-*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*                 (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
 *
                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Generate right vectors bidiagonalizing A
-*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*                 (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                   CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2253,14 +2264,14 @@
 *                 Sufficient workspace for a fast algorithm
 *
                   IR = 1
-                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
 *
                      LDWRKU = LDA
                      CHUNK = N
                      LDWRKR = LDA
-                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
 *
@@ -2279,7 +2290,7 @@
                   IWORK = ITAU + M
 *
 *                 Compute A=L*Q
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2291,7 +2302,7 @@
      $                         LDU )
 *
 *                 Generate Q in A
-*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2301,7 +2312,7 @@
                   IWORK = ITAUP + M
 *
 *                 Bidiagonalize L in U, copying result to WORK(IR)
-*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
@@ -2309,14 +2320,14 @@
                   CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
 *
 *                 Generate right vectors bidiagonalizing L in WORK(IR)
-*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
 *
                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
      $                         WORK( ITAUP ), WORK( IWORK ),
      $                         LWORK-IWORK+1, IERR )
 *
 *                 Generate left vectors bidiagonalizing L in U
-*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
 *
                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2325,7 +2336,7 @@
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of L in U, and computing right
 *                 singular vectors of L in WORK(IR)
-*                 (Workspace: need M*M+BDSPAC)
+*                 (Workspace: need M*M + BDSPAC)
 *
                   CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
@@ -2334,7 +2345,7 @@
 *
 *                 Multiply right singular vectors of L in WORK(IR) by Q
 *                 in A, storing result in WORK(IU) and copying to A
-*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
+*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
 *
                   DO 40 I = 1, N, CHUNK
                      BLK = MIN( N-I+1, CHUNK )
@@ -2353,7 +2364,7 @@
                   IWORK = ITAU + M
 *
 *                 Compute A=L*Q
-*                 (Workspace: need 2*M, prefer M+M*NB)
+*                 (Workspace: need 2*M, prefer M + M*NB)
 *
                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2365,7 +2376,7 @@
      $                         LDU )
 *
 *                 Generate Q in A
-*                 (Workspace: need 2*M, prefer M+M*NB)
+*                 (Workspace: need 2*M, prefer M + M*NB)
 *
                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2375,21 +2386,21 @@
                   IWORK = ITAUP + M
 *
 *                 Bidiagonalize L in U
-*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                 (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
      $                         WORK( ITAUQ ), WORK( ITAUP ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                 Multiply right vectors bidiagonalizing L by Q in A
-*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                 (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                   CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
      $                         LWORK-IWORK+1, IERR )
 *
 *                 Generate left vectors bidiagonalizing L in U
-*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*                 (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2433,7 +2444,7 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2446,7 +2457,7 @@
      $                            WORK( IR+LDWRKR ), LDWRKR )
 *
 *                    Generate Q in A
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2456,7 +2467,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in WORK(IR)
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -2465,7 +2476,7 @@
 *
 *                    Generate right vectors bidiagonalizing L in
 *                    WORK(IR)
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
      $                            WORK( ITAUP ), WORK( IWORK ),
@@ -2474,7 +2485,7 @@
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of L in WORK(IR)
-*                    (Workspace: need M*M+BDSPAC)
+*                    (Workspace: need M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
@@ -2495,7 +2506,7 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2505,7 +2516,7 @@
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2520,14 +2531,14 @@
      $                            LDA )
 *
 *                    Bidiagonalize L in A
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Multiply right vectors bidiagonalizing L by Q in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
      $                            WORK( ITAUP ), VT, LDVT,
@@ -2562,7 +2573,7 @@
                         LDWRKU = LDA
                         IR = IU + LDWRKU*M
                         LDWRKR = LDA
-                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
 *
@@ -2581,7 +2592,7 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q
-*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2594,7 +2605,7 @@
      $                            WORK( IU+LDWRKU ), LDWRKU )
 *
 *                    Generate Q in A
-*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
 *
                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2605,7 +2616,7 @@
 *
 *                    Bidiagonalize L in WORK(IU), copying result to
 *                    WORK(IR)
-*                    (Workspace: need 2*M*M+4*M,
+*                    (Workspace: need 2*M*M + 4*M,
 *                                prefer 2*M*M+3*M+2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
@@ -2616,7 +2627,7 @@
      $                            WORK( IR ), LDWRKR )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need 2*M*M+4*M-1,
+*                    (Workspace: need 2*M*M + 4*M-1,
 *                                prefer 2*M*M+3*M+(M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
@@ -2624,7 +2635,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
      $                            WORK( ITAUQ ), WORK( IWORK ),
@@ -2634,7 +2645,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in WORK(IR) and computing
 *                    right singular vectors of L in WORK(IU)
-*                    (Workspace: need 2*M*M+BDSPAC)
+*                    (Workspace: need 2*M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
      $                            WORK( IU ), LDWRKU, WORK( IR ),
@@ -2661,14 +2672,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2683,21 +2694,21 @@
      $                            LDA )
 *
 *                    Bidiagonalize L in A
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Multiply right vectors bidiagonalizing L by Q in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
      $                            WORK( ITAUP ), VT, LDVT,
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors of L in A
-*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2741,7 +2752,7 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2754,7 +2765,7 @@
      $                            WORK( IU+LDWRKU ), LDWRKU )
 *
 *                    Generate Q in A
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2764,7 +2775,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in WORK(IU), copying result to U
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -2774,7 +2785,7 @@
      $                            LDU )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need M*M+4*M-1,
+*                    (Workspace: need M*M + 4*M-1,
 *                                prefer M*M+3*M+(M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
@@ -2782,7 +2793,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in U
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2791,7 +2802,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in U and computing right
 *                    singular vectors of L in WORK(IU)
-*                    (Workspace: need M*M+BDSPAC)
+*                    (Workspace: need M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
@@ -2812,14 +2823,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2835,7 +2846,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in U
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -2843,14 +2854,14 @@
 *
 *                    Multiply right bidiagonalizing vectors in U by Q
 *                    in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
      $                            WORK( ITAUP ), VT, LDVT,
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in U
-*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2877,7 +2888,7 @@
 *                 N right singular vectors to be computed in VT and
 *                 no left singular vectors to be computed
 *
-                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
@@ -2897,7 +2908,7 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2911,7 +2922,7 @@
      $                            WORK( IR+LDWRKR ), LDWRKR )
 *
 *                    Generate Q in VT
-*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2921,7 +2932,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in WORK(IR)
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -2929,7 +2940,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need M*M+4*M-1,
+*                    (Workspace: need M*M + 4*M-1,
 *                                prefer M*M+3*M+(M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
@@ -2939,7 +2950,7 @@
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of L in WORK(IR)
-*                    (Workspace: need M*M+BDSPAC)
+*                    (Workspace: need M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
@@ -2964,14 +2975,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need M+N, prefer M+N*NB)
+*                    (Workspace: need M + N, prefer M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -2986,7 +2997,7 @@
      $                            LDA )
 *
 *                    Bidiagonalize L in A
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -2994,7 +3005,7 @@
 *
 *                    Multiply right bidiagonalizing vectors in A by Q
 *                    in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
      $                            WORK( ITAUP ), VT, LDVT,
@@ -3017,7 +3028,7 @@
 *                 N right singular vectors to be computed in VT and
 *                 M left singular vectors to be overwritten on A
 *
-                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+                  IF( LWORK.GE.2*M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
@@ -3029,7 +3040,7 @@
                         LDWRKU = LDA
                         IR = IU + LDWRKU*M
                         LDWRKR = LDA
-                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
 *
@@ -3048,14 +3059,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
+*                    (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3073,7 +3084,7 @@
 *
 *                    Bidiagonalize L in WORK(IU), copying result to
 *                    WORK(IR)
-*                    (Workspace: need 2*M*M+4*M,
+*                    (Workspace: need 2*M*M + 4*M,
 *                                prefer 2*M*M+3*M+2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
@@ -3084,7 +3095,7 @@
      $                            WORK( IR ), LDWRKR )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need 2*M*M+4*M-1,
+*                    (Workspace: need 2*M*M + 4*M-1,
 *                                prefer 2*M*M+3*M+(M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
@@ -3092,7 +3103,7 @@
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
-*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
      $                            WORK( ITAUQ ), WORK( IWORK ),
@@ -3102,7 +3113,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in WORK(IR) and computing
 *                    right singular vectors of L in WORK(IU)
-*                    (Workspace: need 2*M*M+BDSPAC)
+*                    (Workspace: need 2*M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
      $                            WORK( IU ), LDWRKU, WORK( IR ),
@@ -3132,14 +3143,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need M+N, prefer M+N*NB)
+*                    (Workspace: need M + N, prefer M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3154,7 +3165,7 @@
      $                            LDA )
 *
 *                    Bidiagonalize L in A
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -3162,14 +3173,14 @@
 *
 *                    Multiply right bidiagonalizing vectors in A by Q
 *                    in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
      $                            WORK( ITAUP ), VT, LDVT,
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in A
-*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3193,7 +3204,7 @@
 *                 N right singular vectors to be computed in VT and
 *                 M left singular vectors to be computed in U
 *
-                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
@@ -3213,14 +3224,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3237,7 +3248,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in WORK(IU), copying result to U
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
      $                            WORK( IE ), WORK( ITAUQ ),
@@ -3247,14 +3258,14 @@
      $                            LDU )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
 *
                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
      $                            WORK( ITAUP ), WORK( IWORK ),
      $                            LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in U
-*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3263,7 +3274,7 @@
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in U and computing right
 *                    singular vectors of L in WORK(IU)
-*                    (Workspace: need M*M+BDSPAC)
+*                    (Workspace: need M*M + BDSPAC)
 *
                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
@@ -3288,14 +3299,14 @@
                      IWORK = ITAU + M
 *
 *                    Compute A=L*Q, copying result to VT
-*                    (Workspace: need 2*M, prefer M+M*NB)
+*                    (Workspace: need 2*M, prefer M + M*NB)
 *
                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
 *
 *                    Generate Q in VT
-*                    (Workspace: need M+N, prefer M+N*NB)
+*                    (Workspace: need M + N, prefer M + N*NB)
 *
                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3311,7 +3322,7 @@
                      IWORK = ITAUP + M
 *
 *                    Bidiagonalize L in U
-*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
 *
                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
      $                            WORK( ITAUQ ), WORK( ITAUP ),
@@ -3319,14 +3330,14 @@
 *
 *                    Multiply right bidiagonalizing vectors in U by Q
 *                    in VT
-*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
 *
                      CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
      $                            WORK( ITAUP ), VT, LDVT,
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 *
 *                    Generate left bidiagonalizing vectors in U
-*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*                    (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3360,7 +3371,7 @@
             IWORK = ITAUP + M
 *
 *           Bidiagonalize A
-*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*           (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
 *
             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
@@ -3369,7 +3380,7 @@
 *
 *              If left singular vectors desired in U, copy result to U
 *              and generate left bidiagonalizing vectors in U
-*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
 *
                CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
                CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
@@ -3379,7 +3390,7 @@
 *
 *              If right singular vectors desired in VT, copy result to
 *              VT and generate right bidiagonalizing vectors in VT
-*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
+*              (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
 *
                CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
                IF( WNTVA )
@@ -3393,7 +3404,7 @@
 *
 *              If left singular vectors desired in A, generate left
 *              bidiagonalizing vectors in A
-*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
 *
                CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
@@ -3402,7 +3413,7 @@
 *
 *              If right singular vectors desired in A, generate right
 *              bidiagonalizing vectors in A
-*              (Workspace: need 4*M, prefer 3*M+M*NB)
+*              (Workspace: need 4*M, prefer 3*M + M*NB)
 *
                CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
diff --git a/lib/linalg/dgetf2.f b/lib/linalg/dgetf2.f
index 649d0671deb88d3ee5f84f2e9b87e1e7e82c3e17..5458a5f3eb0e5099519539205cf0218fdf5b6862 100644
--- a/lib/linalg/dgetf2.f
+++ b/lib/linalg/dgetf2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGETF2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f"> 
+*> Download DGETF2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -96,22 +96,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
@@ -128,11 +128,11 @@
       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
-      DOUBLE PRECISION   SFMIN 
+      DOUBLE PRECISION   SFMIN
       INTEGER            I, J, JP
 *     ..
 *     .. External Functions ..
-      DOUBLE PRECISION   DLAMCH      
+      DOUBLE PRECISION   DLAMCH
       INTEGER            IDAMAX
       EXTERNAL           DLAMCH, IDAMAX
 *     ..
@@ -164,9 +164,9 @@
       IF( M.EQ.0 .OR. N.EQ.0 )
      $   RETURN
 *
-*     Compute machine safe minimum 
-* 
-      SFMIN = DLAMCH('S')  
+*     Compute machine safe minimum
+*
+      SFMIN = DLAMCH('S')
 *
       DO 10 J = 1, MIN( M, N )
 *
@@ -183,15 +183,15 @@
 *
 *           Compute elements J+1:M of J-th column.
 *
-            IF( J.LT.M ) THEN 
-               IF( ABS(A( J, J )) .GE. SFMIN ) THEN 
-                  CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) 
-               ELSE 
-                 DO 20 I = 1, M-J 
-                    A( J+I, J ) = A( J+I, J ) / A( J, J ) 
-   20            CONTINUE 
-               END IF 
-            END IF 
+            IF( J.LT.M ) THEN
+               IF( ABS(A( J, J )) .GE. SFMIN ) THEN
+                  CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+               ELSE
+                 DO 20 I = 1, M-J
+                    A( J+I, J ) = A( J+I, J ) / A( J, J )
+   20            CONTINUE
+               END IF
+            END IF
 *
          ELSE IF( INFO.EQ.0 ) THEN
 *
diff --git a/lib/linalg/dgetrf.f b/lib/linalg/dgetrf.f
index 45bb97f30c511f86e6fdd1b76510fe71192529a8..9a340b60f32733552cfa8e18421a53aa9a99cefc 100644
--- a/lib/linalg/dgetrf.f
+++ b/lib/linalg/dgetrf.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGETRF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f"> 
+*> Download DGETRF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -96,22 +96,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
@@ -131,7 +131,7 @@
       INTEGER            I, IINFO, J, JB, NB
 *     ..
 *     .. External Subroutines ..
-      EXTERNAL           DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+      EXTERNAL           DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA
 *     ..
 *     .. External Functions ..
       INTEGER            ILAENV
@@ -169,7 +169,7 @@
 *
 *        Use unblocked code.
 *
-         CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+         CALL DGETRF2( M, N, A, LDA, IPIV, INFO )
       ELSE
 *
 *        Use blocked code.
@@ -180,7 +180,7 @@
 *           Factor diagonal and subdiagonal blocks and test for exact
 *           singularity.
 *
-            CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+            CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
 *
 *           Adjust INFO and the pivot indices.
 *
diff --git a/lib/linalg/dgetrf2.f b/lib/linalg/dgetrf2.f
new file mode 100644
index 0000000000000000000000000000000000000000..77948d23056828acbfb921ea2383987daefd9ed5
--- /dev/null
+++ b/lib/linalg/dgetrf2.f
@@ -0,0 +1,272 @@
+*> \brief \b DGETRF2
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*  Definition:
+*  ===========
+*
+*       RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, M, N
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IPIV( * )
+*       DOUBLE PRECISION   A( LDA, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGETRF2 computes an LU factorization of a general M-by-N matrix A
+*> using partial pivoting with row interchanges.
+*>
+*> The factorization has the form
+*>    A = P * L * U
+*> where P is a permutation matrix, L is lower triangular with unit
+*> diagonal elements (lower trapezoidal if m > n), and U is upper
+*> triangular (upper trapezoidal if m < n).
+*>
+*> This is the recursive version of the algorithm. It divides
+*> the matrix into four submatrices:
+*>
+*>        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2
+*>    A = [ -----|----- ]  with n1 = min(m,n)/2
+*>        [  A21 | A22  ]       n2 = n-n1
+*>
+*>                                       [ A11 ]
+*> The subroutine calls itself to factor [ --- ],
+*>                                       [ A12 ]
+*>                 [ A12 ]
+*> do the swaps on [ --- ], solve A12, update A22,
+*>                 [ A22 ]
+*>
+*> then calls itself to factor A22 and do the swaps on A21.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix to be factored.
+*>          On exit, the factors L and U from the factorization
+*>          A = P*L*U; the unit diagonal elements of L are not stored.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] IPIV
+*> \verbatim
+*>          IPIV is INTEGER array, dimension (min(M,N))
+*>          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*>          matrix was interchanged with row IPIV(i).
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*>                has been completed, but the factor U is exactly
+*>                singular, and division by zero will occur if it is used
+*>                to solve a system of equations.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup doubleGEcomputational
+*
+*  =====================================================================
+      RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   SFMIN, TEMP
+      INTEGER            I, IINFO, N1, N2
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      INTEGER            IDAMAX
+      EXTERNAL           DLAMCH, IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DSCAL, DLASWP, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+
+      IF ( M.EQ.1 ) THEN
+*
+*        Use unblocked code for one row case
+*        Just need to handle IPIV and INFO
+*
+         IPIV( 1 ) = 1
+         IF ( A(1,1).EQ.ZERO )
+     $      INFO = 1
+*
+      ELSE IF( N.EQ.1 ) THEN
+*
+*        Use unblocked code for one column case
+*
+*
+*        Compute machine safe minimum
+*
+         SFMIN = DLAMCH('S')
+*
+*        Find pivot and test for singularity
+*
+         I = IDAMAX( M, A( 1, 1 ), 1 )
+         IPIV( 1 ) = I
+         IF( A( I, 1 ).NE.ZERO ) THEN
+*
+*           Apply the interchange
+*
+            IF( I.NE.1 ) THEN
+               TEMP = A( 1, 1 )
+               A( 1, 1 ) = A( I, 1 )
+               A( I, 1 ) = TEMP
+            END IF
+*
+*           Compute elements 2:M of the column
+*
+            IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN
+               CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
+            ELSE
+               DO 10 I = 1, M-1
+                  A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 )
+   10          CONTINUE
+            END IF
+*
+         ELSE
+            INFO = 1
+         END IF
+*
+      ELSE
+*
+*        Use recursive code
+*
+         N1 = MIN( M, N ) / 2
+         N2 = N-N1
+*
+*               [ A11 ]
+*        Factor [ --- ]
+*               [ A21 ]
+*
+         CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO )
+
+         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*                              [ A12 ]
+*        Apply interchanges to [ --- ]
+*                              [ A22 ]
+*
+         CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 )
+*
+*        Solve A12
+*
+         CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
+     $               A( 1, N1+1 ), LDA )
+*
+*        Update A22
+*
+         CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
+     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
+*
+*        Factor A22
+*
+         CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ),
+     $                 IINFO )
+*
+*        Adjust INFO and the pivot indices
+*
+         IF ( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + N1
+         DO 20 I = N1+1, MIN( M, N )
+            IPIV( I ) = IPIV( I ) + N1
+   20    CONTINUE
+*
+*        Apply interchanges to A21
+*
+         CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 )
+*
+      END IF
+      RETURN
+*
+*     End of DGETRF2
+*
+      END
diff --git a/lib/linalg/dgetri.f b/lib/linalg/dgetri.f
index ad5324c07ef779da7104da25ee4aac395ca1da72..9d8cf2ad3e58c0976b78facb2595a06bb6040938 100644
--- a/lib/linalg/dgetri.f
+++ b/lib/linalg/dgetri.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGETRI + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f"> 
+*> Download DGETRI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, LWORK, N
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, N
diff --git a/lib/linalg/dgetrs.f b/lib/linalg/dgetrs.f
index 02e9832af79bbb45e570db2fe226a5324ea64d39..7ac727776e565b0aabc8a91cfda2ea0af64c8303 100644
--- a/lib/linalg/dgetrs.f
+++ b/lib/linalg/dgetrs.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DGETRS + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f"> 
+*> Download DGETRS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          TRANS
 *       INTEGER            INFO, LDA, LDB, N, NRHS
@@ -28,7 +28,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -109,22 +109,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleGEcomputational
 *
 *  =====================================================================
       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          TRANS
diff --git a/lib/linalg/disnan.f b/lib/linalg/disnan.f
index 355b82795565e5f9e54305258861f0a61a6f036b..a565ed36d48312d56c6e8b2304259a7971c34645 100644
--- a/lib/linalg/disnan.f
+++ b/lib/linalg/disnan.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DISNAN + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f"> 
+*> Download DISNAN + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/disnan.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/disnan.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/disnan.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       LOGICAL FUNCTION DISNAN( DIN )
-* 
+*
 *       .. Scalar Arguments ..
-*       DOUBLE PRECISION   DIN
+*       DOUBLE PRECISION, INTENT(IN) :: DIN
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -47,25 +47,25 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       LOGICAL FUNCTION DISNAN( DIN )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
-      DOUBLE PRECISION   DIN
+      DOUBLE PRECISION, INTENT(IN) :: DIN
 *     ..
 *
 *  =====================================================================
diff --git a/lib/linalg/dlabad.f b/lib/linalg/dlabad.f
index 9eda3c91db792aa8ff3b9c1e9ac8257c84fe0359..01b8158f663ed049fa08241db067d8cc6c50988a 100644
--- a/lib/linalg/dlabad.f
+++ b/lib/linalg/dlabad.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLABAD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f"> 
+*> Download DLABAD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabad.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabad.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabad.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLABAD( SMALL, LARGE )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   LARGE, SMALL
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -62,22 +62,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLABAD( SMALL, LARGE )
 *
-*  -- LAPACK auxiliary routine (version 3.4.0) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   LARGE, SMALL
diff --git a/lib/linalg/dlabrd.f b/lib/linalg/dlabrd.f
index 72d148119a159dedcf1b83fc48c477407060387c..b5e734dc7c1166c7de38f4fd85979b1ba6dc8220 100644
--- a/lib/linalg/dlabrd.f
+++ b/lib/linalg/dlabrd.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLABRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f"> 
+*> Download DLABRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlabrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlabrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlabrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
 *                          LDY )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            LDA, LDX, LDY, M, N, NB
 *       ..
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
 *      $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -110,7 +110,7 @@
 *>
 *> \param[out] TAUQ
 *> \verbatim
-*>          TAUQ is DOUBLE PRECISION array dimension (NB)
+*>          TAUQ is DOUBLE PRECISION array, dimension (NB)
 *>          The scalar factors of the elementary reflectors which
 *>          represent the orthogonal matrix Q. See Further Details.
 *> \endverbatim
@@ -151,12 +151,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -210,10 +210,10 @@
       SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
      $                   LDY )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            LDA, LDX, LDY, M, N, NB
diff --git a/lib/linalg/dlacn2.f b/lib/linalg/dlacn2.f
index 9dd3c85ea2a506e74e9415840776658bb9cb4952..952854043afc05418587f2e19d454dceb06c6b9b 100644
--- a/lib/linalg/dlacn2.f
+++ b/lib/linalg/dlacn2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLACN2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f"> 
+*> Download DLACN2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacn2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacn2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacn2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            KASE, N
 *       DOUBLE PRECISION   EST
@@ -28,7 +28,7 @@
 *       INTEGER            ISGN( * ), ISAVE( 3 )
 *       DOUBLE PRECISION   V( * ), X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -75,7 +75,7 @@
 *>          EST is DOUBLE PRECISION
 *>         On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be
 *>         unchanged from the previous call to DLACN2.
-*>         On exit, EST is an estimate (a lower bound) for norm(A). 
+*>         On exit, EST is an estimate (a lower bound) for norm(A).
 *> \endverbatim
 *>
 *> \param[in,out] KASE
@@ -96,12 +96,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -136,10 +136,10 @@
 *  =====================================================================
       SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            KASE, N
diff --git a/lib/linalg/dlacpy.f b/lib/linalg/dlacpy.f
index a9a23c9454eb8483aac927b9a59c88e29fade96c..d1c396724a5b5e473bb57bcf2479a881326e0724 100644
--- a/lib/linalg/dlacpy.f
+++ b/lib/linalg/dlacpy.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLACPY + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f"> 
+*> Download DLACPY + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, LDB, M, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -91,22 +91,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dladiv.f b/lib/linalg/dladiv.f
index 306a6b0020e39a5dd94b0045cc85ba15b33a678e..dd8110adf2f8005a05bccfd97fc34adca1fc84e2 100644
--- a/lib/linalg/dladiv.f
+++ b/lib/linalg/dladiv.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLADIV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f"> 
+*> Download DLADIV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLADIV( A, B, C, D, P, Q )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   A, B, C, D, P, Q
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -36,8 +36,9 @@
 *>            p + i*q = ---------
 *>                       c + i*d
 *>
-*> The algorithm is due to Robert L. Smith and can be found
-*> in D. Knuth, The art of Computer Programming, Vol.2, p.195
+*> The algorithm is due to Michael Baudin and Robert L. Smith
+*> and can be found in the paper
+*> "A Robust Complex Division in Scilab"
 *> \endverbatim
 *
 *  Arguments:
@@ -78,22 +79,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date January 2013
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLADIV( A, B, C, D, P, Q )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     January 2013
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   A, B, C, D, P, Q
@@ -101,28 +102,155 @@
 *
 *  =====================================================================
 *
+*     .. Parameters ..
+      DOUBLE PRECISION   BS
+      PARAMETER          ( BS = 2.0D0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+*
 *     .. Local Scalars ..
-      DOUBLE PRECISION   E, F
+      DOUBLE PRECISION   AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLADIV1
 *     ..
 *     .. Intrinsic Functions ..
-      INTRINSIC          ABS
+      INTRINSIC          ABS, MAX
 *     ..
 *     .. Executable Statements ..
 *
-      IF( ABS( D ).LT.ABS( C ) ) THEN
-         E = D / C
-         F = C + D*E
-         P = ( A+B*E ) / F
-         Q = ( B-A*E ) / F
+      AA = A
+      BB = B
+      CC = C
+      DD = D
+      AB = MAX( ABS(A), ABS(B) )
+      CD = MAX( ABS(C), ABS(D) )
+      S = 1.0D0
+
+      OV = DLAMCH( 'Overflow threshold' )
+      UN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Epsilon' )
+      BE = BS / (EPS*EPS)
+
+      IF( AB >= HALF*OV ) THEN
+         AA = HALF * AA
+         BB = HALF * BB
+         S  = TWO * S
+      END IF
+      IF( CD >= HALF*OV ) THEN
+         CC = HALF * CC
+         DD = HALF * DD
+         S  = HALF * S
+      END IF
+      IF( AB <= UN*BS/EPS ) THEN
+         AA = AA * BE
+         BB = BB * BE
+         S  = S / BE
+      END IF
+      IF( CD <= UN*BS/EPS ) THEN
+         CC = CC * BE
+         DD = DD * BE
+         S  = S * BE
+      END IF
+      IF( ABS( D ).LE.ABS( C ) ) THEN
+         CALL DLADIV1(AA, BB, CC, DD, P, Q)
       ELSE
-         E = C / D
-         F = D + C*E
-         P = ( B+A*E ) / F
-         Q = ( -A+B*E ) / F
+         CALL DLADIV1(BB, AA, DD, CC, P, Q)
+         Q = -Q
       END IF
+      P = P * S
+      Q = Q * S
 *
       RETURN
 *
 *     End of DLADIV
 *
       END
+
+*> \ingroup doubleOTHERauxiliary
+
+
+      SUBROUTINE DLADIV1( A, B, C, D, P, Q )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     January 2013
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, D, P, Q
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+*
+*     .. Local Scalars ..
+      DOUBLE PRECISION   R, T
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLADIV2
+      EXTERNAL           DLADIV2
+*     ..
+*     .. Executable Statements ..
+*
+      R = D / C
+      T = ONE / (C + D * R)
+      P = DLADIV2(A, B, C, D, R, T)
+      A = -A
+      Q = DLADIV2(B, A, C, D, R, T)
+*
+      RETURN
+*
+*     End of DLADIV1
+*
+      END
+
+*> \ingroup doubleOTHERauxiliary
+
+      DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     January 2013
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, D, R, T
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+*
+*     .. Local Scalars ..
+      DOUBLE PRECISION   BR
+*     ..
+*     .. Executable Statements ..
+*
+      IF( R.NE.ZERO ) THEN
+         BR = B * R
+         IF( BR.NE.ZERO ) THEN
+            DLADIV2 = (A + BR) * T
+         ELSE
+            DLADIV2 = A * T + (B * T) * R
+         END IF
+      ELSE
+         DLADIV2 = (A + D * (B / C)) * T
+      END IF
+*
+      RETURN
+*
+*     End of DLADIV12
+*
+      END
diff --git a/lib/linalg/dlae2.f b/lib/linalg/dlae2.f
index 302eeaa1f7561f760bd539e2ee7a9f4623914ad0..ed77ff6dfe67864d60573202409b695153da48d0 100644
--- a/lib/linalg/dlae2.f
+++ b/lib/linalg/dlae2.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAE2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f"> 
+*> Download DLAE2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlae2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlae2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlae2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   A, B, C, RT1, RT2
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -73,14 +73,14 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -102,10 +102,10 @@
 *  =====================================================================
       SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   A, B, C, RT1, RT2
diff --git a/lib/linalg/dlaed0.f b/lib/linalg/dlaed0.f
index d8d7f53e1d0fb2887556fb212ce3b0c106399fbc..4e92da98ea93d66689f3f9e0ba3aec4766ba5c85 100644
--- a/lib/linalg/dlaed0.f
+++ b/lib/linalg/dlaed0.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED0 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f"> 
+*> Download DLAED0 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed0.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed0.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed0.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
 *                          WORK, IWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
 *       ..
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
 *      $                   WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -153,12 +153,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -172,10 +172,10 @@
       SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
      $                   WORK, IWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
diff --git a/lib/linalg/dlaed1.f b/lib/linalg/dlaed1.f
index c37c1d2100c3fe533705cd2319ecaf88ea19ca9c..30e71fa241c71893659ce9e4a1d434ada04b2947 100644
--- a/lib/linalg/dlaed1.f
+++ b/lib/linalg/dlaed1.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED1 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f"> 
+*> Download DLAED1 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed1.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed1.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed1.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
 *                          INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            CUTPNT, INFO, LDQ, N
 *       DOUBLE PRECISION   RHO
@@ -29,7 +29,7 @@
 *       INTEGER            INDXQ( * ), IWORK( * )
 *       DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -54,7 +54,7 @@
 *>
 *>       The first stage consists of deflating the size of the problem
 *>       when there are multiple eigenvalues or if there is a zero in
-*>       the Z vector.  For each such occurence the dimension of the
+*>       the Z vector.  For each such occurrence the dimension of the
 *>       secular equation problem is reduced by one.  This stage is
 *>       performed by the routine DLAED2.
 *>
@@ -143,12 +143,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -163,10 +163,10 @@
       SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
      $                   INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            CUTPNT, INFO, LDQ, N
diff --git a/lib/linalg/dlaed2.f b/lib/linalg/dlaed2.f
index a75d72a737a05680235b920ac24d825961c20a32..fbcc87a8803980a043537495c96f1a7f58157625 100644
--- a/lib/linalg/dlaed2.f
+++ b/lib/linalg/dlaed2.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f"> 
+*> Download DLAED2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
 *                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDQ, N, N1
 *       DOUBLE PRECISION   RHO
@@ -31,7 +31,7 @@
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 *      $                   W( * ), Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -192,12 +192,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -212,10 +212,10 @@
       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
      $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDQ, N, N1
@@ -520,10 +520,10 @@
 *     into the last N - K slots of D and Q respectively.
 *
       IF( K.LT.N ) THEN
-         CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, 
+         CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
      $                Q( 1, K+1 ), LDQ )
          CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
-      END IF         
+      END IF
 *
 *     Copy CTOT into COLTYP for referencing in DLAED3.
 *
diff --git a/lib/linalg/dlaed3.f b/lib/linalg/dlaed3.f
index 411d0f890f32e0a37f17c29a856494b0788dbd6d..d200fc0a2202922354aba22e6758c7b4b62a690f 100644
--- a/lib/linalg/dlaed3.f
+++ b/lib/linalg/dlaed3.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED3 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f"> 
+*> Download DLAED3 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed3.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed3.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed3.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
 *                          CTOT, W, S, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDQ, N, N1
 *       DOUBLE PRECISION   RHO
@@ -30,7 +30,7 @@
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 *      $                   S( * ), W( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,7 +116,7 @@
 *>
 *> \param[in] Q2
 *> \verbatim
-*>          Q2 is DOUBLE PRECISION array, dimension (LDQ2, N)
+*>          Q2 is DOUBLE PRECISION array, dimension (LDQ2*N)
 *>          The first K columns of this matrix contain the non-deflated
 *>          eigenvectors for the split problem.
 *> \endverbatim
@@ -165,12 +165,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -185,10 +185,10 @@
       SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
      $                   CTOT, W, S, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDQ, N, N1
diff --git a/lib/linalg/dlaed4.f b/lib/linalg/dlaed4.f
index c898b5b6187ad49168132d5b110214a9aa0c35e3..e7dc839df5a57ca9f094785286553c45db564452 100644
--- a/lib/linalg/dlaed4.f
+++ b/lib/linalg/dlaed4.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED4 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f"> 
+*> Download DLAED4 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed4.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed4.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed4.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            I, INFO, N
 *       DOUBLE PRECISION   DLAM, RHO
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -127,12 +127,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -145,10 +145,10 @@
 *  =====================================================================
       SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            I, INFO, N
diff --git a/lib/linalg/dlaed5.f b/lib/linalg/dlaed5.f
index 3ac9aa19a8a044b02876f507592dc47570024b84..3ea9e401cfbc2849e58344c4bd2e2b58c3d817c7 100644
--- a/lib/linalg/dlaed5.f
+++ b/lib/linalg/dlaed5.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED5 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f"> 
+*> Download DLAED5 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed5.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed5.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed5.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            I
 *       DOUBLE PRECISION   DLAM, RHO
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -90,12 +90,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -108,10 +108,10 @@
 *  =====================================================================
       SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            I
diff --git a/lib/linalg/dlaed6.f b/lib/linalg/dlaed6.f
index 1ce4932b8e7e97650db1bbe5f47e5c00d3414ce9..daa8db39e4ce4a5f6a26c70017b3a3224b4027ab 100644
--- a/lib/linalg/dlaed6.f
+++ b/lib/linalg/dlaed6.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED6 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f"> 
+*> Download DLAED6 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed6.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed6.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed6.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       LOGICAL            ORGATI
 *       INTEGER            INFO, KNITER
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( 3 ), Z( 3 )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -110,12 +110,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -140,10 +140,10 @@
 *  =====================================================================
       SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       LOGICAL            ORGATI
@@ -175,7 +175,7 @@
       INTEGER            I, ITER, NITER
       DOUBLE PRECISION   A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
      $                   FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
-     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4, 
+     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4,
      $                   LBD, UBD
 *     ..
 *     .. Intrinsic Functions ..
@@ -195,7 +195,7 @@
       IF( FINIT .LT. ZERO )THEN
          LBD = ZERO
       ELSE
-         UBD = ZERO 
+         UBD = ZERO
       END IF
 *
       NITER = 1
@@ -363,7 +363,7 @@
 *
          TAU = TAU + ETA
          IF( TAU .LT. LBD .OR. TAU .GT. UBD )
-     $      TAU = ( LBD + UBD )/TWO 
+     $      TAU = ( LBD + UBD )/TWO
 *
          FC = ZERO
          ERRETM = ZERO
@@ -381,13 +381,14 @@
                DF = DF + TEMP2
                DDF = DDF + TEMP3
             ELSE
-              GO TO 60
+               GO TO 60
             END IF
    40    CONTINUE
          F = FINIT + TAU*FC
          ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
      $            ABS( TAU )*DF
-         IF( ABS( F ).LE.EPS*ERRETM )
+         IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR.
+     $      ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) )  )
      $      GO TO 60
          IF( F .LE. ZERO )THEN
             LBD = TAU
diff --git a/lib/linalg/dlaed7.f b/lib/linalg/dlaed7.f
index 972c1bc5902428506aea55575f3c36f0e10e560e..9c528addedf2af22a69c20cd91c5423ce8ecc098 100644
--- a/lib/linalg/dlaed7.f
+++ b/lib/linalg/dlaed7.f
@@ -2,18 +2,18 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED7 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed7.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed7.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed7.f"> 
+*> Download DLAED7 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed7.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed7.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed7.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -22,7 +22,7 @@
 *                          LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR,
 *                          PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK,
 *                          INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N,
 *      $                   QSIZ, TLVLS
@@ -34,7 +34,7 @@
 *       DOUBLE PRECISION   D( * ), GIVNUM( 2, * ), Q( LDQ, * ),
 *      $                   QSTORE( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -59,7 +59,7 @@
 *>
 *>       The first stage consists of deflating the size of the problem
 *>       when there are multiple eigenvalues or if there is a zero in
-*>       the Z vector.  For each such occurence the dimension of the
+*>       the Z vector.  For each such occurrence the dimension of the
 *>       secular equation problem is reduced by one.  This stage is
 *>       performed by the routine DLAED8.
 *>
@@ -239,12 +239,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -260,10 +260,10 @@
      $                   PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK,
      $                   INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N,
@@ -304,7 +304,7 @@
       ELSE IF( N.LT.0 ) THEN
          INFO = -2
       ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN
-         INFO = -4
+         INFO = -3
       ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
          INFO = -9
       ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN
diff --git a/lib/linalg/dlaed8.f b/lib/linalg/dlaed8.f
index 42b4ea15775f8784d4a502905d43d09c0d77b217..c053347b10d77c0c1acddc6dc2c9823a3c08f4c0 100644
--- a/lib/linalg/dlaed8.f
+++ b/lib/linalg/dlaed8.f
@@ -2,18 +2,18 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED8 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed8.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed8.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed8.f"> 
+*> Download DLAED8 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed8.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed8.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed8.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -21,7 +21,7 @@
 *       SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO,
 *                          CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR,
 *                          GIVCOL, GIVNUM, INDXP, INDX, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N,
 *      $                   QSIZ
@@ -33,7 +33,7 @@
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), GIVNUM( 2, * ),
 *      $                   Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -223,12 +223,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -243,10 +243,10 @@
      $                   CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR,
      $                   GIVCOL, GIVNUM, INDXP, INDX, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N,
@@ -308,8 +308,8 @@
       END IF
 *
 *     Need to initialize GIVPTR to O here in case of quick exit
-*     to prevent an unspecified code behavior (usually sigfault) 
-*     when IWORK array on entry to *stedc is not zeroed 
+*     to prevent an unspecified code behavior (usually sigfault)
+*     when IWORK array on entry to *stedc is not zeroed
 *     (or at least some IWORK entries which used in *laed7 for GIVPTR).
 *
       GIVPTR = 0
diff --git a/lib/linalg/dlaed9.f b/lib/linalg/dlaed9.f
index 8aa0687573af1f84699d85b89cafb38bb48273cd..d3be22502a1a9d5079299014c18f7054e9fda332 100644
--- a/lib/linalg/dlaed9.f
+++ b/lib/linalg/dlaed9.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAED9 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed9.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed9.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed9.f"> 
+*> Download DLAED9 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaed9.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaed9.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaed9.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W,
 *                          S, LDS, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, KSTART, KSTOP, LDQ, LDS, N
 *       DOUBLE PRECISION   RHO
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ),
 *      $                   W( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -137,12 +137,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -156,10 +156,10 @@
       SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W,
      $                   S, LDS, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, KSTART, KSTOP, LDQ, LDS, N
diff --git a/lib/linalg/dlaeda.f b/lib/linalg/dlaeda.f
index 749a7c365ac80a37bad29bab82b8a2ba63e2ff35..4ca08a087920eb185f26831e0df5b0dbc3a9e48c 100644
--- a/lib/linalg/dlaeda.f
+++ b/lib/linalg/dlaeda.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAEDA + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaeda.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaeda.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaeda.f"> 
+*> Download DLAEDA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaeda.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaeda.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaeda.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
 *                          GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            CURLVL, CURPBM, INFO, N, TLVLS
 *       ..
@@ -29,7 +29,7 @@
 *      $                   PRMPTR( * ), QPTR( * )
 *       DOUBLE PRECISION   GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -147,12 +147,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -166,10 +166,10 @@
       SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
      $                   GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            CURLVL, CURPBM, INFO, N, TLVLS
diff --git a/lib/linalg/dlaev2.f b/lib/linalg/dlaev2.f
index 2e333ddf2c7ddafd26f667bb096c0fabbfaaacf8..4906f1a20c7c038dedbc98f39ef1925260e4b114 100644
--- a/lib/linalg/dlaev2.f
+++ b/lib/linalg/dlaev2.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAEV2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaev2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaev2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaev2.f"> 
+*> Download DLAEV2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaev2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaev2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaev2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   A, B, C, CS1, RT1, RT2, SN1
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -89,14 +89,14 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -120,10 +120,10 @@
 *  =====================================================================
       SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   A, B, C, CS1, RT1, RT2, SN1
diff --git a/lib/linalg/dlaisnan.f b/lib/linalg/dlaisnan.f
index 58595c5c33097e97a0fe8037eeaf1b5071393570..c2e87d88a005bc78c9f25a80a06afc46be6921fb 100644
--- a/lib/linalg/dlaisnan.f
+++ b/lib/linalg/dlaisnan.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAISNAN + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaisnan.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaisnan.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaisnan.f"> 
+*> Download DLAISNAN + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaisnan.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaisnan.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaisnan.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 )
-* 
+*
 *       .. Scalar Arguments ..
-*       DOUBLE PRECISION   DIN1, DIN2
+*       DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -62,25 +62,25 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
-      DOUBLE PRECISION   DIN1, DIN2
+      DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2
 *     ..
 *
 *  =====================================================================
diff --git a/lib/linalg/dlals0.f b/lib/linalg/dlals0.f
new file mode 100644
index 0000000000000000000000000000000000000000..d4cff166d6ab717bf26b5573357731cdce5bf985
--- /dev/null
+++ b/lib/linalg/dlals0.f
@@ -0,0 +1,499 @@
+*> \brief \b DLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALS0 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlals0.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlals0.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlals0.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+*                          PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+*                          POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+*      $                   LDGNUM, NL, NR, NRHS, SQRE
+*       DOUBLE PRECISION   C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+*       DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+*      $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+*      $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALS0 applies back the multiplying factors of either the left or the
+*> right singular vector matrix of a diagonal matrix appended by a row
+*> to the right hand side matrix B in solving the least squares problem
+*> using the divide-and-conquer SVD approach.
+*>
+*> For the left singular vector matrix, three types of orthogonal
+*> matrices are involved:
+*>
+*> (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*>      pairs of columns/rows they were applied to are stored in GIVCOL;
+*>      and the C- and S-values of these rotations are stored in GIVNUM.
+*>
+*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*>      row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*>      J-th row.
+*>
+*> (3L) The left singular vector matrix of the remaining matrix.
+*>
+*> For the right singular vector matrix, four types of orthogonal
+*> matrices are involved:
+*>
+*> (1R) The right singular vector matrix of the remaining matrix.
+*>
+*> (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*>      null space.
+*>
+*> (3R) The inverse transformation of (2L).
+*>
+*> (4R) The inverse transformation of (1L).
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed in
+*>         factored form:
+*>         = 0: Left singular vector matrix.
+*>         = 1: Right singular vector matrix.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block. NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block. NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*>         and column dimension M = N + SQRE.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B and BX. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*>         On input, B contains the right hand sides of the least
+*>         squares problem in rows 1 through M. On output, B contains
+*>         the solution X in rows 1 through N.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B. LDB must be at least
+*>         max(1,MAX( M, N ) ).
+*> \endverbatim
+*>
+*> \param[out] BX
+*> \verbatim
+*>          BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*> \endverbatim
+*>
+*> \param[in] LDBX
+*> \verbatim
+*>          LDBX is INTEGER
+*>         The leading dimension of BX.
+*> \endverbatim
+*>
+*> \param[in] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) applied
+*>         to the two blocks.
+*> \endverbatim
+*>
+*> \param[in] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem.
+*> \endverbatim
+*>
+*> \param[in] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of rows/columns
+*>         involved in a Givens rotation.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         The leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[in] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value used in the
+*>         corresponding Givens rotation.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of arrays DIFR, POLES and
+*>         GIVNUM, must be at least K.
+*> \endverbatim
+*>
+*> \param[in] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         On entry, POLES(1:K, 1) contains the new singular
+*>         values obtained from solving the secular equation, and
+*>         POLES(1:K, 2) is an array containing the poles in the secular
+*>         equation.
+*> \endverbatim
+*>
+*> \param[in] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( K ).
+*>         On entry, DIFL(I) is the distance between I-th updated
+*>         (undeflated) singular value and the I-th (undeflated) old
+*>         singular value.
+*> \endverbatim
+*>
+*> \param[in] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+*>         On entry, DIFR(I, 1) contains the distances between I-th
+*>         updated (undeflated) singular value and the I+1-th
+*>         (undeflated) old singular value. And DIFR(I, 2) is the
+*>         normalizing factor for the I-th right singular vector.
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( K )
+*>         Contain the components of the deflation-adjusted updating row
+*>         vector.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix,
+*>         This is the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[in] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[in] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( K )
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      DOUBLE PRECISION   C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+     $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+     $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, M, N, NLP1
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DLASCL, DROT, DSCAL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL DCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL DCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL DSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 50 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                        ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                           DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                           DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               WORK( 1 ) = NEGONE
+               TEMP = DNRM2( K, WORK, 1 )
+               CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+     $                     B( J, 1 ), LDB )
+               CALL DLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+   50       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 80 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -Z( J ) / DIFL( J ) /
+     $                        ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 60 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1,
+     $                           2 ) )-DIFR( I, 1 ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   60          CONTINUE
+               DO 70 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I,
+     $                           2 ) )-DIFL( I ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   70          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+     $                     BX( J, 1 ), LDBX )
+   80       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL DROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ),
+     $                   LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL DCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 90 I = 2, N
+            CALL DCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+   90    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 100 I = GIVPTR, 1, -1
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 -GIVNUM( I, 1 ) )
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLALS0
+*
+      END
diff --git a/lib/linalg/dlalsa.f b/lib/linalg/dlalsa.f
new file mode 100644
index 0000000000000000000000000000000000000000..b643f11c0b65a4b9aacc39ed9a768be2979dc72f
--- /dev/null
+++ b/lib/linalg/dlalsa.f
@@ -0,0 +1,493 @@
+*> \brief \b DLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALSA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlalsa.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlalsa.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlalsa.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+*                          LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+*                          GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+*                          IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+*      $                   SMLSIZ
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+*      $                   K( * ), PERM( LDGCOL, * )
+*       DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
+*      $                   DIFL( LDU, * ), DIFR( LDU, * ),
+*      $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+*      $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+*      $                   Z( LDU, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALSA is an itermediate step in solving the least squares problem
+*> by computing the SVD of the coefficient matrix in compact form (The
+*> singular vectors are computed as products of simple orthorgonal
+*> matrices.).
+*>
+*> If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
+*> matrix of an upper bidiagonal matrix to the right hand side; and if
+*> ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
+*> right hand side. The singular vector matrices were generated in
+*> compact form by DLALSA.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether the left or the right singular vector
+*>         matrix is involved.
+*>         = 0: Left singular vector matrix
+*>         = 1: Right singular vector matrix
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The row and column dimensions of the upper bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B and BX. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*>         On input, B contains the right hand sides of the least
+*>         squares problem in rows 1 through M.
+*>         On output, B contains the solution X in rows 1 through N.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B in the calling subprogram.
+*>         LDB must be at least max(1,MAX( M, N ) ).
+*> \endverbatim
+*>
+*> \param[out] BX
+*> \verbatim
+*>          BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*>         On exit, the result of applying the left or right singular
+*>         vector matrix to B.
+*> \endverbatim
+*>
+*> \param[in] LDBX
+*> \verbatim
+*>          LDBX is INTEGER
+*>         The leading dimension of BX.
+*> \endverbatim
+*>
+*> \param[in] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+*>         On entry, U contains the left singular vector matrices of all
+*>         subproblems at the bottom level.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER, LDU = > N.
+*>         The leading dimension of arrays U, VT, DIFL, DIFR,
+*>         POLES, GIVNUM, and Z.
+*> \endverbatim
+*>
+*> \param[in] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+*>         On entry, VT**T contains the right singular vector matrices of
+*>         all subproblems at the bottom level.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER array, dimension ( N ).
+*> \endverbatim
+*>
+*> \param[in] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*>         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*> \endverbatim
+*>
+*> \param[in] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*>         distances between singular values on the I-th level and
+*>         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*>         record the normalizing factors of the right singular vectors
+*>         matrices of subproblems on I-th level.
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*>         On entry, Z(1, I) contains the components of the deflation-
+*>         adjusted updating row vector for subproblems on the I-th
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*>         singular values involved in the secular equations on the I-th
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER array, dimension ( N ).
+*>         On entry, GIVPTR( I ) records the number of Givens
+*>         rotations performed on the I-th problem on the computation
+*>         tree.
+*> \endverbatim
+*>
+*> \param[in] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*>         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*>         locations of Givens rotations performed on the I-th level on
+*>         the computation tree.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER, LDGCOL = > N.
+*>         The leading dimension of arrays GIVCOL and PERM.
+*> \endverbatim
+*>
+*> \param[in] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( LDGCOL, NLVL ).
+*>         On entry, PERM(*, I) records permutations done on the I-th
+*>         level of the computation tree.
+*> \endverbatim
+*>
+*> \param[in] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*>         values of Givens rotations performed on the I-th level on the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array, dimension ( N ).
+*>         On entry, if the I-th subproblem is not square,
+*>         C( I ) contains the C-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[in] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension ( N ).
+*>         On entry, if the I-th subproblem is not square,
+*>         S( I ) contains the S-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (3*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
+     $                   DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+     $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2,
+     $                   ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL,
+     $                   NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLALS0, DLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 50.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 50
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 10 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   10 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 20 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+   20 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 40 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 30 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   30    CONTINUE
+   40 CONTINUE
+      GO TO 90
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+   50 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 70 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 60 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   60    CONTINUE
+   70 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 80 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   80 CONTINUE
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of DLALSA
+*
+      END
diff --git a/lib/linalg/dlalsd.f b/lib/linalg/dlalsd.f
new file mode 100644
index 0000000000000000000000000000000000000000..510e0455a6a922ab462d57bd12024aa9bb4a9188
--- /dev/null
+++ b/lib/linalg/dlalsd.f
@@ -0,0 +1,523 @@
+*> \brief \b DLALSD uses the singular value decomposition of A to solve the least squares problem.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALSD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlalsd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlalsd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlalsd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+*                          RANK, WORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          UPLO
+*       INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALSD uses the singular value decomposition of A to solve the least
+*> squares problem of finding X to minimize the Euclidean norm of each
+*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*> are N-by-NRHS. The solution X overwrites B.
+*>
+*> The singular values of A smaller than RCOND times the largest
+*> singular value are treated as zero in solving the least squares
+*> problem; in this case a minimum norm solution is returned.
+*> The actual singular values are returned in D in ascending order.
+*>
+*> This code makes very mild assumptions about floating point
+*> arithmetic. It will work on machines with a guard digit in
+*> add/subtract, or on those binary machines without guard digits
+*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*> It could conceivably fail on hexadecimal or decimal machines
+*> without guard digits, but we know of none.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>         = 'U': D and E define an upper bidiagonal matrix.
+*>         = 'L': D and E define a  lower bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The dimension of the  bidiagonal matrix.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension (N)
+*>         On entry D contains the main diagonal of the bidiagonal
+*>         matrix. On exit, if INFO = 0, D contains its singular values.
+*> \endverbatim
+*>
+*> \param[in,out] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array, dimension (N-1)
+*>         Contains the super-diagonal entries of the bidiagonal matrix.
+*>         On exit, E has been destroyed.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>         On input, B contains the right hand sides of the least
+*>         squares problem. On output, B contains the solution X.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B in the calling subprogram.
+*>         LDB must be at least max(1,N).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>         The singular values of A less than or equal to RCOND times
+*>         the largest singular value are treated as zero in solving
+*>         the least squares problem. If RCOND is negative,
+*>         machine precision is used instead.
+*>         For example, if diag(S)*X=B were the least squares problem,
+*>         where diag(S) is a diagonal matrix of singular values, the
+*>         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*>         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*>         RCOND*max(S).
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>         The number of singular values of A greater than RCOND times
+*>         the largest singular value.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension at least
+*>         (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+*>         where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension at least
+*>         (3*N*NLVL + 11*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>         = 0:  successful exit.
+*>         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>         > 0:  The algorithm failed to compute a singular value while
+*>               working on the submatrix lying in rows and columns
+*>               INFO/(N+1) through MOD(INFO,N+1).
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, IWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL,
+     $                   NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI,
+     $                   SMLSZP, SQRE, ST, ST1, U, VT, Z
+      DOUBLE PRECISION   CS, EPS, ORGNRM, R, RCND, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           IDAMAX, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL,
+     $                   DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCND = EPS
+      ELSE
+         RCND = RCOND
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL DLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL DROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               WORK( I*2-1 ) = CS
+               WORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = WORK( J*2-1 )
+                  SN = WORK( J*2 )
+                  CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         NWORK = 1 + N*N
+         CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N )
+         CALL DLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B,
+     $                LDB, WORK( NWORK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
+         DO 40 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+   40    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+     $               WORK( NWORK ), N )
+         CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB )
+*
+*        Unscale.
+*
+         CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL DLASRT( 'D', N, D, INFO )
+         CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      BX = GIVNUM + 2*NLVL*N
+      NWORK = BX + N*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 50 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   50 CONTINUE
+*
+      DO 60 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by DLASDQ.
+*
+               CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      WORK( VT+ST1 ), N )
+               CALL DLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ),
+     $                      E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ),
+     $                      N, B( ST, 1 ), LDB, WORK( NWORK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), WORK( DIFL+ST1 ),
+     $                      WORK( DIFR+ST1 ), WORK( Z+ST1 ),
+     $                      WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      WORK( GIVNUM+ST1 ), WORK( C+ST1 ),
+     $                      WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, WORK( U+ST1 ), N,
+     $                      WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                      WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                      WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+   60 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
+*
+      DO 70 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+   70 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 80 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+            CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO,
+     $                  B( ST, 1 ), LDB )
+         ELSE
+            CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, WORK( U+ST1 ), N,
+     $                   WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                   WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                   WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                   IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+   80 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL DLASRT( 'D', N, D, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of DLALSD
+*
+      END
diff --git a/lib/linalg/dlamch.f b/lib/linalg/dlamch.f
index 8f830e87cd94024cc83feb2eecd26e137f25f24c..76f875cef65d732143f6a48d9dbda2bc652c445e 100644
--- a/lib/linalg/dlamch.f
+++ b/lib/linalg/dlamch.f
@@ -1,47 +1,77 @@
+*> \brief \b DLAMCH
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*  Definition:
+*  ===========
+*
+*      DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLAMCH determines double precision machine parameters.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] CMACH
+*> \verbatim
+*>          Specifies the value to be returned by DLAMCH:
+*>          = 'E' or 'e',   DLAMCH := eps
+*>          = 'S' or 's ,   DLAMCH := sfmin
+*>          = 'B' or 'b',   DLAMCH := base
+*>          = 'P' or 'p',   DLAMCH := eps*base
+*>          = 'N' or 'n',   DLAMCH := t
+*>          = 'R' or 'r',   DLAMCH := rnd
+*>          = 'M' or 'm',   DLAMCH := emin
+*>          = 'U' or 'u',   DLAMCH := rmin
+*>          = 'L' or 'l',   DLAMCH := emax
+*>          = 'O' or 'o',   DLAMCH := rmax
+*>          where
+*>          eps   = relative machine precision
+*>          sfmin = safe minimum, such that 1/sfmin does not overflow
+*>          base  = base of the machine
+*>          prec  = eps*base
+*>          t     = number of (base) digits in the mantissa
+*>          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
+*>          emin  = minimum exponent before (gradual) underflow
+*>          rmin  = underflow threshold - base**(emin-1)
+*>          emax  = largest exponent before overflow
+*>          rmax  = overflow threshold  - (base**emax)*(1-eps)
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup auxOTHERauxiliary
+*
+*  =====================================================================
       DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
 *
-*  -- LAPACK auxiliary routine (version 3.2) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          CMACH
 *     ..
 *
-*  Purpose
-*  =======
-*
-*  DLAMCH determines double precision machine parameters.
-*
-*  Arguments
-*  =========
-*
-*  CMACH   (input) CHARACTER*1
-*          Specifies the value to be returned by DLAMCH:
-*          = 'E' or 'e',   DLAMCH := eps
-*          = 'S' or 's ,   DLAMCH := sfmin
-*          = 'B' or 'b',   DLAMCH := base
-*          = 'P' or 'p',   DLAMCH := eps*base
-*          = 'N' or 'n',   DLAMCH := t
-*          = 'R' or 'r',   DLAMCH := rnd
-*          = 'M' or 'm',   DLAMCH := emin
-*          = 'U' or 'u',   DLAMCH := rmin
-*          = 'L' or 'l',   DLAMCH := emax
-*          = 'O' or 'o',   DLAMCH := rmax
-*
-*          where
-*
-*          eps   = relative machine precision
-*          sfmin = safe minimum, such that 1/sfmin does not overflow
-*          base  = base of the machine
-*          prec  = eps*base
-*          t     = number of (base) digits in the mantissa
-*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
-*          emin  = minimum exponent before (gradual) underflow
-*          rmin  = underflow threshold - base**(emin-1)
-*          emax  = largest exponent before overflow
-*          rmax  = overflow threshold  - (base**emax)*(1-eps)
-*
 * =====================================================================
 *
 *     .. Parameters ..
@@ -49,552 +79,101 @@
       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
-      LOGICAL            FIRST, LRND
-      INTEGER            BETA, IMAX, IMIN, IT
-      DOUBLE PRECISION   BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN,
-     $                   RND, SFMIN, SMALL, T
+      DOUBLE PRECISION   RND, EPS, SFMIN, SMALL, RMACH
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       EXTERNAL           LSAME
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLAMC2
-*     ..
-*     .. Save statement ..
-      SAVE               FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN,
-     $                   EMAX, RMAX, PREC
-*     ..
-*     .. Data statements ..
-      DATA               FIRST / .TRUE. /
+*     .. Intrinsic Functions ..
+      INTRINSIC          DIGITS, EPSILON, HUGE, MAXEXPONENT,
+     $                   MINEXPONENT, RADIX, TINY
 *     ..
 *     .. Executable Statements ..
 *
-      IF( FIRST ) THEN
-         CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX )
-         BASE = BETA
-         T = IT
-         IF( LRND ) THEN
-            RND = ONE
-            EPS = ( BASE**( 1-IT ) ) / 2
-         ELSE
-            RND = ZERO
-            EPS = BASE**( 1-IT )
-         END IF
-         PREC = EPS*BASE
-         EMIN = IMIN
-         EMAX = IMAX
-         SFMIN = RMIN
-         SMALL = ONE / RMAX
-         IF( SMALL.GE.SFMIN ) THEN
 *
-*           Use SMALL plus a bit, to avoid the possibility of rounding
-*           causing overflow when computing  1/sfmin.
+*     Assume rounding, not chopping. Always.
 *
-            SFMIN = SMALL*( ONE+EPS )
-         END IF
+      RND = ONE
+*
+      IF( ONE.EQ.RND ) THEN
+         EPS = EPSILON(ZERO) * 0.5
+      ELSE
+         EPS = EPSILON(ZERO)
       END IF
 *
       IF( LSAME( CMACH, 'E' ) ) THEN
          RMACH = EPS
       ELSE IF( LSAME( CMACH, 'S' ) ) THEN
+         SFMIN = TINY(ZERO)
+         SMALL = ONE / HUGE(ZERO)
+         IF( SMALL.GE.SFMIN ) THEN
+*
+*           Use SMALL plus a bit, to avoid the possibility of rounding
+*           causing overflow when computing  1/sfmin.
+*
+            SFMIN = SMALL*( ONE+EPS )
+         END IF
          RMACH = SFMIN
       ELSE IF( LSAME( CMACH, 'B' ) ) THEN
-         RMACH = BASE
+         RMACH = RADIX(ZERO)
       ELSE IF( LSAME( CMACH, 'P' ) ) THEN
-         RMACH = PREC
+         RMACH = EPS * RADIX(ZERO)
       ELSE IF( LSAME( CMACH, 'N' ) ) THEN
-         RMACH = T
+         RMACH = DIGITS(ZERO)
       ELSE IF( LSAME( CMACH, 'R' ) ) THEN
          RMACH = RND
       ELSE IF( LSAME( CMACH, 'M' ) ) THEN
-         RMACH = EMIN
+         RMACH = MINEXPONENT(ZERO)
       ELSE IF( LSAME( CMACH, 'U' ) ) THEN
-         RMACH = RMIN
+         RMACH = tiny(zero)
       ELSE IF( LSAME( CMACH, 'L' ) ) THEN
-         RMACH = EMAX
+         RMACH = MAXEXPONENT(ZERO)
       ELSE IF( LSAME( CMACH, 'O' ) ) THEN
-         RMACH = RMAX
+         RMACH = HUGE(ZERO)
+      ELSE
+         RMACH = ZERO
       END IF
 *
       DLAMCH = RMACH
-      FIRST  = .FALSE.
       RETURN
 *
 *     End of DLAMCH
 *
       END
-*
-************************************************************************
-*
-      SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 )
-*
-*  -- LAPACK auxiliary routine (version 3.2) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
-*
-*     .. Scalar Arguments ..
-      LOGICAL            IEEE1, RND
-      INTEGER            BETA, T
-*     ..
-*
-*  Purpose
-*  =======
-*
-*  DLAMC1 determines the machine parameters given by BETA, T, RND, and
-*  IEEE1.
-*
-*  Arguments
-*  =========
-*
-*  BETA    (output) INTEGER
-*          The base of the machine.
-*
-*  T       (output) INTEGER
-*          The number of ( BETA ) digits in the mantissa.
-*
-*  RND     (output) LOGICAL
-*          Specifies whether proper rounding  ( RND = .TRUE. )  or
-*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
-*          be a reliable guide to the way in which the machine performs
-*          its arithmetic.
-*
-*  IEEE1   (output) LOGICAL
-*          Specifies whether rounding appears to be done in the IEEE
-*          'round to nearest' style.
-*
-*  Further Details
-*  ===============
-*
-*  The routine is based on the routine  ENVRON  by Malcolm and
-*  incorporates suggestions by Gentleman and Marovich. See
-*
-*     Malcolm M. A. (1972) Algorithms to reveal properties of
-*        floating-point arithmetic. Comms. of the ACM, 15, 949-951.
-*
-*     Gentleman W. M. and Marovich S. B. (1974) More on algorithms
-*        that reveal properties of floating point arithmetic units.
-*        Comms. of the ACM, 17, 276-277.
-*
-* =====================================================================
-*
-*     .. Local Scalars ..
-      LOGICAL            FIRST, LIEEE1, LRND
-      INTEGER            LBETA, LT
-      DOUBLE PRECISION   A, B, C, F, ONE, QTR, SAVEC, T1, T2
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMC3
-      EXTERNAL           DLAMC3
-*     ..
-*     .. Save statement ..
-      SAVE               FIRST, LIEEE1, LBETA, LRND, LT
-*     ..
-*     .. Data statements ..
-      DATA               FIRST / .TRUE. /
-*     ..
-*     .. Executable Statements ..
-*
-      IF( FIRST ) THEN
-         ONE = 1
-*
-*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA,
-*        IEEE1, T and RND.
-*
-*        Throughout this routine  we use the function  DLAMC3  to ensure
-*        that relevant values are  stored and not held in registers,  or
-*        are not affected by optimizers.
-*
-*        Compute  a = 2.0**m  with the  smallest positive integer m such
-*        that
-*
-*           fl( a + 1.0 ) = a.
-*
-         A = 1
-         C = 1
-*
-*+       WHILE( C.EQ.ONE )LOOP
-   10    CONTINUE
-         IF( C.EQ.ONE ) THEN
-            A = 2*A
-            C = DLAMC3( A, ONE )
-            C = DLAMC3( C, -A )
-            GO TO 10
-         END IF
-*+       END WHILE
-*
-*        Now compute  b = 2.0**m  with the smallest positive integer m
-*        such that
-*
-*           fl( a + b ) .gt. a.
-*
-         B = 1
-         C = DLAMC3( A, B )
-*
-*+       WHILE( C.EQ.A )LOOP
-   20    CONTINUE
-         IF( C.EQ.A ) THEN
-            B = 2*B
-            C = DLAMC3( A, B )
-            GO TO 20
-         END IF
-*+       END WHILE
-*
-*        Now compute the base.  a and c  are neighbouring floating point
-*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so
-*        their difference is beta. Adding 0.25 to c is to ensure that it
-*        is truncated to beta and not ( beta - 1 ).
-*
-         QTR = ONE / 4
-         SAVEC = C
-         C = DLAMC3( C, -A )
-         LBETA = C + QTR
-*
-*        Now determine whether rounding or chopping occurs,  by adding a
-*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a.
-*
-         B = LBETA
-         F = DLAMC3( B / 2, -B / 100 )
-         C = DLAMC3( F, A )
-         IF( C.EQ.A ) THEN
-            LRND = .TRUE.
-         ELSE
-            LRND = .FALSE.
-         END IF
-         F = DLAMC3( B / 2, B / 100 )
-         C = DLAMC3( F, A )
-         IF( ( LRND ) .AND. ( C.EQ.A ) )
-     $      LRND = .FALSE.
-*
-*        Try and decide whether rounding is done in the  IEEE  'round to
-*        nearest' style. B/2 is half a unit in the last place of the two
-*        numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit
-*        zero, and SAVEC is odd. Thus adding B/2 to A should not  change
-*        A, but adding B/2 to SAVEC should change SAVEC.
-*
-         T1 = DLAMC3( B / 2, A )
-         T2 = DLAMC3( B / 2, SAVEC )
-         LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND
-*
-*        Now find  the  mantissa, t.  It should  be the  integer part of
-*        log to the base beta of a,  however it is safer to determine  t
-*        by powering.  So we find t as the smallest positive integer for
-*        which
-*
-*           fl( beta**t + 1.0 ) = 1.0.
-*
-         LT = 0
-         A = 1
-         C = 1
-*
-*+       WHILE( C.EQ.ONE )LOOP
-   30    CONTINUE
-         IF( C.EQ.ONE ) THEN
-            LT = LT + 1
-            A = A*LBETA
-            C = DLAMC3( A, ONE )
-            C = DLAMC3( C, -A )
-            GO TO 30
-         END IF
-*+       END WHILE
-*
-      END IF
-*
-      BETA = LBETA
-      T = LT
-      RND = LRND
-      IEEE1 = LIEEE1
-      FIRST = .FALSE.
-      RETURN
-*
-*     End of DLAMC1
-*
-      END
-*
-************************************************************************
-*
-      SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
-*
-*  -- LAPACK auxiliary routine (version 3.2) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
-*
-*     .. Scalar Arguments ..
-      LOGICAL            RND
-      INTEGER            BETA, EMAX, EMIN, T
-      DOUBLE PRECISION   EPS, RMAX, RMIN
-*     ..
-*
-*  Purpose
-*  =======
-*
-*  DLAMC2 determines the machine parameters specified in its argument
-*  list.
-*
-*  Arguments
-*  =========
-*
-*  BETA    (output) INTEGER
-*          The base of the machine.
-*
-*  T       (output) INTEGER
-*          The number of ( BETA ) digits in the mantissa.
-*
-*  RND     (output) LOGICAL
-*          Specifies whether proper rounding  ( RND = .TRUE. )  or
-*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
-*          be a reliable guide to the way in which the machine performs
-*          its arithmetic.
-*
-*  EPS     (output) DOUBLE PRECISION
-*          The smallest positive number such that
-*
-*             fl( 1.0 - EPS ) .LT. 1.0,
-*
-*          where fl denotes the computed value.
-*
-*  EMIN    (output) INTEGER
-*          The minimum exponent before (gradual) underflow occurs.
-*
-*  RMIN    (output) DOUBLE PRECISION
-*          The smallest normalized number for the machine, given by
-*          BASE**( EMIN - 1 ), where  BASE  is the floating point value
-*          of BETA.
-*
-*  EMAX    (output) INTEGER
-*          The maximum exponent before overflow occurs.
-*
-*  RMAX    (output) DOUBLE PRECISION
-*          The largest positive number for the machine, given by
-*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
-*          value of BETA.
-*
-*  Further Details
-*  ===============
-*
-*  The computation of  EPS  is based on a routine PARANOIA by
-*  W. Kahan of the University of California at Berkeley.
-*
-* =====================================================================
-*
-*     .. Local Scalars ..
-      LOGICAL            FIRST, IEEE, IWARN, LIEEE1, LRND
-      INTEGER            GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT,
-     $                   NGNMIN, NGPMIN
-      DOUBLE PRECISION   A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE,
-     $                   SIXTH, SMALL, THIRD, TWO, ZERO
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMC3
-      EXTERNAL           DLAMC3
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLAMC1, DLAMC4, DLAMC5
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          ABS, MAX, MIN
-*     ..
-*     .. Save statement ..
-      SAVE               FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX,
-     $                   LRMIN, LT
-*     ..
-*     .. Data statements ..
-      DATA               FIRST / .TRUE. / , IWARN / .FALSE. /
-*     ..
-*     .. Executable Statements ..
-*
-      IF( FIRST ) THEN
-         ZERO = 0
-         ONE = 1
-         TWO = 2
-*
-*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of
-*        BETA, T, RND, EPS, EMIN and RMIN.
-*
-*        Throughout this routine  we use the function  DLAMC3  to ensure
-*        that relevant values are stored  and not held in registers,  or
-*        are not affected by optimizers.
-*
-*        DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1.
-*
-         CALL DLAMC1( LBETA, LT, LRND, LIEEE1 )
-*
-*        Start to find EPS.
-*
-         B = LBETA
-         A = B**( -LT )
-         LEPS = A
-*
-*        Try some tricks to see whether or not this is the correct  EPS.
-*
-         B = TWO / 3
-         HALF = ONE / 2
-         SIXTH = DLAMC3( B, -HALF )
-         THIRD = DLAMC3( SIXTH, SIXTH )
-         B = DLAMC3( THIRD, -HALF )
-         B = DLAMC3( B, SIXTH )
-         B = ABS( B )
-         IF( B.LT.LEPS )
-     $      B = LEPS
-*
-         LEPS = 1
-*
-*+       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP
-   10    CONTINUE
-         IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN
-            LEPS = B
-            C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) )
-            C = DLAMC3( HALF, -C )
-            B = DLAMC3( HALF, C )
-            C = DLAMC3( HALF, -B )
-            B = DLAMC3( HALF, C )
-            GO TO 10
-         END IF
-*+       END WHILE
-*
-         IF( A.LT.LEPS )
-     $      LEPS = A
-*
-*        Computation of EPS complete.
-*
-*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
-*        Keep dividing  A by BETA until (gradual) underflow occurs. This
-*        is detected when we cannot recover the previous A.
-*
-         RBASE = ONE / LBETA
-         SMALL = ONE
-         DO 20 I = 1, 3
-            SMALL = DLAMC3( SMALL*RBASE, ZERO )
-   20    CONTINUE
-         A = DLAMC3( ONE, SMALL )
-         CALL DLAMC4( NGPMIN, ONE, LBETA )
-         CALL DLAMC4( NGNMIN, -ONE, LBETA )
-         CALL DLAMC4( GPMIN, A, LBETA )
-         CALL DLAMC4( GNMIN, -A, LBETA )
-         IEEE = .FALSE.
-*
-         IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN
-            IF( NGPMIN.EQ.GPMIN ) THEN
-               LEMIN = NGPMIN
-*            ( Non twos-complement machines, no gradual underflow;
-*              e.g.,  VAX )
-            ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN
-               LEMIN = NGPMIN - 1 + LT
-               IEEE = .TRUE.
-*            ( Non twos-complement machines, with gradual underflow;
-*              e.g., IEEE standard followers )
-            ELSE
-               LEMIN = MIN( NGPMIN, GPMIN )
-*            ( A guess; no known machine )
-               IWARN = .TRUE.
-            END IF
-*
-         ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN
-            IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN
-               LEMIN = MAX( NGPMIN, NGNMIN )
-*            ( Twos-complement machines, no gradual underflow;
-*              e.g., CYBER 205 )
-            ELSE
-               LEMIN = MIN( NGPMIN, NGNMIN )
-*            ( A guess; no known machine )
-               IWARN = .TRUE.
-            END IF
-*
-         ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND.
-     $            ( GPMIN.EQ.GNMIN ) ) THEN
-            IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN
-               LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT
-*            ( Twos-complement machines with gradual underflow;
-*              no known machine )
-            ELSE
-               LEMIN = MIN( NGPMIN, NGNMIN )
-*            ( A guess; no known machine )
-               IWARN = .TRUE.
-            END IF
-*
-         ELSE
-            LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN )
-*         ( A guess; no known machine )
-            IWARN = .TRUE.
-         END IF
-         FIRST = .FALSE.
-***
-* Comment out this if block if EMIN is ok
-         IF( IWARN ) THEN
-            FIRST = .TRUE.
-            WRITE( 6, FMT = 9999 )LEMIN
-         END IF
-***
-*
-*        Assume IEEE arithmetic if we found denormalised  numbers above,
-*        or if arithmetic seems to round in the  IEEE style,  determined
-*        in routine DLAMC1. A true IEEE machine should have both  things
-*        true; however, faulty machines may have one or the other.
-*
-         IEEE = IEEE .OR. LIEEE1
-*
-*        Compute  RMIN by successive division by  BETA. We could compute
-*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during
-*        this computation.
-*
-         LRMIN = 1
-         DO 30 I = 1, 1 - LEMIN
-            LRMIN = DLAMC3( LRMIN*RBASE, ZERO )
-   30    CONTINUE
-*
-*        Finally, call DLAMC5 to compute EMAX and RMAX.
-*
-         CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX )
-      END IF
-*
-      BETA = LBETA
-      T = LT
-      RND = LRND
-      EPS = LEPS
-      EMIN = LEMIN
-      RMIN = LRMIN
-      EMAX = LEMAX
-      RMAX = LRMAX
-*
-      RETURN
-*
- 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-',
-     $      '  EMIN = ', I8, /
-     $      ' If, after inspection, the value EMIN looks',
-     $      ' acceptable please comment out ',
-     $      / ' the IF block as marked within the code of routine',
-     $      ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / )
-*
-*     End of DLAMC2
-*
-      END
-*
 ************************************************************************
-*
+*> \brief \b DLAMC3
+*> \details
+*> \b Purpose:
+*> \verbatim
+*> DLAMC3  is intended to force  A  and  B  to be stored prior to doing
+*> the addition of  A  and  B ,  for use in situations where optimizers
+*> might hold one of these in a register.
+*> \endverbatim
+*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
+*> \date December 2016
+*> \ingroup auxOTHERauxiliary
+*>
+*> \param[in] A
+*> \verbatim
+*>          A is a DOUBLE PRECISION
+*> \endverbatim
+*>
+*> \param[in] B
+*> \verbatim
+*>          B is a DOUBLE PRECISION
+*>          The values A and B.
+*> \endverbatim
+*>
       DOUBLE PRECISION FUNCTION DLAMC3( A, B )
 *
-*  -- LAPACK auxiliary routine (version 3.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
+*     November 2010
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   A, B
 *     ..
-*
-*  Purpose
-*  =======
-*
-*  DLAMC3  is intended to force  A  and  B  to be stored prior to doing
-*  the addition of  A  and  B ,  for use in situations where optimizers
-*  might hold one of these in a register.
-*
-*  Arguments
-*  =========
-*
-*  A       (input) DOUBLE PRECISION
-*  B       (input) DOUBLE PRECISION
-*          The values A and B.
-*
 * =====================================================================
 *
 *     .. Executable Statements ..
@@ -608,245 +187,3 @@
       END
 *
 ************************************************************************
-*
-      SUBROUTINE DLAMC4( EMIN, START, BASE )
-*
-*  -- LAPACK auxiliary routine (version 3.2) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
-*
-*     .. Scalar Arguments ..
-      INTEGER            BASE, EMIN
-      DOUBLE PRECISION   START
-*     ..
-*
-*  Purpose
-*  =======
-*
-*  DLAMC4 is a service routine for DLAMC2.
-*
-*  Arguments
-*  =========
-*
-*  EMIN    (output) INTEGER 
-*          The minimum exponent before (gradual) underflow, computed by
-*          setting A = START and dividing by BASE until the previous A
-*          can not be recovered.
-*
-*  START   (input) DOUBLE PRECISION
-*          The starting point for determining EMIN.
-*
-*  BASE    (input) INTEGER
-*          The base of the machine.
-*
-* =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER            I
-      DOUBLE PRECISION   A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMC3
-      EXTERNAL           DLAMC3
-*     ..
-*     .. Executable Statements ..
-*
-      A = START
-      ONE = 1
-      RBASE = ONE / BASE
-      ZERO = 0
-      EMIN = 1
-      B1 = DLAMC3( A*RBASE, ZERO )
-      C1 = A
-      C2 = A
-      D1 = A
-      D2 = A
-*+    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
-*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP
-   10 CONTINUE
-      IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND.
-     $    ( D2.EQ.A ) ) THEN
-         EMIN = EMIN - 1
-         A = B1
-         B1 = DLAMC3( A / BASE, ZERO )
-         C1 = DLAMC3( B1*BASE, ZERO )
-         D1 = ZERO
-         DO 20 I = 1, BASE
-            D1 = D1 + B1
-   20    CONTINUE
-         B2 = DLAMC3( A*RBASE, ZERO )
-         C2 = DLAMC3( B2 / RBASE, ZERO )
-         D2 = ZERO
-         DO 30 I = 1, BASE
-            D2 = D2 + B2
-   30    CONTINUE
-         GO TO 10
-      END IF
-*+    END WHILE
-*
-      RETURN
-*
-*     End of DLAMC4
-*
-      END
-*
-************************************************************************
-*
-      SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX )
-*
-*  -- LAPACK auxiliary routine (version 3.2) --
-*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
-*     November 2006
-*
-*     .. Scalar Arguments ..
-      LOGICAL            IEEE
-      INTEGER            BETA, EMAX, EMIN, P
-      DOUBLE PRECISION   RMAX
-*     ..
-*
-*  Purpose
-*  =======
-*
-*  DLAMC5 attempts to compute RMAX, the largest machine floating-point
-*  number, without overflow.  It assumes that EMAX + abs(EMIN) sum
-*  approximately to a power of 2.  It will fail on machines where this
-*  assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
-*  EMAX = 28718).  It will also fail if the value supplied for EMIN is
-*  too large (i.e. too close to zero), probably with overflow.
-*
-*  Arguments
-*  =========
-*
-*  BETA    (input) INTEGER
-*          The base of floating-point arithmetic.
-*
-*  P       (input) INTEGER
-*          The number of base BETA digits in the mantissa of a
-*          floating-point value.
-*
-*  EMIN    (input) INTEGER
-*          The minimum exponent before (gradual) underflow.
-*
-*  IEEE    (input) LOGICAL
-*          A logical flag specifying whether or not the arithmetic
-*          system is thought to comply with the IEEE standard.
-*
-*  EMAX    (output) INTEGER
-*          The largest exponent before overflow
-*
-*  RMAX    (output) DOUBLE PRECISION
-*          The largest machine floating-point number.
-*
-* =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION   ZERO, ONE
-      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
-*     ..
-*     .. Local Scalars ..
-      INTEGER            EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP
-      DOUBLE PRECISION   OLDY, RECBAS, Y, Z
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION   DLAMC3
-      EXTERNAL           DLAMC3
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC          MOD
-*     ..
-*     .. Executable Statements ..
-*
-*     First compute LEXP and UEXP, two powers of 2 that bound
-*     abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
-*     approximately to the bound that is closest to abs(EMIN).
-*     (EMAX is the exponent of the required number RMAX).
-*
-      LEXP = 1
-      EXBITS = 1
-   10 CONTINUE
-      TRY = LEXP*2
-      IF( TRY.LE.( -EMIN ) ) THEN
-         LEXP = TRY
-         EXBITS = EXBITS + 1
-         GO TO 10
-      END IF
-      IF( LEXP.EQ.-EMIN ) THEN
-         UEXP = LEXP
-      ELSE
-         UEXP = TRY
-         EXBITS = EXBITS + 1
-      END IF
-*
-*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater
-*     than or equal to EMIN. EXBITS is the number of bits needed to
-*     store the exponent.
-*
-      IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN
-         EXPSUM = 2*LEXP
-      ELSE
-         EXPSUM = 2*UEXP
-      END IF
-*
-*     EXPSUM is the exponent range, approximately equal to
-*     EMAX - EMIN + 1 .
-*
-      EMAX = EXPSUM + EMIN - 1
-      NBITS = 1 + EXBITS + P
-*
-*     NBITS is the total number of bits needed to store a
-*     floating-point number.
-*
-      IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN
-*
-*        Either there are an odd number of bits used to store a
-*        floating-point number, which is unlikely, or some bits are
-*        not used in the representation of numbers, which is possible,
-*        (e.g. Cray machines) or the mantissa has an implicit bit,
-*        (e.g. IEEE machines, Dec Vax machines), which is perhaps the
-*        most likely. We have to assume the last alternative.
-*        If this is true, then we need to reduce EMAX by one because
-*        there must be some way of representing zero in an implicit-bit
-*        system. On machines like Cray, we are reducing EMAX by one
-*        unnecessarily.
-*
-         EMAX = EMAX - 1
-      END IF
-*
-      IF( IEEE ) THEN
-*
-*        Assume we are on an IEEE machine which reserves one exponent
-*        for infinity and NaN.
-*
-         EMAX = EMAX - 1
-      END IF
-*
-*     Now create RMAX, the largest machine number, which should
-*     be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
-*
-*     First compute 1.0 - BETA**(-P), being careful that the
-*     result is less than 1.0 .
-*
-      RECBAS = ONE / BETA
-      Z = BETA - ONE
-      Y = ZERO
-      DO 20 I = 1, P
-         Z = Z*RECBAS
-         IF( Y.LT.ONE )
-     $      OLDY = Y
-         Y = DLAMC3( Y, Z )
-   20 CONTINUE
-      IF( Y.GE.ONE )
-     $   Y = OLDY
-*
-*     Now multiply by BETA**EMAX to get RMAX.
-*
-      DO 30 I = 1, EMAX
-         Y = DLAMC3( Y*BETA, ZERO )
-   30 CONTINUE
-*
-      RMAX = Y
-      RETURN
-*
-*     End of DLAMC5
-*
-      END
diff --git a/lib/linalg/dlamrg.f b/lib/linalg/dlamrg.f
index 7126053e8a43ebeb48290cbda628d4d82810681f..de19508e459695e7132a8fb7806b7310003d3333 100644
--- a/lib/linalg/dlamrg.f
+++ b/lib/linalg/dlamrg.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAMRG + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlamrg.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlamrg.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlamrg.f"> 
+*> Download DLAMRG + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlamrg.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlamrg.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlamrg.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            DTRD1, DTRD2, N1, N2
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            INDEX( * )
 *       DOUBLE PRECISION   A( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -50,7 +50,7 @@
 *> \param[in] N2
 *> \verbatim
 *>          N2 is INTEGER
-*>         These arguements contain the respective lengths of the two
+*>         These arguments contain the respective lengths of the two
 *>         sorted lists to be merged.
 *> \endverbatim
 *>
@@ -87,22 +87,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            DTRD1, DTRD2, N1, N2
diff --git a/lib/linalg/dlange.f b/lib/linalg/dlange.f
index bec815d1efc1a98ec3caa34e982d0f9624c5b13c..9dbf45e81827f751a7842d8011c48f8cdd507483 100644
--- a/lib/linalg/dlange.f
+++ b/lib/linalg/dlange.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLANGE + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlange.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlange.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlange.f"> 
+*> Download DLANGE + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlange.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlange.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlange.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM
 *       INTEGER            LDA, M, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleGEauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          NORM
diff --git a/lib/linalg/dlanst.f b/lib/linalg/dlanst.f
index 213b06ada0f8b083198f2e6e2f285d7488559ae4..e952e2dd212d40cfe38af43a6c0043aa9aa114a7 100644
--- a/lib/linalg/dlanst.f
+++ b/lib/linalg/dlanst.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLANST + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlanst.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlanst.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlanst.f"> 
+*> Download DLANST + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlanst.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlanst.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlanst.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM
 *       INTEGER            N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), E( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -88,22 +88,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          NORM
diff --git a/lib/linalg/dlansy.f b/lib/linalg/dlansy.f
index bc70ab8edacd3e015294c6faeb001ff7cfb70b12..2372fce0a8f4868a5f123ec9815c0cc1f6d6ce4c 100644
--- a/lib/linalg/dlansy.f
+++ b/lib/linalg/dlansy.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLANSY + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlansy.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlansy.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlansy.f"> 
+*> Download DLANSY + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlansy.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlansy.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlansy.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM, UPLO
 *       INTEGER            LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -110,22 +110,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleSYauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
diff --git a/lib/linalg/dlapy2.f b/lib/linalg/dlapy2.f
index d43b0d5d14ce0d6ec97a9eab066b0316ac2a8bf5..bc01829a24ed7ab572f8f5f11cadd53dc6b1589c 100644
--- a/lib/linalg/dlapy2.f
+++ b/lib/linalg/dlapy2.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAPY2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlapy2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlapy2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlapy2.f"> 
+*> Download DLAPY2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlapy2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlapy2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlapy2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DLAPY2( X, Y )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   X, Y
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -51,22 +51,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DLAPY2( X, Y )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   X, Y
@@ -82,20 +82,32 @@
 *     ..
 *     .. Local Scalars ..
       DOUBLE PRECISION   W, XABS, YABS, Z
+      LOGICAL            X_IS_NAN, Y_IS_NAN
+*     ..
+*     .. External Functions ..
+      LOGICAL            DISNAN
+      EXTERNAL           DISNAN
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
 *     .. Executable Statements ..
 *
-      XABS = ABS( X )
-      YABS = ABS( Y )
-      W = MAX( XABS, YABS )
-      Z = MIN( XABS, YABS )
-      IF( Z.EQ.ZERO ) THEN
-         DLAPY2 = W
-      ELSE
-         DLAPY2 = W*SQRT( ONE+( Z / W )**2 )
+      X_IS_NAN = DISNAN( X )
+      Y_IS_NAN = DISNAN( Y )
+      IF ( X_IS_NAN ) DLAPY2 = X
+      IF ( Y_IS_NAN ) DLAPY2 = Y
+*
+      IF ( .NOT.( X_IS_NAN.OR.Y_IS_NAN ) ) THEN
+         XABS = ABS( X )
+         YABS = ABS( Y )
+         W = MAX( XABS, YABS )
+         Z = MIN( XABS, YABS )
+         IF( Z.EQ.ZERO ) THEN
+            DLAPY2 = W
+         ELSE
+            DLAPY2 = W*SQRT( ONE+( Z / W )**2 )
+         END IF
       END IF
       RETURN
 *
diff --git a/lib/linalg/dlapy3.f b/lib/linalg/dlapy3.f
index 23feecc4478a3ed002b7bdd5ec5c4c00c98603f6..3bbba88875c8e0572b920b1bb7f009006e747806 100644
--- a/lib/linalg/dlapy3.f
+++ b/lib/linalg/dlapy3.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAPY3 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlapy3.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlapy3.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlapy3.f"> 
+*> Download DLAPY3 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlapy3.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlapy3.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlapy3.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   X, Y, Z
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -56,22 +56,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   X, Y, Z
diff --git a/lib/linalg/dlarf.f b/lib/linalg/dlarf.f
index 80dca69af7f6e36073fc5cbf1f6eef0ca7fb4a45..e99d0bb2a914323352b61232d134999c36f6d0df 100644
--- a/lib/linalg/dlarf.f
+++ b/lib/linalg/dlarf.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLARF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarf.f"> 
+*> Download DLARF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE
 *       INTEGER            INCV, LDC, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   C( LDC, * ), V( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -112,22 +112,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE
diff --git a/lib/linalg/dlarfb.f b/lib/linalg/dlarfb.f
index 17218478af31abe795d5972fec7d5c99937048f1..5b2cc2ba800f3d53bb1d990ad69ceebc6e4c105e 100644
--- a/lib/linalg/dlarfb.f
+++ b/lib/linalg/dlarfb.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLARFB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarfb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarfb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarfb.f"> 
+*> Download DLARFB + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarfb.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarfb.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarfb.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
 *                          T, LDT, C, LDC, WORK, LDWORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, SIDE, STOREV, TRANS
 *       INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   C( LDC, * ), T( LDT, * ), V( LDV, * ),
 *      $                   WORK( LDWORK, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -154,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2013
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -195,10 +195,10 @@
       SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
      $                   T, LDT, C, LDC, WORK, LDWORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2013
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, SIDE, STOREV, TRANS
@@ -217,12 +217,11 @@
 *     ..
 *     .. Local Scalars ..
       CHARACTER          TRANST
-      INTEGER            I, J, LASTV, LASTC, lastv2
+      INTEGER            I, J
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
-      INTEGER            ILADLR, ILADLC
-      EXTERNAL           LSAME, ILADLR, ILADLC
+      EXTERNAL           LSAME
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           DCOPY, DGEMM, DTRMM
@@ -252,58 +251,53 @@
 *
 *              Form  H * C  or  H**T * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTV = MAX( K, ILADLR( M, K, V, LDV ) )
-               LASTC = ILADLC( LASTV, N, C, LDC )
 *
 *              W := C**T * V  =  (C1**T * V1 + C2**T * V2)  (stored in WORK)
 *
 *              W := C1**T
 *
                DO 10 J = 1, K
-                  CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
    10          CONTINUE
 *
 *              W := W * V1
 *
-               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
 *
-*                 W := W + C2**T *V2
+*                 W := W + C2**T * V2
 *
-                  CALL DGEMM( 'Transpose', 'No transpose',
-     $                 LASTC, K, LASTV-K,
-     $                 ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T**T  or  W * T
 *
-               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V * W**T
 *
-               IF( LASTV.GT.K ) THEN
+               IF( M.GT.K ) THEN
 *
 *                 C2 := C2 - V2 * W**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 LASTV-K, LASTC, K,
-     $                 -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE,
-     $                 C( K+1, 1 ), LDC )
+                  CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
                END IF
 *
 *              W := W * V1**T
 *
-               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W**T
 *
                DO 30 J = 1, K
-                  DO 20 I = 1, LASTC
+                  DO 20 I = 1, N
                      C( J, I ) = C( J, I ) - WORK( I, J )
    20             CONTINUE
    30          CONTINUE
@@ -311,58 +305,53 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**T  where  C = ( C1  C2 )
-*
-               LASTV = MAX( K, ILADLR( N, K, V, LDV ) )
-               LASTC = ILADLR( M, LASTV, C, LDC )
 *
 *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
 *
 *              W := C1
 *
                DO 40 J = 1, K
-                  CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
+                  CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
    40          CONTINUE
 *
 *              W := W * V1
 *
-               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
 *
 *                 W := W + C2 * V2
 *
-                  CALL DGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, K, LASTV-K,
-     $                 ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**T
 *
-               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V**T
 *
-               IF( LASTV.GT.K ) THEN
+               IF( N.GT.K ) THEN
 *
 *                 C2 := C2 - W * V2**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 LASTC, LASTV-K, K,
-     $                 -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE,
-     $                 C( 1, K+1 ), LDC )
+                  CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
                END IF
 *
 *              W := W * V1**T
 *
-               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 60 J = 1, K
-                  DO 50 I = 1, LASTC
+                  DO 50 I = 1, M
                      C( I, J ) = C( I, J ) - WORK( I, J )
    50             CONTINUE
    60          CONTINUE
@@ -378,36 +367,31 @@
 *
 *              Form  H * C  or  H**T * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTC = ILADLC( M, N, C, LDC )
 *
 *              W := C**T * V  =  (C1**T * V1 + C2**T * V2)  (stored in WORK)
 *
 *              W := C2**T
 *
                DO 70 J = 1, K
-                  CALL DCOPY( LASTC, C( M-K+J, 1 ), LDC,
-     $                 WORK( 1, J ), 1 )
+                  CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
    70          CONTINUE
 *
 *              W := W * V2
 *
-               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( M-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
                IF( M.GT.K ) THEN
 *
-*                 W := W + C1**T*V1
+*                 W := W + C1**T * V1
 *
-                  CALL DGEMM( 'Transpose', 'No transpose',
-     $                 LASTC, K, M-K, ONE, C, LDC, V, LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T**T  or  W * T
 *
-               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V * W**T
 *
@@ -415,57 +399,51 @@
 *
 *                 C1 := C1 - V1 * W**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 M-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK,
-     $                 ONE, C, LDC )
+                  CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC )
                END IF
 *
 *              W := W * V2**T
 *
-               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V( M-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
 *
 *              C2 := C2 - W**T
 *
                DO 90 J = 1, K
-                  DO 80 I = 1, LASTC
-                     C( M-K+J, I ) = C( M-K+J, I ) - WORK(I, J)
+                  DO 80 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
    80             CONTINUE
    90          CONTINUE
 *
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**T  where  C = ( C1  C2 )
-*
-               LASTC = ILADLR( M, N, C, LDC )
 *
 *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
 *
 *              W := C2
 *
                DO 100 J = 1, K
-                  CALL DCOPY( LASTC, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+                  CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
   100          CONTINUE
 *
 *              W := W * V2
 *
-               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( N-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
                IF( N.GT.K ) THEN
 *
 *                 W := W + C1 * V1
 *
-                  CALL DGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, K, N-K, ONE, C, LDC, V, LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**T
 *
-               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V**T
 *
@@ -473,22 +451,20 @@
 *
 *                 C1 := C1 - W * V1**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV,
-     $                 ONE, C, LDC )
+                  CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
                END IF
 *
 *              W := W * V2**T
 *
-               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V( N-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
 *
 *              C2 := C2 - W
 *
                DO 120 J = 1, K
-                  DO 110 I = 1, LASTC
-                     C( I, N-K+J ) = C( I, N-K+J ) - WORK(I, J)
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
   110             CONTINUE
   120          CONTINUE
             END IF
@@ -505,58 +481,53 @@
 *
 *              Form  H * C  or  H**T * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTV = MAX( K, ILADLC( K, M, V, LDV ) )
-               LASTC = ILADLC( LASTV, N, C, LDC )
 *
 *              W := C**T * V**T  =  (C1**T * V1**T + C2**T * V2**T) (stored in WORK)
 *
 *              W := C1**T
 *
                DO 130 J = 1, K
-                  CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
   130          CONTINUE
 *
 *              W := W * V1**T
 *
-               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
 *
-*                 W := W + C2**T*V2**T
+*                 W := W + C2**T * V2**T
 *
-                  CALL DGEMM( 'Transpose', 'Transpose',
-     $                 LASTC, K, LASTV-K,
-     $                 ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
                END IF
 *
 *              W := W * T**T  or  W * T
 *
-               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V**T * W**T
 *
-               IF( LASTV.GT.K ) THEN
+               IF( M.GT.K ) THEN
 *
 *                 C2 := C2 - V2**T * W**T
 *
-                  CALL DGEMM( 'Transpose', 'Transpose',
-     $                 LASTV-K, LASTC, K,
-     $                 -ONE, V( 1, K+1 ), LDV, WORK, LDWORK,
-     $                 ONE, C( K+1, 1 ), LDC )
+                  CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
                END IF
 *
 *              W := W * V1
 *
-               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W**T
 *
                DO 150 J = 1, K
-                  DO 140 I = 1, LASTC
+                  DO 140 I = 1, N
                      C( J, I ) = C( J, I ) - WORK( I, J )
   140             CONTINUE
   150          CONTINUE
@@ -564,58 +535,53 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**T  where  C = ( C1  C2 )
-*
-               LASTV = MAX( K, ILADLC( K, N, V, LDV ) )
-               LASTC = ILADLR( M, LASTV, C, LDC )
 *
 *              W := C * V**T  =  (C1*V1**T + C2*V2**T)  (stored in WORK)
 *
 *              W := C1
 *
                DO 160 J = 1, K
-                  CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
+                  CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
   160          CONTINUE
 *
 *              W := W * V1**T
 *
-               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
 *
 *                 W := W + C2 * V2**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 LASTC, K, LASTV-K,
-     $                 ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV,
+     $                        ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**T
 *
-               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V
 *
-               IF( LASTV.GT.K ) THEN
+               IF( N.GT.K ) THEN
 *
 *                 C2 := C2 - W * V2
 *
-                  CALL DGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, LASTV-K, K,
-     $                 -ONE, WORK, LDWORK, V( 1, K+1 ), LDV,
-     $                 ONE, C( 1, K+1 ), LDC )
+                  CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
                END IF
 *
 *              W := W * V1
 *
-               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 180 J = 1, K
-                  DO 170 I = 1, LASTC
+                  DO 170 I = 1, M
                      C( I, J ) = C( I, J ) - WORK( I, J )
   170             CONTINUE
   180          CONTINUE
@@ -631,36 +597,31 @@
 *
 *              Form  H * C  or  H**T * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTC = ILADLC( M, N, C, LDC )
 *
 *              W := C**T * V**T  =  (C1**T * V1**T + C2**T * V2**T) (stored in WORK)
 *
 *              W := C2**T
 *
                DO 190 J = 1, K
-                  CALL DCOPY( LASTC, C( M-K+J, 1 ), LDC,
-     $                 WORK( 1, J ), 1 )
+                  CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
   190          CONTINUE
 *
 *              W := W * V2**T
 *
-               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, M-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
                IF( M.GT.K ) THEN
 *
 *                 W := W + C1**T * V1**T
 *
-                  CALL DGEMM( 'Transpose', 'Transpose',
-     $                 LASTC, K, M-K, ONE, C, LDC, V, LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C, LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T**T  or  W * T
 *
-               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V**T * W**T
 *
@@ -668,58 +629,51 @@
 *
 *                 C1 := C1 - V1**T * W**T
 *
-                  CALL DGEMM( 'Transpose', 'Transpose',
-     $                 M-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK,
-     $                 ONE, C, LDC )
+                  CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V, LDV, WORK, LDWORK, ONE, C, LDC )
                END IF
 *
 *              W := W * V2
 *
-               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, M-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
 *
 *              C2 := C2 - W**T
 *
                DO 210 J = 1, K
-                  DO 200 I = 1, LASTC
-                     C( M-K+J, I ) = C( M-K+J, I ) - WORK(I, J)
+                  DO 200 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
   200             CONTINUE
   210          CONTINUE
 *
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
-*              Form  C * H  or  C * H**T  where  C = ( C1  C2 )
-*
-               LASTC = ILADLR( M, N, C, LDC )
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
 *
 *              W := C * V**T  =  (C1*V1**T + C2*V2**T)  (stored in WORK)
 *
 *              W := C2
 *
                DO 220 J = 1, K
-                  CALL DCOPY( LASTC, C( 1, N-K+J ), 1,
-     $                 WORK( 1, J ), 1 )
+                  CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
   220          CONTINUE
 *
 *              W := W * V2**T
 *
-               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, N-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
                IF( N.GT.K ) THEN
 *
 *                 W := W + C1 * V1**T
 *
-                  CALL DGEMM( 'No transpose', 'Transpose',
-     $                 LASTC, K, N-K, ONE, C, LDC, V, LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**T
 *
-               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V
 *
@@ -727,22 +681,20 @@
 *
 *                 C1 := C1 - W * V1
 *
-                  CALL DGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV,
-     $                 ONE, C, LDC )
+                  CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
                END IF
 *
 *              W := W * V2
 *
-               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, N-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 240 J = 1, K
-                  DO 230 I = 1, LASTC
-                     C( I, N-K+J ) = C( I, N-K+J ) - WORK(I, J)
+                  DO 230 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
   230             CONTINUE
   240          CONTINUE
 *
diff --git a/lib/linalg/dlarfg.f b/lib/linalg/dlarfg.f
index ce91d33c1af5474e49f6ffb90303f8028218813c..cb177a57035aa4995a2d56cc4a70b765a9fdbf7a 100644
--- a/lib/linalg/dlarfg.f
+++ b/lib/linalg/dlarfg.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLARFG + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarfg.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarfg.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarfg.f"> 
+*> Download DLARFG + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarfg.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarfg.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarfg.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       DOUBLE PRECISION   ALPHA, TAU
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -94,22 +94,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/dlarft.f b/lib/linalg/dlarft.f
index bc1b53b2ce746387ae1476e64369a723c67d8712..e69a6b792ee3005a544f26fdbbde8014b96d2620 100644
--- a/lib/linalg/dlarft.f
+++ b/lib/linalg/dlarft.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLARFT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.f"> 
+*> Download DLARFT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -125,12 +125,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -163,10 +163,10 @@
 *  =====================================================================
       SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, STOREV
@@ -221,13 +221,13 @@
                   END DO
                   DO J = 1, I-1
                      T( J, I ) = -TAU( I ) * V( I , J )
-                  END DO   
+                  END DO
                   J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
 *
-                  CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), 
-     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, 
+                  CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ),
+     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
      $                        T( 1, I ), 1 )
                ELSE
 *                 Skip any trailing zeros.
@@ -236,7 +236,7 @@
                   END DO
                   DO J = 1, I-1
                      T( J, I ) = -TAU( I ) * V( J , I )
-                  END DO   
+                  END DO
                   J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
@@ -280,7 +280,7 @@
                      END DO
                      DO J = I+1, K
                         T( J, I ) = -TAU( I ) * V( N-K+I , J )
-                     END DO   
+                     END DO
                      J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
@@ -295,7 +295,7 @@
                      END DO
                      DO J = I+1, K
                         T( J, I ) = -TAU( I ) * V( J, N-K+I )
-                     END DO   
+                     END DO
                      J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
diff --git a/lib/linalg/dlartg.f b/lib/linalg/dlartg.f
index bf74c4365c3f0c3f86e9ca45d34f1d6bb7862f31..1c7c46f638b731c40f7ddab9a15e5321da46448d 100644
--- a/lib/linalg/dlartg.f
+++ b/lib/linalg/dlartg.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLARTG + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlartg.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlartg.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlartg.f"> 
+*> Download DLARTG + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlartg.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlartg.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlartg.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARTG( F, G, CS, SN, R )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   CS, F, G, R, SN
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -85,22 +85,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLARTG( F, G, CS, SN, R )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   CS, F, G, R, SN
diff --git a/lib/linalg/dlas2.f b/lib/linalg/dlas2.f
index 81077f940d3dbe6f589e2ebf2d3e79dc0dd96dbf..83873bc612822f7448d7d3698024c56c2d6c655f 100644
--- a/lib/linalg/dlas2.f
+++ b/lib/linalg/dlas2.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLAS2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlas2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlas2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlas2.f"> 
+*> Download DLAS2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlas2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlas2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlas2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   F, G, H, SSMAX, SSMIN
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -73,14 +73,14 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -107,10 +107,10 @@
 *  =====================================================================
       SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   F, G, H, SSMAX, SSMIN
diff --git a/lib/linalg/dlascl.f b/lib/linalg/dlascl.f
index 9b9b33c0c1482888394180502b453a93f62e8c26..03e1000a8784ae181cd73a76aee8acbb96448db4 100644
--- a/lib/linalg/dlascl.f
+++ b/lib/linalg/dlascl.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASCL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlascl.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlascl.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlascl.f"> 
+*> Download DLASCL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlascl.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlascl.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlascl.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          TYPE
 *       INTEGER            INFO, KL, KU, LDA, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -114,7 +114,11 @@
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
+*>          The leading dimension of the array A.
+*>          If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M);
+*>             TYPE = 'B', LDA >= KL+1;
+*>             TYPE = 'Q', LDA >= KU+1;
+*>             TYPE = 'Z', LDA >= 2*KL+KU+1.
 *> \endverbatim
 *>
 *> \param[out] INFO
@@ -127,22 +131,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          TYPE
diff --git a/lib/linalg/dlasd4.f b/lib/linalg/dlasd4.f
new file mode 100644
index 0000000000000000000000000000000000000000..8b4a8762c877ef4093462a88269209bb7e228c65
--- /dev/null
+++ b/lib/linalg/dlasd4.f
@@ -0,0 +1,1061 @@
+*> \brief \b DLASD4 computes the square root of the i-th updated eigenvalue of a positive symmetric rank-one modification to a positive diagonal matrix. Used by dbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD4 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd4.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd4.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd4.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            I, INFO, N
+*       DOUBLE PRECISION   RHO, SIGMA
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   D( * ), DELTA( * ), WORK( * ), Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> This subroutine computes the square root of the I-th updated
+*> eigenvalue of a positive symmetric rank-one modification to
+*> a positive diagonal matrix whose entries are given as the squares
+*> of the corresponding entries in the array d, and that
+*>
+*>        0 <= D(i) < D(j)  for  i < j
+*>
+*> and that RHO > 0. This is arranged by the calling routine, and is
+*> no loss in generality.  The rank-one modified system is thus
+*>
+*>        diag( D ) * diag( D ) +  RHO * Z * Z_transpose.
+*>
+*> where we assume the Euclidean norm of Z is 1.
+*>
+*> The method consists of approximating the rational functions in the
+*> secular equation by simpler interpolating rational functions.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The length of all arrays.
+*> \endverbatim
+*>
+*> \param[in] I
+*> \verbatim
+*>          I is INTEGER
+*>         The index of the eigenvalue to be computed.  1 <= I <= N.
+*> \endverbatim
+*>
+*> \param[in] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( N )
+*>         The original eigenvalues.  It is assumed that they are in
+*>         order, 0 <= D(I) < D(J)  for I < J.
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( N )
+*>         The components of the updating vector.
+*> \endverbatim
+*>
+*> \param[out] DELTA
+*> \verbatim
+*>          DELTA is DOUBLE PRECISION array, dimension ( N )
+*>         If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th
+*>         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+*>         contains the information necessary to construct the
+*>         (singular) eigenvectors.
+*> \endverbatim
+*>
+*> \param[in] RHO
+*> \verbatim
+*>          RHO is DOUBLE PRECISION
+*>         The scalar in the symmetric updating formula.
+*> \endverbatim
+*>
+*> \param[out] SIGMA
+*> \verbatim
+*>          SIGMA is DOUBLE PRECISION
+*>         The computed sigma_I, the I-th updated eigenvalue.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( N )
+*>         If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th
+*>         component.  If N = 1, then WORK( 1 ) = 1.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>         = 0:  successful exit
+*>         > 0:  if INFO = 1, the updating process failed.
+*> \endverbatim
+*
+*> \par Internal Parameters:
+*  =========================
+*>
+*> \verbatim
+*>  Logical variable ORGATI (origin-at-i?) is used for distinguishing
+*>  whether D(i) or D(i+1) is treated as the origin.
+*>
+*>            ORGATI = .true.    origin at i
+*>            ORGATI = .false.   origin at i+1
+*>
+*>  Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+*>  if we are working with THREE poles!
+*>
+*>  MAXIT is the maximum number of iterations allowed for each
+*>  eigenvalue.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ren-Cang Li, Computer Science Division, University of California
+*>     at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            I, INFO, N
+      DOUBLE PRECISION   RHO, SIGMA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DELTA( * ), WORK( * ), Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 400 )
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   THREE = 3.0D+0, FOUR = 4.0D+0, EIGHT = 8.0D+0,
+     $                   TEN = 10.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ORGATI, SWTCH, SWTCH3, GEOMAVG
+      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
+      DOUBLE PRECISION   A, B, C, DELSQ, DELSQ2, SQ2, DPHI, DPSI, DTIIM,
+     $                   DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS,
+     $                   ERRETM, ETA, PHI, PREW, PSI, RHOINV, SGLB,
+     $                   SGUB, TAU, TAU2, TEMP, TEMP1, TEMP2, W
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DD( 3 ), ZZ( 3 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAED6, DLASD5
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Since this routine is called in an inner loop, we do no argument
+*     checking.
+*
+*     Quick return for N=1 and 2.
+*
+      INFO = 0
+      IF( N.EQ.1 ) THEN
+*
+*        Presumably, I=1 upon entry
+*
+         SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) )
+         DELTA( 1 ) = ONE
+         WORK( 1 ) = ONE
+         RETURN
+      END IF
+      IF( N.EQ.2 ) THEN
+         CALL DLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK )
+         RETURN
+      END IF
+*
+*     Compute machine epsilon
+*
+      EPS = DLAMCH( 'Epsilon' )
+      RHOINV = ONE / RHO
+      TAU2= ZERO
+*
+*     The case I = N
+*
+      IF( I.EQ.N ) THEN
+*
+*        Initialize some basic variables
+*
+         II = N - 1
+         NITER = 1
+*
+*        Calculate initial guess
+*
+         TEMP = RHO / TWO
+*
+*        If ||Z||_2 is not one, then TEMP should be set to
+*        RHO * ||Z||_2^2 / TWO
+*
+         TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) )
+         DO 10 J = 1, N
+            WORK( J ) = D( J ) + D( N ) + TEMP1
+            DELTA( J ) = ( D( J )-D( N ) ) - TEMP1
+   10    CONTINUE
+*
+         PSI = ZERO
+         DO 20 J = 1, N - 2
+            PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) )
+   20    CONTINUE
+*
+         C = RHOINV + PSI
+         W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) +
+     $       Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) )
+*
+         IF( W.LE.ZERO ) THEN
+            TEMP1 = SQRT( D( N )*D( N )+RHO )
+            TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )*
+     $             ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) +
+     $             Z( N )*Z( N ) / RHO
+*
+*           The following TAU2 is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( C.LE.TEMP ) THEN
+               TAU = RHO
+            ELSE
+               DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+               A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+               B = Z( N )*Z( N )*DELSQ
+               IF( A.LT.ZERO ) THEN
+                  TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+               ELSE
+                  TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+               END IF
+               TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) )
+            END IF
+*
+*           It can be proved that
+*               D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU2 <= D(N)^2+RHO
+*
+         ELSE
+            DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+            A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+            B = Z( N )*Z( N )*DELSQ
+*
+*           The following TAU2 is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( A.LT.ZERO ) THEN
+               TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+            ELSE
+               TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+            END IF
+            TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) )
+
+*
+*           It can be proved that
+*           D(N)^2 < D(N)^2+TAU2 < SIGMA(N)^2 < D(N)^2+RHO/2
+*
+         END IF
+*
+*        The following TAU is to approximate SIGMA_n - D( N )
+*
+*         TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) )
+*
+         SIGMA = D( N ) + TAU
+         DO 30 J = 1, N
+            DELTA( J ) = ( D( J )-D( N ) ) - TAU
+            WORK( J ) = D( J ) + D( N ) + TAU
+   30    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 40 J = 1, II
+            TEMP = Z( J ) / ( DELTA( J )*WORK( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   40    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / ( DELTA( N )*WORK( N ) )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV
+*    $          + ABS( TAU2 )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+         DTNSQ = WORK( N )*DELTA( N )
+         C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+         A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI )
+         B = DTNSQ*DTNSQ1*W
+         IF( C.LT.ZERO )
+     $      C = ABS( C )
+         IF( C.EQ.ZERO ) THEN
+            ETA = RHO - SIGMA*SIGMA
+         ELSE IF( A.GE.ZERO ) THEN
+            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GT.ZERO )
+     $      ETA = -W / ( DPSI+DPHI )
+         TEMP = ETA - DTNSQ
+         IF( TEMP.GT.RHO )
+     $      ETA = RHO + DTNSQ
+*
+         ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+         TAU = TAU + ETA
+         SIGMA = SIGMA + ETA
+*
+         DO 50 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+            WORK( J ) = WORK( J ) + ETA
+   50    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 60 J = 1, II
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   60    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TAU2 = WORK( N )*DELTA( N )
+         TEMP = Z( N ) / TAU2
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV
+*    $          + ABS( TAU2 )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 90 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               GO TO 240
+            END IF
+*
+*           Calculate the new step
+*
+            DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+            DTNSQ = WORK( N )*DELTA( N )
+            C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+            A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI )
+            B = DTNSQ1*DTNSQ*W
+            IF( A.GE.ZERO ) THEN
+               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GT.ZERO )
+     $         ETA = -W / ( DPSI+DPHI )
+            TEMP = ETA - DTNSQ
+            IF( TEMP.LE.ZERO )
+     $         ETA = ETA / TWO
+*
+            ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+            TAU = TAU + ETA
+            SIGMA = SIGMA + ETA
+*
+            DO 70 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+               WORK( J ) = WORK( J ) + ETA
+   70       CONTINUE
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 80 J = 1, II
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+   80       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            TAU2 = WORK( N )*DELTA( N )
+            TEMP = Z( N ) / TAU2
+            PHI = Z( N )*TEMP
+            DPHI = TEMP*TEMP
+            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV
+*    $             + ABS( TAU2 )*( DPSI+DPHI )
+*
+            W = RHOINV + PHI + PSI
+   90    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         GO TO 240
+*
+*        End for the case I = N
+*
+      ELSE
+*
+*        The case for I < N
+*
+         NITER = 1
+         IP1 = I + 1
+*
+*        Calculate initial guess
+*
+         DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) )
+         DELSQ2 = DELSQ / TWO
+         SQ2=SQRT( ( D( I )*D( I )+D( IP1 )*D( IP1 ) ) / TWO )
+         TEMP = DELSQ2 / ( D( I )+SQ2 )
+         DO 100 J = 1, N
+            WORK( J ) = D( J ) + D( I ) + TEMP
+            DELTA( J ) = ( D( J )-D( I ) ) - TEMP
+  100    CONTINUE
+*
+         PSI = ZERO
+         DO 110 J = 1, I - 1
+            PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  110    CONTINUE
+*
+         PHI = ZERO
+         DO 120 J = N, I + 2, -1
+            PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  120    CONTINUE
+         C = RHOINV + PSI + PHI
+         W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) +
+     $       Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) )
+*
+         GEOMAVG = .FALSE.
+         IF( W.GT.ZERO ) THEN
+*
+*           d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
+*
+*           We choose d(i) as origin.
+*
+            ORGATI = .TRUE.
+            II = I
+            SGLB = ZERO
+            SGUB = DELSQ2  / ( D( I )+SQ2 )
+            A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
+            B = Z( I )*Z( I )*DELSQ
+            IF( A.GT.ZERO ) THEN
+               TAU2 = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            ELSE
+               TAU2 = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU2 now is an estimation of SIGMA^2 - D( I )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( I ).
+*
+            TAU = TAU2 / ( D( I )+SQRT( D( I )*D( I )+TAU2 ) )
+            TEMP = SQRT(EPS)
+            IF( (D(I).LE.TEMP*D(IP1)).AND.(ABS(Z(I)).LE.TEMP)
+     $                               .AND.(D(I).GT.ZERO) ) THEN
+               TAU = MIN( TEN*D(I), SGUB )
+               GEOMAVG = .TRUE.
+            END IF
+         ELSE
+*
+*           (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
+*
+*           We choose d(i+1) as origin.
+*
+            ORGATI = .FALSE.
+            II = IP1
+            SGLB = -DELSQ2  / ( D( II )+SQ2 )
+            SGUB = ZERO
+            A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
+            B = Z( IP1 )*Z( IP1 )*DELSQ
+            IF( A.LT.ZERO ) THEN
+               TAU2 = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
+            ELSE
+               TAU2 = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU2 now is an estimation of SIGMA^2 - D( IP1 )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( IP1 ).
+*
+            TAU = TAU2 / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+
+     $            TAU2 ) ) )
+         END IF
+*
+         SIGMA = D( II ) + TAU
+         DO 130 J = 1, N
+            WORK( J ) = D( J ) + D( II ) + TAU
+            DELTA( J ) = ( D( J )-D( II ) ) - TAU
+  130    CONTINUE
+         IIM1 = II - 1
+         IIP1 = II + 1
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 150 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  150    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 160 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  160    CONTINUE
+*
+         W = RHOINV + PHI + PSI
+*
+*        W is the value of the secular function with
+*        its ii-th element removed.
+*
+         SWTCH3 = .FALSE.
+         IF( ORGATI ) THEN
+            IF( W.LT.ZERO )
+     $         SWTCH3 = .TRUE.
+         ELSE
+            IF( W.GT.ZERO )
+     $         SWTCH3 = .TRUE.
+         END IF
+         IF( II.EQ.1 .OR. II.EQ.N )
+     $      SWTCH3 = .FALSE.
+*
+         TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = W + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV
+     $          + THREE*ABS( TEMP )
+*    $          + ABS( TAU2 )*DW
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            SGLB = MAX( SGLB, TAU )
+         ELSE
+            SGUB = MIN( SGUB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         IF( .NOT.SWTCH3 ) THEN
+            DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+            DTISQ = WORK( I )*DELTA( I )
+            IF( ORGATI ) THEN
+               C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+            ELSE
+               C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+            END IF
+            A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+            B = DTIPSQ*DTISQ*W
+            IF( C.EQ.ZERO ) THEN
+               IF( A.EQ.ZERO ) THEN
+                  IF( ORGATI ) THEN
+                     A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI )
+                  ELSE
+                     A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI )
+                  END IF
+               END IF
+               ETA = B / A
+            ELSE IF( A.LE.ZERO ) THEN
+               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+         ELSE
+*
+*           Interpolation using THREE most relevant poles
+*
+            DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+            DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+            TEMP = RHOINV + PSI + PHI
+            IF( ORGATI ) THEN
+               TEMP1 = Z( IIM1 ) / DTIIM
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIP*( DPSI+DPHI ) ) -
+     $             ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+               IF( DPSI.LT.TEMP1 ) THEN
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+               END IF
+            ELSE
+               TEMP1 = Z( IIP1 ) / DTIIP
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIM*( DPSI+DPHI ) ) -
+     $             ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               IF( DPHI.LT.TEMP1 ) THEN
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+               ELSE
+                  ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+               END IF
+               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+            END IF
+            ZZ( 2 ) = Z( II )*Z( II )
+            DD( 1 ) = DTIIM
+            DD( 2 ) = DELTA( II )*WORK( II )
+            DD( 3 ) = DTIIP
+            CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+*
+            IF( INFO.NE.0 ) THEN
+*
+*              If INFO is not 0, i.e., DLAED6 failed, switch back
+*              to 2 pole interpolation.
+*
+               SWTCH3 = .FALSE.
+               INFO = 0
+               DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+               DTISQ = WORK( I )*DELTA( I )
+               IF( ORGATI ) THEN
+                  C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+               ELSE
+                  C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+               END IF
+               A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+               B = DTIPSQ*DTISQ*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( ORGATI ) THEN
+                        A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI )
+                     ELSE
+                        A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI)
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            END IF
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GE.ZERO )
+     $      ETA = -W / DW
+*
+         ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+         TEMP = TAU + ETA
+         IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( SGUB-TAU ) / TWO
+            ELSE
+               ETA = ( SGLB-TAU ) / TWO
+            END IF
+            IF( GEOMAVG ) THEN
+               IF( W .LT. ZERO ) THEN
+                  IF( TAU .GT. ZERO ) THEN
+                     ETA = SQRT(SGUB*TAU)-TAU
+                  END IF
+               ELSE
+                  IF( SGLB .GT. ZERO ) THEN
+                     ETA = SQRT(SGLB*TAU)-TAU
+                  END IF
+               END IF
+            END IF
+         END IF
+*
+         PREW = W
+*
+         TAU = TAU + ETA
+         SIGMA = SIGMA + ETA
+*
+         DO 170 J = 1, N
+            WORK( J ) = WORK( J ) + ETA
+            DELTA( J ) = DELTA( J ) - ETA
+  170    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 180 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  180    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 190 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  190    CONTINUE
+*
+         TAU2 = WORK( II )*DELTA( II )
+         TEMP = Z( II ) / TAU2
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = RHOINV + PHI + PSI + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV
+     $          + THREE*ABS( TEMP )
+*    $          + ABS( TAU2 )*DW
+*
+         SWTCH = .FALSE.
+         IF( ORGATI ) THEN
+            IF( -W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         ELSE
+            IF( W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         END IF
+*
+*        Main loop to update the values of the array   DELTA and WORK
+*
+         ITER = NITER + 1
+*
+         DO 230 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+*     $          .OR. (SGUB-SGLB).LE.EIGHT*ABS(SGUB+SGLB) ) THEN
+               GO TO 240
+            END IF
+*
+            IF( W.LE.ZERO ) THEN
+               SGLB = MAX( SGLB, TAU )
+            ELSE
+               SGUB = MIN( SGUB, TAU )
+            END IF
+*
+*           Calculate the new step
+*
+            IF( .NOT.SWTCH3 ) THEN
+               DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+               DTISQ = WORK( I )*DELTA( I )
+               IF( .NOT.SWTCH ) THEN
+                  IF( ORGATI ) THEN
+                     C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+                  ELSE
+                     C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+                  END IF
+               ELSE
+                  TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+                  IF( ORGATI ) THEN
+                     DPSI = DPSI + TEMP*TEMP
+                  ELSE
+                     DPHI = DPHI + TEMP*TEMP
+                  END IF
+                  C = W - DTISQ*DPSI - DTIPSQ*DPHI
+               END IF
+               A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+               B = DTIPSQ*DTISQ*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( .NOT.SWTCH ) THEN
+                        IF( ORGATI ) THEN
+                           A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*
+     $                         ( DPSI+DPHI )
+                        ELSE
+                           A = Z( IP1 )*Z( IP1 ) +
+     $                         DTISQ*DTISQ*( DPSI+DPHI )
+                        END IF
+                     ELSE
+                        A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            ELSE
+*
+*              Interpolation using THREE most relevant poles
+*
+               DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+               DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+               TEMP = RHOINV + PSI + PHI
+               IF( SWTCH ) THEN
+                  C = TEMP - DTIIM*DPSI - DTIIP*DPHI
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  IF( ORGATI ) THEN
+                     TEMP1 = Z( IIM1 ) / DTIIM
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIM1 )-D( IIP1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2
+                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+                     IF( DPSI.LT.TEMP1 ) THEN
+                        ZZ( 3 ) = DTIIP*DTIIP*DPHI
+                     ELSE
+                        ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+                     END IF
+                  ELSE
+                     TEMP1 = Z( IIP1 ) / DTIIP
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIP1 )-D( IIM1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2
+                     IF( DPHI.LT.TEMP1 ) THEN
+                        ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                     ELSE
+                        ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+                     END IF
+                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+                  END IF
+               END IF
+               DD( 1 ) = DTIIM
+               DD( 2 ) = DELTA( II )*WORK( II )
+               DD( 3 ) = DTIIP
+               CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+*
+               IF( INFO.NE.0 ) THEN
+*
+*                 If INFO is not 0, i.e., DLAED6 failed, switch
+*                 back to two pole interpolation
+*
+                  SWTCH3 = .FALSE.
+                  INFO = 0
+                  DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+                  DTISQ = WORK( I )*DELTA( I )
+                  IF( .NOT.SWTCH ) THEN
+                     IF( ORGATI ) THEN
+                        C = W - DTIPSQ*DW + DELSQ*( Z( I )/DTISQ )**2
+                     ELSE
+                        C = W - DTISQ*DW - DELSQ*( Z( IP1 )/DTIPSQ )**2
+                     END IF
+                  ELSE
+                     TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+                     IF( ORGATI ) THEN
+                        DPSI = DPSI + TEMP*TEMP
+                     ELSE
+                        DPHI = DPHI + TEMP*TEMP
+                     END IF
+                     C = W - DTISQ*DPSI - DTIPSQ*DPHI
+                  END IF
+                  A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+                  B = DTIPSQ*DTISQ*W
+                  IF( C.EQ.ZERO ) THEN
+                     IF( A.EQ.ZERO ) THEN
+                        IF( .NOT.SWTCH ) THEN
+                           IF( ORGATI ) THEN
+                              A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*
+     $                            ( DPSI+DPHI )
+                           ELSE
+                              A = Z( IP1 )*Z( IP1 ) +
+     $                            DTISQ*DTISQ*( DPSI+DPHI )
+                           END IF
+                        ELSE
+                           A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI
+                        END IF
+                     END IF
+                     ETA = B / A
+                  ELSE IF( A.LE.ZERO ) THEN
+                     ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+                  ELSE
+                     ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+                  END IF
+               END IF
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GE.ZERO )
+     $         ETA = -W / DW
+*
+            ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+            TEMP=TAU+ETA
+            IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( SGUB-TAU ) / TWO
+               ELSE
+                  ETA = ( SGLB-TAU ) / TWO
+               END IF
+               IF( GEOMAVG ) THEN
+                  IF( W .LT. ZERO ) THEN
+                     IF( TAU .GT. ZERO ) THEN
+                        ETA = SQRT(SGUB*TAU)-TAU
+                     END IF
+                  ELSE
+                     IF( SGLB .GT. ZERO ) THEN
+                        ETA = SQRT(SGLB*TAU)-TAU
+                     END IF
+                  END IF
+               END IF
+            END IF
+*
+            PREW = W
+*
+            TAU = TAU + ETA
+            SIGMA = SIGMA + ETA
+*
+            DO 200 J = 1, N
+               WORK( J ) = WORK( J ) + ETA
+               DELTA( J ) = DELTA( J ) - ETA
+  200       CONTINUE
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 210 J = 1, IIM1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+  210       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            DPHI = ZERO
+            PHI = ZERO
+            DO 220 J = N, IIP1, -1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PHI = PHI + Z( J )*TEMP
+               DPHI = DPHI + TEMP*TEMP
+               ERRETM = ERRETM + PHI
+  220       CONTINUE
+*
+            TAU2 = WORK( II )*DELTA( II )
+            TEMP = Z( II ) / TAU2
+            DW = DPSI + DPHI + TEMP*TEMP
+            TEMP = Z( II )*TEMP
+            W = RHOINV + PHI + PSI + TEMP
+            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV
+     $             + THREE*ABS( TEMP )
+*    $             + ABS( TAU2 )*DW
+*
+            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
+     $         SWTCH = .NOT.SWTCH
+*
+  230    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+*
+      END IF
+*
+  240 CONTINUE
+      RETURN
+*
+*     End of DLASD4
+*
+      END
diff --git a/lib/linalg/dlasd5.f b/lib/linalg/dlasd5.f
new file mode 100644
index 0000000000000000000000000000000000000000..4896ba6b97933679049f70b83afa1f45cb7796db
--- /dev/null
+++ b/lib/linalg/dlasd5.f
@@ -0,0 +1,231 @@
+*> \brief \b DLASD5 computes the square root of the i-th eigenvalue of a positive symmetric rank-one modification of a 2-by-2 diagonal matrix. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD5 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd5.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd5.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd5.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            I
+*       DOUBLE PRECISION   DSIGMA, RHO
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> This subroutine computes the square root of the I-th eigenvalue
+*> of a positive symmetric rank-one modification of a 2-by-2 diagonal
+*> matrix
+*>
+*>            diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
+*>
+*> The diagonal entries in the array D are assumed to satisfy
+*>
+*>            0 <= D(i) < D(j)  for  i < j .
+*>
+*> We also assume RHO > 0 and that the Euclidean norm of the vector
+*> Z is one.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] I
+*> \verbatim
+*>          I is INTEGER
+*>         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*> \endverbatim
+*>
+*> \param[in] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( 2 )
+*>         The original eigenvalues.  We assume 0 <= D(1) < D(2).
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( 2 )
+*>         The components of the updating vector.
+*> \endverbatim
+*>
+*> \param[out] DELTA
+*> \verbatim
+*>          DELTA is DOUBLE PRECISION array, dimension ( 2 )
+*>         Contains (D(j) - sigma_I) in its  j-th component.
+*>         The vector DELTA contains the information necessary
+*>         to construct the eigenvectors.
+*> \endverbatim
+*>
+*> \param[in] RHO
+*> \verbatim
+*>          RHO is DOUBLE PRECISION
+*>         The scalar in the symmetric updating formula.
+*> \endverbatim
+*>
+*> \param[out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION
+*>         The computed sigma_I, the I-th updated eigenvalue.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( 2 )
+*>         WORK contains (D(j) + sigma_I) in its  j-th component.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ren-Cang Li, Computer Science Division, University of California
+*>     at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      DOUBLE PRECISION   DSIGMA, RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   THREE = 3.0D+0, FOUR = 4.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   B, C, DEL, DELSQ, TAU, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      DELSQ = DEL*( D( 2 )+D( 1 ) )
+      IF( I.EQ.1 ) THEN
+         W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )-
+     $       Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DELSQ
+*
+*           B > ZERO, always
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+*
+*           The following TAU is DSIGMA - D( 1 )
+*
+            TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) )
+            DSIGMA = D( 1 ) + TAU
+            DELTA( 1 ) = -TAU
+            DELTA( 2 ) = DEL - TAU
+            WORK( 1 ) = TWO*D( 1 ) + TAU
+            WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 )
+*           DELTA( 1 ) = -Z( 1 ) / TAU
+*           DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+*
+*           The following TAU is DSIGMA - D( 2 )
+*
+            TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) )
+            DSIGMA = D( 2 ) + TAU
+            DELTA( 1 ) = -( DEL+TAU )
+            DELTA( 2 ) = -TAU
+            WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+            WORK( 2 ) = TWO*D( 2 ) + TAU
+*           DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*           DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*        Now I=2
+*
+         B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+*
+*        The following TAU is DSIGMA - D( 2 )
+*
+         TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) )
+         DSIGMA = D( 2 ) + TAU
+         DELTA( 1 ) = -( DEL+TAU )
+         DELTA( 2 ) = -TAU
+         WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+         WORK( 2 ) = TWO*D( 2 ) + TAU
+*        DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*        DELTA( 2 ) = -Z( 2 ) / TAU
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End of DLASD5
+*
+      END
diff --git a/lib/linalg/dlasd6.f b/lib/linalg/dlasd6.f
new file mode 100644
index 0000000000000000000000000000000000000000..5cab78a07081eb56d7ed0a36f2194ad9e3776551
--- /dev/null
+++ b/lib/linalg/dlasd6.f
@@ -0,0 +1,443 @@
+*> \brief \b DLASD6 computes the SVD of an updated upper bidiagonal matrix obtained by merging two smaller ones by appending a row. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD6 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd6.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd6.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd6.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+*                          IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+*                          LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+*                          IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+*      $                   NR, SQRE
+*       DOUBLE PRECISION   ALPHA, BETA, C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+*      $                   PERM( * )
+*       DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( * ),
+*      $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+*      $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD6 computes the SVD of an updated upper bidiagonal matrix B
+*> obtained by merging two smaller ones by appending a row. This
+*> routine is used only for the problem which requires all singular
+*> values and optionally singular vector matrices in factored form.
+*> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+*> A related subroutine, DLASD1, handles the case in which all singular
+*> values and singular vectors of the bidiagonal matrix are desired.
+*>
+*> DLASD6 computes the SVD as follows:
+*>
+*>               ( D1(in)    0    0       0 )
+*>   B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
+*>               (   0       0   D2(in)   0 )
+*>
+*>     = U(out) * ( D(out) 0) * VT(out)
+*>
+*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
+*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*> elsewhere; and the entry b is empty if SQRE = 0.
+*>
+*> The singular values of B can be computed using D1, D2, the first
+*> components of all the right singular vectors of the lower block, and
+*> the last components of all the right singular vectors of the upper
+*> block. These components are stored and updated in VF and VL,
+*> respectively, in DLASD6. Hence U and VT are not explicitly
+*> referenced.
+*>
+*> The singular values are stored in D. The algorithm consists of two
+*> stages:
+*>
+*>       The first stage consists of deflating the size of the problem
+*>       when there are multiple singular values or if there is a zero
+*>       in the Z vector. For each such occurrence the dimension of the
+*>       secular equation problem is reduced by one. This stage is
+*>       performed by the routine DLASD7.
+*>
+*>       The second stage consists of calculating the updated
+*>       singular values. This is done by finding the roots of the
+*>       secular equation via the routine DLASD4 (as called by DLASD8).
+*>       This routine also updates VF and VL and computes the distances
+*>       between the updated singular values and the old singular
+*>       values.
+*>
+*> DLASD6 is called from DLASDA.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed in
+*>         factored form:
+*>         = 0: Compute singular values only.
+*>         = 1: Compute singular vectors in factored form as well.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block.  NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block.  NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*>         and column dimension M = N + SQRE.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( NL+NR+1 ).
+*>         On entry D(1:NL,1:NL) contains the singular values of the
+*>         upper block, and D(NL+2:N) contains the singular values
+*>         of the lower block. On exit D(1:N) contains the singular
+*>         values of the modified matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VF(1:NL+1) contains the first components of all
+*>         right singular vectors of the upper block; and VF(NL+2:M)
+*>         contains the first components of all right singular vectors
+*>         of the lower block. On exit, VF contains the first components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VL(1:NL+1) contains the  last components of all
+*>         right singular vectors of the upper block; and VL(NL+2:M)
+*>         contains the last components of all right singular vectors of
+*>         the lower block. On exit, VL contains the last components of
+*>         all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in,out] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISION
+*>         Contains the diagonal element associated with the added row.
+*> \endverbatim
+*>
+*> \param[in,out] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISION
+*>         Contains the off-diagonal element associated with the added
+*>         row.
+*> \endverbatim
+*>
+*> \param[in,out] IDXQ
+*> \verbatim
+*>          IDXQ is INTEGER array, dimension ( N )
+*>         This contains the permutation which will reintegrate the
+*>         subproblem just solved back into sorted order, i.e.
+*>         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) to be applied
+*>         to each block. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of columns to take place
+*>         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value to be used in the
+*>         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of GIVNUM and POLES, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         On exit, POLES(1,*) is an array containing the new singular
+*>         values obtained from solving the secular equation, and
+*>         POLES(2,*) is an array containing the poles in the secular
+*>         equation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( N )
+*>         On exit, DIFL(I) is the distance between I-th updated
+*>         (undeflated) singular value and the I-th (undeflated) old
+*>         singular value.
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*>                   dimension ( K ) if ICOMPQ = 0.
+*>          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*>          defined and will not be referenced.
+*>
+*>          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*>          normalizing factors for the right singular vector matrix.
+*>
+*>         See DLASD8 for details on DIFL and DIFR.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( M )
+*>         The first elements of this array contain the components
+*>         of the deflation-adjusted updating row vector.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix,
+*>         This is the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( 4 * M )
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension ( 3 * N )
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+     $                   IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+     $                   LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+     $                   PERM( * )
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M,
+     $                   N, N1, N2
+      DOUBLE PRECISION   ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DLASCL, DLASD7, DLASD8, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -14
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD6', -INFO )
+         RETURN
+      END IF
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLASD7 and DLASD8.
+*
+      ISIGMA = 1
+      IW = ISIGMA + N
+      IVFW = IW + M
+      IVLW = IVFW + M
+*
+      IDX = 1
+      IDXC = IDX + N
+      IDXP = IDXC + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Sort and Deflate singular values.
+*
+      CALL DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF,
+     $             WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA,
+     $             WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ,
+     $             PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S,
+     $             INFO )
+*
+*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL.
+*
+      CALL DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM,
+     $             WORK( ISIGMA ), WORK( IW ), INFO )
+*
+*     Report the possible convergence failure.
+*
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Save the poles if ICOMPQ = 1.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL DCOPY( K, D, 1, POLES( 1, 1 ), 1 )
+         CALL DCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 )
+      END IF
+*
+*     Unscale.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL DLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of DLASD6
+*
+      END
diff --git a/lib/linalg/dlasd7.f b/lib/linalg/dlasd7.f
new file mode 100644
index 0000000000000000000000000000000000000000..e0ddedeb577142ec443aa28f14525c3e2d22e3f8
--- /dev/null
+++ b/lib/linalg/dlasd7.f
@@ -0,0 +1,580 @@
+*> \brief \b DLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD7 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd7.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd7.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd7.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+*                          VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+*                          PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+*                          C, S, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+*      $                   NR, SQRE
+*       DOUBLE PRECISION   ALPHA, BETA, C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+*      $                   IDXQ( * ), PERM( * )
+*       DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+*      $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+*      $                   ZW( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD7 merges the two sets of singular values together into a single
+*> sorted set. Then it tries to deflate the size of the problem. There
+*> are two ways in which deflation can occur:  when two or more singular
+*> values are close together or if there is a tiny entry in the Z
+*> vector. For each such occurrence the order of the related
+*> secular equation problem is reduced by one.
+*>
+*> DLASD7 is called from DLASD6.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>          Specifies whether singular vectors are to be computed
+*>          in compact form, as follows:
+*>          = 0: Compute singular values only.
+*>          = 1: Compute singular vectors of upper
+*>               bidiagonal matrix in compact form.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block. NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block. NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has
+*>         N = NL + NR + 1 rows and
+*>         M = N + SQRE >= N columns.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix, this is
+*>         the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( N )
+*>         On entry D contains the singular values of the two submatrices
+*>         to be combined. On exit D contains the trailing (N-K) updated
+*>         singular values (those which were deflated) sorted into
+*>         increasing order.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( M )
+*>         On exit Z contains the updating row vector in the secular
+*>         equation.
+*> \endverbatim
+*>
+*> \param[out] ZW
+*> \verbatim
+*>          ZW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for Z.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VF(1:NL+1) contains the first components of all
+*>         right singular vectors of the upper block; and VF(NL+2:M)
+*>         contains the first components of all right singular vectors
+*>         of the lower block. On exit, VF contains the first components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] VFW
+*> \verbatim
+*>          VFW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for VF.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VL(1:NL+1) contains the  last components of all
+*>         right singular vectors of the upper block; and VL(NL+2:M)
+*>         contains the last components of all right singular vectors
+*>         of the lower block. On exit, VL contains the last components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] VLW
+*> \verbatim
+*>          VLW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for VL.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISION
+*>         Contains the diagonal element associated with the added row.
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISION
+*>         Contains the off-diagonal element associated with the added
+*>         row.
+*> \endverbatim
+*>
+*> \param[out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION array, dimension ( N )
+*>         Contains a copy of the diagonal elements (K-1 singular values
+*>         and one zero) in the secular equation.
+*> \endverbatim
+*>
+*> \param[out] IDX
+*> \verbatim
+*>          IDX is INTEGER array, dimension ( N )
+*>         This will contain the permutation used to sort the contents of
+*>         D into ascending order.
+*> \endverbatim
+*>
+*> \param[out] IDXP
+*> \verbatim
+*>          IDXP is INTEGER array, dimension ( N )
+*>         This will contain the permutation used to place deflated
+*>         values of D at the end of the array. On output IDXP(2:K)
+*>         points to the nondeflated D-values and IDXP(K+1:N)
+*>         points to the deflated singular values.
+*> \endverbatim
+*>
+*> \param[in] IDXQ
+*> \verbatim
+*>          IDXQ is INTEGER array, dimension ( N )
+*>         This contains the permutation which separately sorts the two
+*>         sub-problems in D into ascending order.  Note that entries in
+*>         the first half of this permutation must first be moved one
+*>         position backward; and entries in the second half
+*>         must first have NL+1 added to their values.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) to be applied
+*>         to each singular block. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of columns to take place
+*>         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         The leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value to be used in the
+*>         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of GIVNUM, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>         = 0:  successful exit.
+*>         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+     $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   C, S, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+     $                   IDXQ( * ), PERM( * )
+      DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+     $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+     $                   ZW( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   EIGHT = 8.0D+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,
+     $                   NLP1, NLP2
+      DOUBLE PRECISION   EPS, HLFTOL, TAU, TOL, Z1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DROT, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -22
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -24
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD7', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+      IF( ICOMPQ.EQ.1 ) THEN
+         GIVPTR = 0
+      END IF
+*
+*     Generate the first part of the vector Z and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VL( NLP1 )
+      VL( NLP1 ) = ZERO
+      TAU = VF( NLP1 )
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VL( I )
+         VL( I ) = ZERO
+         VF( I+1 ) = VF( I )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+      VF( 1 ) = TAU
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VF( I )
+         VF( I ) = ZERO
+   20 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 30 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   30 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and ZW are used as storage space.
+*
+      DO 40 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         ZW( I ) = Z( IDXQ( I ) )
+         VFW( I ) = VF( IDXQ( I ) )
+         VLW( I ) = VL( IDXQ( I ) )
+   40 CONTINUE
+*
+      CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 50 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = ZW( IDXI )
+         VF( I ) = VFW( IDXI )
+         VL( I ) = VLW( IDXI )
+   50 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 60 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JPREV = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = JPREV
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = DLAPY2( C, S )
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+            C = C / TAU
+            S = -S / TAU
+*
+*           Record the appropriate Givens rotation
+*
+            IF( ICOMPQ.EQ.1 ) THEN
+               GIVPTR = GIVPTR + 1
+               IDXJP = IDXQ( IDX( JPREV )+1 )
+               IDXJ = IDXQ( IDX( J )+1 )
+               IF( IDXJP.LE.NLP1 ) THEN
+                  IDXJP = IDXJP - 1
+               END IF
+               IF( IDXJ.LE.NLP1 ) THEN
+                  IDXJ = IDXJ - 1
+               END IF
+               GIVCOL( GIVPTR, 2 ) = IDXJP
+               GIVCOL( GIVPTR, 1 ) = IDXJ
+               GIVNUM( GIVPTR, 2 ) = C
+               GIVNUM( GIVPTR, 1 ) = S
+            END IF
+            CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )
+            CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            ZW( K ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 80
+   90 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      ZW( K ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  100 CONTINUE
+*
+*     Sort the singular values into DSIGMA. The singular values which
+*     were not deflated go into the first K slots of DSIGMA, except
+*     that DSIGMA(1) is treated separately.
+*
+      DO 110 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         VFW( J ) = VF( JP )
+         VLW( J ) = VL( JP )
+  110 CONTINUE
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 120 J = 2, N
+            JP = IDXP( J )
+            PERM( J ) = IDXQ( IDX( JP )+1 )
+            IF( PERM( J ).LE.NLP1 ) THEN
+               PERM( J ) = PERM( J ) - 1
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     The deflated singular values go back into the last N - K slots of
+*     D.
+*
+      CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+*     VL(M).
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = DLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = -Z( M ) / Z( 1 )
+         END IF
+         CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )
+         CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Restore Z, VF, and VL.
+*
+      CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )
+      CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )
+      CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )
+*
+      RETURN
+*
+*     End of DLASD7
+*
+      END
diff --git a/lib/linalg/dlasd8.f b/lib/linalg/dlasd8.f
new file mode 100644
index 0000000000000000000000000000000000000000..fc5c48c5285b246d1ff2c1d073fc931f76505204
--- /dev/null
+++ b/lib/linalg/dlasd8.f
@@ -0,0 +1,342 @@
+*> \brief \b DLASD8 finds the square roots of the roots of the secular equation, and stores, for each element in D, the distance to its two nearest poles. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD8 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd8.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd8.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd8.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+*                          DSIGMA, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, K, LDDIFR
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+*      $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+*      $                   Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD8 finds the square roots of the roots of the secular equation,
+*> as defined by the values in DSIGMA and Z. It makes the appropriate
+*> calls to DLASD4, and stores, for each  element in D, the distance
+*> to its two nearest poles (elements in DSIGMA). It also updates
+*> the arrays VF and VL, the first and last components of all the
+*> right singular vectors of the original bidiagonal matrix.
+*>
+*> DLASD8 is called from DLASD6.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>          Specifies whether singular vectors are to be computed in
+*>          factored form in the calling routine:
+*>          = 0: Compute singular values only.
+*>          = 1: Compute singular vectors in factored form as well.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The number of terms in the rational function to be solved
+*>          by DLASD4.  K >= 1.
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( K )
+*>          On output, D contains the updated singular values.
+*> \endverbatim
+*>
+*> \param[in,out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, the first K elements of this array contain the
+*>          components of the deflation-adjusted updating row vector.
+*>          On exit, Z is updated.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, VF contains  information passed through DBEDE8.
+*>          On exit, VF contains the first K components of the first
+*>          components of all right singular vectors of the bidiagonal
+*>          matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, VL contains  information passed through DBEDE8.
+*>          On exit, VL contains the first K components of the last
+*>          components of all right singular vectors of the bidiagonal
+*>          matrix.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( K )
+*>          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*>                   dimension ( K ) if ICOMPQ = 0.
+*>          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*>          defined and will not be referenced.
+*>
+*>          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*>          normalizing factors for the right singular vector matrix.
+*> \endverbatim
+*>
+*> \param[in] LDDIFR
+*> \verbatim
+*>          LDDIFR is INTEGER
+*>          The leading dimension of DIFR, must be at least K.
+*> \endverbatim
+*>
+*> \param[in,out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, the first K elements of this array contain the old
+*>          roots of the deflated updating problem.  These are the poles
+*>          of the secular equation.
+*>          On exit, the elements of DSIGMA may be very slightly altered
+*>          in value.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (3*K)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDDIFR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+     $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+     $                   Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASCL, DLASD4, DLASET, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMC3, DNRM2
+      EXTERNAL           DDOT, DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( LDDIFR.LT.K ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = DNRM2( K, Z, 1 )
+      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, report the convergence failure.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = DNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of DLASD8
+*
+      END
+
diff --git a/lib/linalg/dlasda.f b/lib/linalg/dlasda.f
new file mode 100644
index 0000000000000000000000000000000000000000..f41a108b804b730140a94532ee53dc69a601910f
--- /dev/null
+++ b/lib/linalg/dlasda.f
@@ -0,0 +1,514 @@
+*> \brief \b DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasda.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasda.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasda.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+*                          DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+*                          PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+*      $                   K( * ), PERM( LDGCOL, * )
+*       DOUBLE PRECISION   C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+*      $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+*      $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+*      $                   Z( LDU, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> Using a divide and conquer approach, DLASDA computes the singular
+*> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+*> B with diagonal D and offdiagonal E, where M = N + SQRE. The
+*> algorithm computes the singular values in the SVD B = U * S * VT.
+*> The orthogonal matrices U and VT are optionally computed in
+*> compact form.
+*>
+*> A related subroutine, DLASD0, computes the singular values and
+*> the singular vectors in explicit form.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed
+*>         in compact form, as follows
+*>         = 0: Compute singular values only.
+*>         = 1: Compute singular vectors of upper bidiagonal
+*>              matrix in compact form.
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The row dimension of the upper bidiagonal matrix. This is
+*>         also the dimension of the main diagonal array D.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         Specifies the column dimension of the bidiagonal matrix.
+*>         = 0: The bidiagonal matrix has column dimension M = N;
+*>         = 1: The bidiagonal matrix has column dimension M = N + 1.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( N )
+*>         On entry D contains the main diagonal of the bidiagonal
+*>         matrix. On exit D, if INFO = 0, contains its singular values.
+*> \endverbatim
+*>
+*> \param[in] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array, dimension ( M-1 )
+*>         Contains the subdiagonal entries of the bidiagonal matrix.
+*>         On exit, E has been destroyed.
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array,
+*>         dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+*>         singular vector matrices of all subproblems at the bottom
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER, LDU = > N.
+*>         The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+*>         GIVNUM, and Z.
+*> \endverbatim
+*>
+*> \param[out] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array,
+*>         dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT**T contains the right
+*>         singular vector matrices of all subproblems at the bottom
+*>         level.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER array,
+*>         dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+*>         If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+*>         secular equation on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ),
+*>         where NLVL = floor(log_2 (N/SMLSIZ))).
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                  dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+*>                  dimension ( N ) if ICOMPQ = 0.
+*>         If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+*>         record distances between singular values on the I-th
+*>         level and singular values on the (I -1)-th level, and
+*>         DIFR(1:N, 2 * I ) contains the normalizing factors for
+*>         the right singular vector matrix. See DLASD8 for details.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array,
+*>                  dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+*>                  dimension ( N ) if ICOMPQ = 0.
+*>         The first K elements of Z(1, I) contain the components of
+*>         the deflation-adjusted updating row vector for subproblems
+*>         on the I-th level.
+*> \endverbatim
+*>
+*> \param[out] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array,
+*>         dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+*>         POLES(1, 2*I) contain  the new and old singular values
+*>         involved in the secular equations on the I-th level.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER array,
+*>         dimension ( N ) if ICOMPQ = 1, and not referenced if
+*>         ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+*>         the number of Givens rotations performed on the I-th
+*>         problem on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array,
+*>         dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+*>         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*>         GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+*>         of Givens rotations performed on the I-th level on the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER, LDGCOL = > N.
+*>         The leading dimension of arrays GIVCOL and PERM.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array,
+*>         dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+*>         permutations done on the I-th level of the computation tree.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array,
+*>         dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+*>         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*>         GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+*>         values of Givens rotations performed on the I-th level on
+*>         the computation tree.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array,
+*>         dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+*>         If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+*>         C( I ) contains the C-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension ( N ) if
+*>         ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+*>         and the I-th subproblem is not square, on exit, S( I )
+*>         contains the S-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension
+*>         (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (7*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+     $                   DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+     $                   PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU,
+     $                   NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASD6, DLASDQ, DLASDT, DLASET, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDU.LT.( N+SQRE ) ) THEN
+         INFO = -8
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDA', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+*
+*     If the input matrix is too small, call DLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         ELSE
+            CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         END IF
+         RETURN
+      END IF
+*
+*     Book-keeping and  set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+*
+      NCC = 0
+      NRU = 0
+*
+      SMLSZP = SMLSIZ + 1
+      VF = 1
+      VL = VF + M
+      NWORK1 = VL + M
+      NWORK2 = NWORK1 + SMLSZP*SMLSZP
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     for the nodes on bottom level of the tree, solve
+*     their subproblems by DLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 30 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         IDXQI = IDXQ + NLF - 2
+         VFI = VF + NLF - 1
+         VLI = VL + NLF - 1
+         SQREI = 1
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ),
+     $                   E( NLF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NL, WORK( NWORK2 ), NL,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + NL*SMLSZP
+            CALL DCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU )
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ),
+     $                   E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU,
+     $                   U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 10 J = 1, NL
+            IWORK( IDXQI+J ) = J
+   10    CONTINUE
+         IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN
+            SQREI = 0
+         ELSE
+            SQREI = 1
+         END IF
+         IDXQI = IDXQI + NLP1
+         VFI = VFI + NLP1
+         VLI = VLI + NLP1
+         NRP1 = NR + SQREI
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ),
+     $                   E( NRF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NR, WORK( NWORK2 ), NR,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP
+            CALL DCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU )
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ),
+     $                   E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU,
+     $                   U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 20 J = 1, NR
+            IWORK( IDXQI+J ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      J = 2**NLVL
+      DO 50 LVL = NLVL, 1, -1
+         LVL2 = LVL*2 - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            VFI = VF + NLF - 1
+            VLI = VL + NLF - 1
+            IDXQI = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            IF( ICOMPQ.EQ.0 ) THEN
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL,
+     $                      LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z,
+     $                      K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            ELSE
+               J = J - 1
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM( NLF, LVL ),
+     $                      GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                      GIVNUM( NLF, LVL2 ), LDU,
+     $                      POLES( NLF, LVL2 ), DIFL( NLF, LVL ),
+     $                      DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ),
+     $                      C( J ), S( J ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDA
+*
+      END
diff --git a/lib/linalg/dlasdq.f b/lib/linalg/dlasdq.f
new file mode 100644
index 0000000000000000000000000000000000000000..e7d3575a9897afcd64e7d3d76781223873faa99d
--- /dev/null
+++ b/lib/linalg/dlasdq.f
@@ -0,0 +1,413 @@
+*> \brief \b DLASDQ computes the SVD of a real bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasdq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasdq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasdq.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+*                          U, LDU, C, LDC, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          UPLO
+*       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+*      $                   VT( LDVT, * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASDQ computes the singular value decomposition (SVD) of a real
+*> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+*> E, accumulating the transformations if desired. Letting B denote
+*> the input bidiagonal matrix, the algorithm computes orthogonal
+*> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose
+*> of P). The singular values S are overwritten on D.
+*>
+*> The input matrix U  is changed to U  * Q  if desired.
+*> The input matrix VT is changed to P**T * VT if desired.
+*> The input matrix C  is changed to Q**T * C  if desired.
+*>
+*> See "Computing  Small Singular Values of Bidiagonal Matrices With
+*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*> LAPACK Working Note #3, for a detailed description of the algorithm.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>        On entry, UPLO specifies whether the input bidiagonal matrix
+*>        is upper or lower bidiagonal, and whether it is square are
+*>        not.
+*>           UPLO = 'U' or 'u'   B is upper bidiagonal.
+*>           UPLO = 'L' or 'l'   B is lower bidiagonal.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>        = 0: then the input matrix is N-by-N.
+*>        = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+*>             (N+1)-by-N if UPLU = 'L'.
+*>
+*>        The bidiagonal matrix has
+*>        N = NL + NR + 1 rows and
+*>        M = N + SQRE >= N columns.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>        On entry, N specifies the number of rows and columns
+*>        in the matrix. N must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NCVT
+*> \verbatim
+*>          NCVT is INTEGER
+*>        On entry, NCVT specifies the number of columns of
+*>        the matrix VT. NCVT must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NRU
+*> \verbatim
+*>          NRU is INTEGER
+*>        On entry, NRU specifies the number of rows of
+*>        the matrix U. NRU must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NCC
+*> \verbatim
+*>          NCC is INTEGER
+*>        On entry, NCC specifies the number of columns of
+*>        the matrix C. NCC must be at least 0.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension (N)
+*>        On entry, D contains the diagonal entries of the
+*>        bidiagonal matrix whose SVD is desired. On normal exit,
+*>        D contains the singular values in ascending order.
+*> \endverbatim
+*>
+*> \param[in,out] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array.
+*>        dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+*>        On entry, the entries of E contain the offdiagonal entries
+*>        of the bidiagonal matrix whose SVD is desired. On normal
+*>        exit, E will contain 0. If the algorithm does not converge,
+*>        D and E will contain the diagonal and superdiagonal entries
+*>        of a bidiagonal matrix orthogonally equivalent to the one
+*>        given as input.
+*> \endverbatim
+*>
+*> \param[in,out] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
+*>        On entry, contains a matrix which on exit has been
+*>        premultiplied by P**T, dimension N-by-NCVT if SQRE = 0
+*>        and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+*> \endverbatim
+*>
+*> \param[in] LDVT
+*> \verbatim
+*>          LDVT is INTEGER
+*>        On entry, LDVT specifies the leading dimension of VT as
+*>        declared in the calling (sub) program. LDVT must be at
+*>        least 1. If NCVT is nonzero LDVT must also be at least N.
+*> \endverbatim
+*>
+*> \param[in,out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension (LDU, N)
+*>        On entry, contains a  matrix which on exit has been
+*>        postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+*>        and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER
+*>        On entry, LDU  specifies the leading dimension of U as
+*>        declared in the calling (sub) program. LDU must be at
+*>        least max( 1, NRU ) .
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array, dimension (LDC, NCC)
+*>        On entry, contains an N-by-NCC matrix which on exit
+*>        has been premultiplied by Q**T  dimension N-by-NCC if SQRE = 0
+*>        and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*>          LDC is INTEGER
+*>        On entry, LDC  specifies the leading dimension of C as
+*>        declared in the calling (sub) program. LDC must be at
+*>        least 1. If NCC is nonzero, LDC must also be at least N.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (4*N)
+*>        Workspace. Only referenced if one of NCVT, NRU, or NCC is
+*>        nonzero, and if N is at least 2.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>        On exit, a value of 0 indicates a successful exit.
+*>        If INFO < 0, argument number -INFO is illegal.
+*>        If INFO > 0, the algorithm did not converge, and INFO
+*>        specifies how many superdiagonals did not converge.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+     $                   U, LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ROTATE
+      INTEGER            I, ISUB, IUPLO, J, NP1, SQRE1
+      DOUBLE PRECISION   CS, R, SMIN, SN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DLARTG, DLASR, DSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -12
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDQ', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+      NP1 = N + 1
+      SQRE1 = SQRE
+*
+*     If matrix non-square upper bidiagonal, rotate to be lower
+*     bidiagonal.  The rotations are on the right.
+*
+      IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   10    CONTINUE
+         CALL DLARTG( D( N ), E( N ), CS, SN, R )
+         D( N ) = R
+         E( N ) = ZERO
+         IF( ROTATE ) THEN
+            WORK( N ) = CS
+            WORK( N+N ) = SN
+         END IF
+         IUPLO = 2
+         SQRE1 = 0
+*
+*        Update singular vectors if desired.
+*
+         IF( NCVT.GT.0 )
+     $      CALL DLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ),
+     $                  WORK( NP1 ), VT, LDVT )
+      END IF
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left.
+*
+      IF( IUPLO.EQ.2 ) THEN
+         DO 20 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   20    CONTINUE
+*
+*        If matrix (N+1)-by-N lower bidiagonal, one additional
+*        rotation is needed.
+*
+         IF( SQRE1.EQ.1 ) THEN
+            CALL DLARTG( D( N ), E( N ), CS, SN, R )
+            D( N ) = R
+            IF( ROTATE ) THEN
+               WORK( N ) = CS
+               WORK( N+N ) = SN
+            END IF
+         END IF
+*
+*        Update singular vectors if desired.
+*
+         IF( NRU.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            ELSE
+               CALL DLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            END IF
+         END IF
+         IF( NCC.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            ELSE
+               CALL DLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            END IF
+         END IF
+      END IF
+*
+*     Call DBDSQR to compute the SVD of the reduced real
+*     N-by-N upper bidiagonal matrix.
+*
+      CALL DBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C,
+     $             LDC, WORK, INFO )
+*
+*     Sort the singular values into ascending order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 40 I = 1, N
+*
+*        Scan for smallest D(I).
+*
+         ISUB = I
+         SMIN = D( I )
+         DO 30 J = I + 1, N
+            IF( D( J ).LT.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+   30    CONTINUE
+         IF( ISUB.NE.I ) THEN
+*
+*           Swap singular values and vectors.
+*
+            D( ISUB ) = D( I )
+            D( I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC )
+         END IF
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDQ
+*
+      END
diff --git a/lib/linalg/dlasdt.f b/lib/linalg/dlasdt.f
new file mode 100644
index 0000000000000000000000000000000000000000..37da2d035e91a3d4ef9ace7396dc2de5511091af
--- /dev/null
+++ b/lib/linalg/dlasdt.f
@@ -0,0 +1,172 @@
+*> \brief \b DLASDT creates a tree of subproblems for bidiagonal divide and conquer. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasdt.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasdt.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasdt.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            LVL, MSUB, N, ND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASDT creates a tree of subproblems for bidiagonal divide and
+*> conquer.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          On entry, the number of diagonal elements of the
+*>          bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] LVL
+*> \verbatim
+*>          LVL is INTEGER
+*>          On exit, the number of levels on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] ND
+*> \verbatim
+*>          ND is INTEGER
+*>          On exit, the number of nodes on the tree.
+*> \endverbatim
+*>
+*> \param[out] INODE
+*> \verbatim
+*>          INODE is INTEGER array, dimension ( N )
+*>          On exit, centers of subproblems.
+*> \endverbatim
+*>
+*> \param[out] NDIML
+*> \verbatim
+*>          NDIML is INTEGER array, dimension ( N )
+*>          On exit, row dimensions of left children.
+*> \endverbatim
+*>
+*> \param[out] NDIMR
+*> \verbatim
+*>          NDIMR is INTEGER array, dimension ( N )
+*>          On exit, row dimensions of right children.
+*> \endverbatim
+*>
+*> \param[in] MSUB
+*> \verbatim
+*>          MSUB is INTEGER
+*>          On entry, the maximum row dimension each subproblem at the
+*>          bottom of the tree can be of.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            LVL, MSUB, N, ND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IR, LLST, MAXN, NCRNT, NLVL
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Find the number of levels on the tree.
+*
+      MAXN = MAX( 1, N )
+      TEMP = LOG( DBLE( MAXN ) / DBLE( MSUB+1 ) ) / LOG( TWO )
+      LVL = INT( TEMP ) + 1
+*
+      I = N / 2
+      INODE( 1 ) = I + 1
+      NDIML( 1 ) = I
+      NDIMR( 1 ) = N - I - 1
+      IL = 0
+      IR = 1
+      LLST = 1
+      DO 20 NLVL = 1, LVL - 1
+*
+*        Constructing the tree at (NLVL+1)-st level. The number of
+*        nodes created on this level is LLST * 2.
+*
+         DO 10 I = 0, LLST - 1
+            IL = IL + 2
+            IR = IR + 2
+            NCRNT = LLST + I
+            NDIML( IL ) = NDIML( NCRNT ) / 2
+            NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1
+            INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1
+            NDIML( IR ) = NDIMR( NCRNT ) / 2
+            NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1
+            INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1
+   10    CONTINUE
+         LLST = LLST*2
+   20 CONTINUE
+      ND = LLST*2 - 1
+*
+      RETURN
+*
+*     End of DLASDT
+*
+      END
diff --git a/lib/linalg/dlaset.f b/lib/linalg/dlaset.f
index 1ce34662ab7cd468118ab53ac559bc146339ac0c..3a0c469a3ca48571881328e7b3379c1e089aec80 100644
--- a/lib/linalg/dlaset.f
+++ b/lib/linalg/dlaset.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASET + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaset.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaset.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaset.f"> 
+*> Download DLASET + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaset.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaset.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaset.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -77,7 +77,7 @@
 *>          The constant to which the diagonal elements are to be set.
 *> \endverbatim
 *>
-*> \param[in,out] A
+*> \param[out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On exit, the leading m-by-n submatrix of A is set as follows:
@@ -98,22 +98,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dlasq1.f b/lib/linalg/dlasq1.f
index d12fb7a5fd45e3be2bdc8a98e6cd40c7f7f65f02..468676eebd3b88ec09d524263d2d6dd6b47fbc17 100644
--- a/lib/linalg/dlasq1.f
+++ b/lib/linalg/dlasq1.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ1 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq1.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq1.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq1.f"> 
+*> Download DLASQ1 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq1.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq1.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq1.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASQ1( N, D, E, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), E( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -89,29 +89,29 @@
 *>                  represent a matrix with the same singular values
 *>                  which the calling subroutine could use to finish the
 *>                  computation, or even feed back into DLASQ1
-*>             = 3, termination criterion of outer while loop not met 
+*>             = 3, termination criterion of outer while loop not met
 *>                  (program created more than N unreduced blocks)
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DLASQ1( N, D, E, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, N
@@ -144,7 +144,7 @@
 *
       INFO = 0
       IF( N.LT.0 ) THEN
-         INFO = -2
+         INFO = -1
          CALL XERBLA( 'DLASQ1', -INFO )
          RETURN
       ELSE IF( N.EQ.0 ) THEN
@@ -189,7 +189,7 @@
       CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 )
       CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1,
      $             IINFO )
-*         
+*
 *     Compute the q's and e's.
 *
       DO 30 I = 1, 2*N - 1
diff --git a/lib/linalg/dlasq2.f b/lib/linalg/dlasq2.f
index df1690d020d282c25d12d575f94a22ccef3cd3a6..68d9228704e64b4495ede050666f2b837ad4117e 100644
--- a/lib/linalg/dlasq2.f
+++ b/lib/linalg/dlasq2.f
@@ -2,38 +2,38 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq2.f"> 
+*> Download DLASQ2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASQ2( N, Z, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
-*> DLASQ2 computes all the eigenvalues of the symmetric positive 
+*> DLASQ2 computes all the eigenvalues of the symmetric positive
 *> definite tridiagonal matrix associated with the qd array Z to high
 *> relative accuracy are computed to high relative accuracy, in the
 *> absence of denormalization, underflow and overflow.
@@ -83,19 +83,19 @@
 *>              = 2, current block of Z not diagonalized after 100*N
 *>                   iterations (in inner while loop).  On exit Z holds
 *>                   a qd array with the same eigenvalues as the given Z.
-*>              = 3, termination criterion of outer while loop not met 
+*>              = 3, termination criterion of outer while loop not met
 *>                   (program created more than N unreduced blocks)
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -112,10 +112,10 @@
 *  =====================================================================
       SUBROUTINE DLASQ2( N, Z, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, N
@@ -136,7 +136,7 @@
 *     .. Local Scalars ..
       LOGICAL            IEEE
       INTEGER            I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB,
-     $                   K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT, 
+     $                   K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT,
      $                   TTYPE
       DOUBLE PRECISION   D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN,
      $                   DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX,
@@ -155,7 +155,7 @@
       INTRINSIC          ABS, DBLE, MAX, MIN, SQRT
 *     ..
 *     .. Executable Statements ..
-*      
+*
 *     Test the input arguments.
 *     (in case DLASQ2 is not called by DLASQ1)
 *
@@ -195,7 +195,7 @@
          END IF
          Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
          IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
-            T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) 
+            T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) )
             S = Z( 3 )*( Z( 2 ) / T )
             IF( S.LE.T ) THEN
                S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
@@ -264,19 +264,19 @@
          Z( 2*N-1 ) = ZERO
          RETURN
       END IF
-*         
+*
 *     Check whether the machine is IEEE conformable.
-*         
+*
       IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
-     $       ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1      
-*         
+     $       ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1
+*
 *     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
 *
       DO 30 K = 2*N, 2, -2
-         Z( 2*K ) = ZERO 
-         Z( 2*K-1 ) = Z( K ) 
-         Z( 2*K-2 ) = ZERO 
-         Z( 2*K-3 ) = Z( K-1 ) 
+         Z( 2*K ) = ZERO
+         Z( 2*K-1 ) = Z( K )
+         Z( 2*K-2 ) = ZERO
+         Z( 2*K-3 ) = Z( K-1 )
    30 CONTINUE
 *
       I0 = 1
@@ -333,7 +333,7 @@
                D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
             END IF
             EMIN = MIN( EMIN, Z( I4-2*PP ) )
-   60    CONTINUE 
+   60    CONTINUE
          Z( 4*N0-PP-2 ) = D
 *
 *        Now find qmax.
@@ -364,14 +364,14 @@
       NDIV = 2*( N0-I0 )
 *
       DO 160 IWHILA = 1, N + 1
-         IF( N0.LT.1 ) 
+         IF( N0.LT.1 )
      $      GO TO 170
 *
-*        While array unfinished do 
+*        While array unfinished do
 *
 *        E(N0) holds the value of SIGMA when submatrix in I0:N0
 *        splits from the rest of the array, but is negated.
-*      
+*
          DESIG = ZERO
          IF( N0.EQ.N ) THEN
             SIGMA = ZERO
@@ -386,7 +386,7 @@
 *        Find last unreduced submatrix's top index I0, find QMAX and
 *        EMIN. Find Gershgorin-type bound if Q's much greater than E's.
 *
-         EMAX = ZERO 
+         EMAX = ZERO
          IF( N0.GT.I0 ) THEN
             EMIN = ABS( Z( 4*N0-5 ) )
          ELSE
@@ -404,7 +404,7 @@
             QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
             EMIN = MIN( EMIN, Z( I4-5 ) )
    90    CONTINUE
-         I4 = 4 
+         I4 = 4
 *
   100    CONTINUE
          I0 = I4 / 4
@@ -421,7 +421,7 @@
                   KMIN = ( I4+3 )/4
                END IF
   110       CONTINUE
-            IF( (KMIN-I0)*2.LT.N0-KMIN .AND. 
+            IF( (KMIN-I0)*2.LT.N0-KMIN .AND.
      $         DEEMIN.LE.HALF*Z(4*N0-3) ) THEN
                IPN4 = 4*( I0+N0 )
                PP = 2
@@ -446,15 +446,15 @@
 *
          DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
 *
-*        Now I0:N0 is unreduced. 
+*        Now I0:N0 is unreduced.
 *        PP = 0 for ping, PP = 1 for pong.
 *        PP = 2 indicates that flipping was applied to the Z array and
-*               and that the tests for deflation upon entry in DLASQ3 
+*               and that the tests for deflation upon entry in DLASQ3
 *               should not be performed.
 *
          NBIG = 100*( N0-I0+1 )
          DO 140 IWHILB = 1, NBIG
-            IF( I0.GT.N0 ) 
+            IF( I0.GT.N0 )
      $         GO TO 150
 *
 *           While submatrix unfinished take a good dqds step.
@@ -497,8 +497,8 @@
   140    CONTINUE
 *
          INFO = 2
-*       
-*        Maximum number of iterations exceeded, restore the shift 
+*
+*        Maximum number of iterations exceeded, restore the shift
 *        SIGMA and place the new d's and e's in a qd array.
 *        This might need to be done for several blocks
 *
@@ -549,16 +549,16 @@
       INFO = 3
       RETURN
 *
-*     end IWHILA   
+*     end IWHILA
 *
   170 CONTINUE
-*      
+*
 *     Move q's to the front.
-*      
+*
       DO 180 K = 2, N
          Z( K ) = Z( 4*K-3 )
   180 CONTINUE
-*      
+*
 *     Sort and compute sum of eigenvalues.
 *
       CALL DLASRT( 'D', N, Z, IINFO )
@@ -570,7 +570,7 @@
 *
 *     Store trace, sum(eigenvalues) and information on performance.
 *
-      Z( 2*N+1 ) = TRACE 
+      Z( 2*N+1 ) = TRACE
       Z( 2*N+2 ) = E
       Z( 2*N+3 ) = DBLE( ITER )
       Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )
diff --git a/lib/linalg/dlasq3.f b/lib/linalg/dlasq3.f
index d49d1c59398a71f8beccf9932434fe2e477a8527..c095bdbbb58b2f15a926618d258dd379141d58c6 100644
--- a/lib/linalg/dlasq3.f
+++ b/lib/linalg/dlasq3.f
@@ -2,18 +2,18 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ3 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq3.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq3.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq3.f"> 
+*> Download DLASQ3 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq3.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq3.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq3.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -21,7 +21,7 @@
 *       SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
 *                          ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
 *                          DN2, G, TAU )
-* 
+*
 *       .. Scalar Arguments ..
 *       LOGICAL            IEEE
 *       INTEGER            I0, ITER, N0, NDIV, NFAIL, PP
@@ -31,7 +31,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -58,9 +58,9 @@
 *>         Last index.
 *> \endverbatim
 *>
-*> \param[in] Z
+*> \param[in,out] Z
 *> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension ( 4*N )
+*>          Z is DOUBLE PRECISION array, dimension ( 4*N0 )
 *>         Z holds the qd array.
 *> \endverbatim
 *>
@@ -68,8 +68,8 @@
 *> \verbatim
 *>          PP is INTEGER
 *>         PP=0 for ping, PP=1 for pong.
-*>         PP=2 indicates that flipping was applied to the Z array   
-*>         and that the initial tests for deflation should not be 
+*>         PP=2 indicates that flipping was applied to the Z array
+*>         and that the initial tests for deflation should not be
 *>         performed.
 *> \endverbatim
 *>
@@ -97,22 +97,22 @@
 *>         Maximum value of q.
 *> \endverbatim
 *>
-*> \param[out] NFAIL
+*> \param[in,out] NFAIL
 *> \verbatim
 *>          NFAIL is INTEGER
-*>         Number of times shift was too big.
+*>         Increment NFAIL by 1 each time the shift was too big.
 *> \endverbatim
 *>
-*> \param[out] ITER
+*> \param[in,out] ITER
 *> \verbatim
 *>          ITER is INTEGER
-*>         Number of iterations.
+*>         Increment ITER by 1 for each iteration.
 *> \endverbatim
 *>
-*> \param[out] NDIV
+*> \param[in,out] NDIV
 *> \verbatim
 *>          NDIV is INTEGER
-*>         Number of divisions.
+*>         Increment NDIV by 1 for each division.
 *> \endverbatim
 *>
 *> \param[in] IEEE
@@ -168,12 +168,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -182,10 +182,10 @@
      $                   ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
      $                   DN2, G, TAU )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       LOGICAL            IEEE
@@ -286,7 +286,7 @@
       GO TO 10
 *
    50 CONTINUE
-      IF( PP.EQ.2 ) 
+      IF( PP.EQ.2 )
      $   PP = 0
 *
 *     Reverse the qd-array, if warranted.
@@ -345,7 +345,7 @@
 *
          GO TO 90
 *
-      ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. 
+      ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND.
      $         Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND.
      $         ABS( DN ).LT.TOL*SIGMA ) THEN
 *
@@ -389,7 +389,7 @@
             GO TO 70
          END IF
       ELSE
-*            
+*
 *        Possible underflow. Play it safe.
 *
          GO TO 80
diff --git a/lib/linalg/dlasq4.f b/lib/linalg/dlasq4.f
index 97d9bdeba31a930364553d60ffba192f91f01df7..d4ddbbc7b2e9b8d7383be615becb7c6fdb0a37a0 100644
--- a/lib/linalg/dlasq4.f
+++ b/lib/linalg/dlasq4.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ4 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq4.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq4.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq4.f"> 
+*> Download DLASQ4 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq4.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq4.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq4.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN,
 *                          DN1, DN2, TAU, TTYPE, G )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            I0, N0, N0IN, PP, TTYPE
 *       DOUBLE PRECISION   DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -56,7 +56,7 @@
 *>
 *> \param[in] Z
 *> \verbatim
-*>          Z is DOUBLE PRECISION array, dimension ( 4*N )
+*>          Z is DOUBLE PRECISION array, dimension ( 4*N0 )
 *>        Z holds the qd array.
 *> \endverbatim
 *>
@@ -122,7 +122,7 @@
 *>
 *> \param[in,out] G
 *> \verbatim
-*>          G is REAL
+*>          G is DOUBLE PRECISION
 *>        G is passed as an argument in order to save its value between
 *>        calls to DLASQ4.
 *> \endverbatim
@@ -130,12 +130,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -151,10 +151,10 @@
       SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN,
      $                   DN1, DN2, TAU, TTYPE, G )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            I0, N0, N0IN, PP, TTYPE
@@ -192,7 +192,7 @@
          TTYPE = -1
          RETURN
       END IF
-*       
+*
       NN = 4*N0 + PP
       IF( N0IN.EQ.N0 ) THEN
 *
@@ -240,7 +240,6 @@
                   NP = NN - 9
                ELSE
                   NP = NN - 2*PP
-                  B2 = Z( NP-2 )
                   GAM = DN1
                   IF( Z( NP-4 ) .GT. Z( NP-2 ) )
      $               RETURN
@@ -262,7 +261,7 @@
      $               RETURN
                   B2 = B2*( Z( I4 ) / Z( I4-2 ) )
                   A2 = A2 + B2
-                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 )
      $               GO TO 20
    10          CONTINUE
    20          CONTINUE
@@ -303,7 +302,7 @@
      $               RETURN
                   B2 = B2*( Z( I4 ) / Z( I4-2 ) )
                   A2 = A2 + B2
-                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 )
      $               GO TO 40
    30          CONTINUE
    40          CONTINUE
@@ -331,7 +330,7 @@
 *
 *        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN.
 *
-         IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN 
+         IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN
 *
 *           Cases 7 and 8.
 *
@@ -349,7 +348,7 @@
      $            RETURN
                B1 = B1*( Z( I4 ) / Z( I4-2 ) )
                B2 = B2 + B1
-               IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) 
+               IF( HUNDRD*MAX( B1, A2 ).LT.B2 )
      $            GO TO 60
    50       CONTINUE
    60       CONTINUE
@@ -358,7 +357,7 @@
             GAP2 = HALF*DMIN2 - A2
             IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
                S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
-            ELSE 
+            ELSE
                S = MAX( S, A2*( ONE-CNST2*B2 ) )
                TTYPE = -8
             END IF
@@ -378,7 +377,7 @@
 *
 *        Cases 10 and 11.
 *
-         IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN 
+         IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN
             TTYPE = -10
             S = THIRD*DMIN2
             IF( Z( NN-5 ).GT.Z( NN-7 ) )
@@ -402,7 +401,7 @@
      $             SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2
             IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
                S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
-            ELSE 
+            ELSE
                S = MAX( S, A2*( ONE-CNST2*B2 ) )
             END IF
          ELSE
@@ -413,7 +412,7 @@
 *
 *        Case 12, more than two eigenvalues deflated. No information.
 *
-         S = ZERO 
+         S = ZERO
          TTYPE = -12
       END IF
 *
diff --git a/lib/linalg/dlasq5.f b/lib/linalg/dlasq5.f
index cdd8cf1ae3d2cc05f29c7f4e5c3df1cb82a3889f..3812c879fa2df1d2017b6d40c36c79d4452c34f3 100644
--- a/lib/linalg/dlasq5.f
+++ b/lib/linalg/dlasq5.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ5 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq5.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq5.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq5.f"> 
+*> Download DLASQ5 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq5.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq5.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq5.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN,
 *                          DNM1, DNM2, IEEE, EPS )
-* 
+*
 *       .. Scalar Arguments ..
 *       LOGICAL            IEEE
 *       INTEGER            I0, N0, PP
@@ -29,7 +29,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -121,7 +121,7 @@
 *>          IEEE is LOGICAL
 *>        Flag for IEEE or non IEEE arithmetic.
 *> \endverbatim
-*
+*>
 *> \param[in] EPS
 *> \verbatim
 *>          EPS is DOUBLE PRECISION
@@ -131,12 +131,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -144,10 +144,10 @@
       SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2,
      $                   DN, DNM1, DNM2, IEEE, EPS )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       LOGICAL            IEEE
@@ -181,7 +181,7 @@
       IF( TAU.LT.DTHRESH*HALF ) TAU = ZERO
       IF( TAU.NE.ZERO ) THEN
       J4 = 4*I0 + PP - 3
-      EMIN = Z( J4+4 ) 
+      EMIN = Z( J4+4 )
       D = Z( J4 ) - TAU
       DMIN = D
       DMIN1 = -Z( J4 )
@@ -192,7 +192,7 @@
 *
          IF( PP.EQ.0 ) THEN
             DO 10 J4 = 4*I0, 4*( N0-3 ), 4
-               Z( J4-2 ) = D + Z( J4-1 ) 
+               Z( J4-2 ) = D + Z( J4-1 )
                TEMP = Z( J4+1 ) / Z( J4-2 )
                D = D*TEMP - TAU
                DMIN = MIN( DMIN, D )
@@ -201,7 +201,7 @@
    10       CONTINUE
          ELSE
             DO 20 J4 = 4*I0, 4*( N0-3 ), 4
-               Z( J4-3 ) = D + Z( J4 ) 
+               Z( J4-3 ) = D + Z( J4 )
                TEMP = Z( J4+2 ) / Z( J4-3 )
                D = D*TEMP - TAU
                DMIN = MIN( DMIN, D )
@@ -210,7 +210,7 @@
    20       CONTINUE
          END IF
 *
-*        Unroll last two steps. 
+*        Unroll last two steps.
 *
          DNM2 = D
          DMIN2 = DMIN
@@ -235,10 +235,10 @@
 *
          IF( PP.EQ.0 ) THEN
             DO 30 J4 = 4*I0, 4*( N0-3 ), 4
-               Z( J4-2 ) = D + Z( J4-1 ) 
+               Z( J4-2 ) = D + Z( J4-1 )
                IF( D.LT.ZERO ) THEN
                   RETURN
-               ELSE 
+               ELSE
                   Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
                   D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU
                END IF
@@ -247,10 +247,10 @@
    30       CONTINUE
          ELSE
             DO 40 J4 = 4*I0, 4*( N0-3 ), 4
-               Z( J4-3 ) = D + Z( J4 ) 
+               Z( J4-3 ) = D + Z( J4 )
                IF( D.LT.ZERO ) THEN
                   RETURN
-               ELSE 
+               ELSE
                   Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
                   D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU
                END IF
@@ -259,7 +259,7 @@
    40       CONTINUE
          END IF
 *
-*        Unroll last two steps. 
+*        Unroll last two steps.
 *
          DNM2 = D
          DMIN2 = DMIN
@@ -290,17 +290,17 @@
       ELSE
 *     This is the version that sets d's to zero if they are small enough
          J4 = 4*I0 + PP - 3
-         EMIN = Z( J4+4 ) 
+         EMIN = Z( J4+4 )
          D = Z( J4 ) - TAU
          DMIN = D
          DMIN1 = -Z( J4 )
          IF( IEEE ) THEN
-*     
+*
 *     Code for IEEE arithmetic.
-*     
+*
             IF( PP.EQ.0 ) THEN
                DO 50 J4 = 4*I0, 4*( N0-3 ), 4
-                  Z( J4-2 ) = D + Z( J4-1 ) 
+                  Z( J4-2 ) = D + Z( J4-1 )
                   TEMP = Z( J4+1 ) / Z( J4-2 )
                   D = D*TEMP - TAU
                   IF( D.LT.DTHRESH ) D = ZERO
@@ -310,7 +310,7 @@
  50            CONTINUE
             ELSE
                DO 60 J4 = 4*I0, 4*( N0-3 ), 4
-                  Z( J4-3 ) = D + Z( J4 ) 
+                  Z( J4-3 ) = D + Z( J4 )
                   TEMP = Z( J4+2 ) / Z( J4-3 )
                   D = D*TEMP - TAU
                   IF( D.LT.DTHRESH ) D = ZERO
@@ -319,9 +319,9 @@
                   EMIN = MIN( Z( J4-1 ), EMIN )
  60            CONTINUE
             END IF
-*     
-*     Unroll last two steps. 
-*     
+*
+*     Unroll last two steps.
+*
             DNM2 = D
             DMIN2 = DMIN
             J4 = 4*( N0-2 ) - PP
@@ -330,7 +330,7 @@
             Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
             DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
             DMIN = MIN( DMIN, DNM1 )
-*     
+*
             DMIN1 = DMIN
             J4 = J4 + 4
             J4P2 = J4 + 2*PP - 1
@@ -338,17 +338,17 @@
             Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
             DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
             DMIN = MIN( DMIN, DN )
-*     
+*
          ELSE
-*     
+*
 *     Code for non IEEE arithmetic.
-*     
+*
             IF( PP.EQ.0 ) THEN
                DO 70 J4 = 4*I0, 4*( N0-3 ), 4
-                  Z( J4-2 ) = D + Z( J4-1 ) 
+                  Z( J4-2 ) = D + Z( J4-1 )
                   IF( D.LT.ZERO ) THEN
                      RETURN
-                  ELSE 
+                  ELSE
                      Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
                      D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU
                   END IF
@@ -358,10 +358,10 @@
  70            CONTINUE
             ELSE
                DO 80 J4 = 4*I0, 4*( N0-3 ), 4
-                  Z( J4-3 ) = D + Z( J4 ) 
+                  Z( J4-3 ) = D + Z( J4 )
                   IF( D.LT.ZERO ) THEN
                      RETURN
-                  ELSE 
+                  ELSE
                      Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
                      D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU
                   END IF
@@ -370,9 +370,9 @@
                   EMIN = MIN( EMIN, Z( J4-1 ) )
  80            CONTINUE
             END IF
-*     
-*     Unroll last two steps. 
-*     
+*
+*     Unroll last two steps.
+*
             DNM2 = D
             DMIN2 = DMIN
             J4 = 4*( N0-2 ) - PP
@@ -385,7 +385,7 @@
                DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
             END IF
             DMIN = MIN( DMIN, DNM1 )
-*     
+*
             DMIN1 = DMIN
             J4 = J4 + 4
             J4P2 = J4 + 2*PP - 1
@@ -397,10 +397,10 @@
                DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
             END IF
             DMIN = MIN( DMIN, DN )
-*     
+*
          END IF
       END IF
-*     
+*
       Z( J4+2 ) = DN
       Z( 4*N0-PP ) = EMIN
       RETURN
diff --git a/lib/linalg/dlasq6.f b/lib/linalg/dlasq6.f
index 3c8661bbba264994a35b8131aba2e93b348b6ee2..d871386bdbdbe09b244e17455136a1b9fa568c1f 100644
--- a/lib/linalg/dlasq6.f
+++ b/lib/linalg/dlasq6.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASQ6 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq6.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq6.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq6.f"> 
+*> Download DLASQ6 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq6.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq6.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq6.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN,
 *                          DNM1, DNM2 )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            I0, N0, PP
 *       DOUBLE PRECISION   DMIN, DMIN1, DMIN2, DN, DNM1, DNM2
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   Z( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -106,12 +106,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -119,10 +119,10 @@
       SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN,
      $                   DNM1, DNM2 )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            I0, N0, PP
@@ -156,13 +156,13 @@
 *
       SAFMIN = DLAMCH( 'Safe minimum' )
       J4 = 4*I0 + PP - 3
-      EMIN = Z( J4+4 ) 
+      EMIN = Z( J4+4 )
       D = Z( J4 )
       DMIN = D
 *
       IF( PP.EQ.0 ) THEN
          DO 10 J4 = 4*I0, 4*( N0-3 ), 4
-            Z( J4-2 ) = D + Z( J4-1 ) 
+            Z( J4-2 ) = D + Z( J4-1 )
             IF( Z( J4-2 ).EQ.ZERO ) THEN
                Z( J4 ) = ZERO
                D = Z( J4+1 )
@@ -173,7 +173,7 @@
                TEMP = Z( J4+1 ) / Z( J4-2 )
                Z( J4 ) = Z( J4-1 )*TEMP
                D = D*TEMP
-            ELSE 
+            ELSE
                Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
                D = Z( J4+1 )*( D / Z( J4-2 ) )
             END IF
@@ -182,7 +182,7 @@
    10    CONTINUE
       ELSE
          DO 20 J4 = 4*I0, 4*( N0-3 ), 4
-            Z( J4-3 ) = D + Z( J4 ) 
+            Z( J4-3 ) = D + Z( J4 )
             IF( Z( J4-3 ).EQ.ZERO ) THEN
                Z( J4-1 ) = ZERO
                D = Z( J4+2 )
@@ -193,7 +193,7 @@
                TEMP = Z( J4+2 ) / Z( J4-3 )
                Z( J4-1 ) = Z( J4 )*TEMP
                D = D*TEMP
-            ELSE 
+            ELSE
                Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
                D = Z( J4+2 )*( D / Z( J4-3 ) )
             END IF
@@ -202,7 +202,7 @@
    20    CONTINUE
       END IF
 *
-*     Unroll last two steps. 
+*     Unroll last two steps.
 *
       DNM2 = D
       DMIN2 = DMIN
diff --git a/lib/linalg/dlasr.f b/lib/linalg/dlasr.f
index 645d03b3d87a6e1c1067bf28db973d2a4f9d2994..6059c6293aa7ed169d61b573fb4a487dfd6bd24c 100644
--- a/lib/linalg/dlasr.f
+++ b/lib/linalg/dlasr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasr.f"> 
+*> Download DLASR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, PIVOT, SIDE
 *       INTEGER            LDA, M, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( * ), S( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -36,35 +36,35 @@
 *>
 *> DLASR applies a sequence of plane rotations to a real matrix A,
 *> from either the left or the right.
-*> 
+*>
 *> When SIDE = 'L', the transformation takes the form
-*> 
+*>
 *>    A := P*A
-*> 
+*>
 *> and when SIDE = 'R', the transformation takes the form
-*> 
+*>
 *>    A := A*P**T
-*> 
+*>
 *> where P is an orthogonal matrix consisting of a sequence of z plane
 *> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R',
 *> and P**T is the transpose of P.
-*> 
+*>
 *> When DIRECT = 'F' (Forward sequence), then
-*> 
+*>
 *>    P = P(z-1) * ... * P(2) * P(1)
-*> 
+*>
 *> and when DIRECT = 'B' (Backward sequence), then
-*> 
+*>
 *>    P = P(1) * P(2) * ... * P(z-1)
-*> 
+*>
 *> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation
-*> 
+*>
 *>    R(k) = (  c(k)  s(k) )
 *>         = ( -s(k)  c(k) ).
-*> 
+*>
 *> When PIVOT = 'V' (Variable pivot), the rotation is performed
 *> for the plane (k,k+1), i.e., P(k) has the form
-*> 
+*>
 *>    P(k) = (  1                                            )
 *>           (       ...                                     )
 *>           (              1                                )
@@ -73,13 +73,13 @@
 *>           (                                1              )
 *>           (                                     ...       )
 *>           (                                            1  )
-*> 
+*>
 *> where R(k) appears as a rank-2 modification to the identity matrix in
 *> rows and columns k and k+1.
-*> 
+*>
 *> When PIVOT = 'T' (Top pivot), the rotation is performed for the
 *> plane (1,k+1), so P(k) has the form
-*> 
+*>
 *>    P(k) = (  c(k)                    s(k)                 )
 *>           (         1                                     )
 *>           (              ...                              )
@@ -88,12 +88,12 @@
 *>           (                                 1             )
 *>           (                                      ...      )
 *>           (                                             1 )
-*> 
+*>
 *> where R(k) appears in rows and columns 1 and k+1.
-*> 
+*>
 *> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is
 *> performed for the plane (k,z), giving P(k) the form
-*> 
+*>
 *>    P(k) = ( 1                                             )
 *>           (      ...                                      )
 *>           (             1                                 )
@@ -102,7 +102,7 @@
 *>           (                              ...              )
 *>           (                                     1         )
 *>           (                 -s(k)                    c(k) )
-*> 
+*>
 *> where R(k) appears in rows and columns k and z.  The rotations are
 *> performed without ever forming P(k) explicitly.
 *> \endverbatim
@@ -187,22 +187,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, PIVOT, SIDE
diff --git a/lib/linalg/dlasrt.f b/lib/linalg/dlasrt.f
index f5d0e6cd1a8ceb180ec14fbf9293ab55ac4b6f94..4705311d78325b7d0b6188f324e29c64cc7eb5e2 100644
--- a/lib/linalg/dlasrt.f
+++ b/lib/linalg/dlasrt.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASRT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasrt.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasrt.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasrt.f"> 
+*> Download DLASRT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasrt.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasrt.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasrt.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASRT( ID, N, D, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          ID
 *       INTEGER            INFO, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -76,22 +76,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DLASRT( ID, N, D, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          ID
@@ -123,7 +123,7 @@
 *     ..
 *     .. Executable Statements ..
 *
-*     Test the input paramters.
+*     Test the input parameters.
 *
       INFO = 0
       DIR = -1
diff --git a/lib/linalg/dlassq.f b/lib/linalg/dlassq.f
index c7c4087e808e5e8588037b640d96541b85dac338..885395e3c970eec247d8b84a3f5c6c80558efed9 100644
--- a/lib/linalg/dlassq.f
+++ b/lib/linalg/dlassq.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASSQ + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlassq.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlassq.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlassq.f"> 
+*> Download DLASSQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlassq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlassq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlassq.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       DOUBLE PRECISION   SCALE, SUMSQ
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -91,22 +91,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/dlasv2.f b/lib/linalg/dlasv2.f
index 96aaa1e45c77b75f9c7617c599cd45e29d9545d4..9371d6d3b2df1cf5d30cc85af8a3e6e3bcb14e3a 100644
--- a/lib/linalg/dlasv2.f
+++ b/lib/linalg/dlasv2.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASV2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasv2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasv2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasv2.f"> 
+*> Download DLASV2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasv2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasv2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasv2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL )
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION   CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,14 +102,14 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -138,10 +138,10 @@
 *  =====================================================================
       SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION   CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN
diff --git a/lib/linalg/dlaswp.f b/lib/linalg/dlaswp.f
index 937e12b2f02fbf2ce911beee1964d276b0d361fa..202fd8df1bac9cf8d84ea31f78f4ececbf7f79ad 100644
--- a/lib/linalg/dlaswp.f
+++ b/lib/linalg/dlaswp.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLASWP + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaswp.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaswp.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaswp.f"> 
+*> Download DLASWP + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaswp.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaswp.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaswp.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, K1, K2, LDA, N
 *       ..
@@ -27,7 +27,7 @@
 *       INTEGER            IPIV( * )
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -71,34 +71,35 @@
 *> \param[in] K2
 *> \verbatim
 *>          K2 is INTEGER
-*>          The last element of IPIV for which a row interchange will
-*>          be done.
+*>          (K2-K1+1) is the number of elements of IPIV for which a row
+*>          interchange will be done.
 *> \endverbatim
 *>
 *> \param[in] IPIV
 *> \verbatim
-*>          IPIV is INTEGER array, dimension (K2*abs(INCX))
-*>          The vector of pivot indices.  Only the elements in positions
-*>          K1 through K2 of IPIV are accessed.
-*>          IPIV(K) = L implies rows K and L are to be interchanged.
+*>          IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX))
+*>          The vector of pivot indices. Only the elements in positions
+*>          K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed.
+*>          IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be
+*>          interchanged.
 *> \endverbatim
 *>
 *> \param[in] INCX
 *> \verbatim
 *>          INCX is INTEGER
-*>          The increment between successive values of IPIV.  If IPIV
+*>          The increment between successive values of IPIV. If INCX
 *>          is negative, the pivots are applied in reverse order.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2017
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -114,10 +115,10 @@
 *  =====================================================================
       SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, K1, K2, LDA, N
@@ -135,7 +136,8 @@
 *     ..
 *     .. Executable Statements ..
 *
-*     Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*     Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows
+*     K1 through K2.
 *
       IF( INCX.GT.0 ) THEN
          IX0 = K1
@@ -143,7 +145,7 @@
          I2 = K2
          INC = 1
       ELSE IF( INCX.LT.0 ) THEN
-         IX0 = 1 + ( 1-K2 )*INCX
+         IX0 = K1 + ( K1-K2 )*INCX
          I1 = K2
          I2 = K1
          INC = -1
diff --git a/lib/linalg/dlatrd.f b/lib/linalg/dlatrd.f
index 69ec0018be6c28fb1244e26172968765fee18de1..a1df43e48a5381a50b7a115f6d317415e0850e69 100644
--- a/lib/linalg/dlatrd.f
+++ b/lib/linalg/dlatrd.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLATRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlatrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlatrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlatrd.f"> 
+*> Download DLATRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlatrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlatrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlatrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, LDW, N, NB
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), E( * ), TAU( * ), W( LDW, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -134,12 +134,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -198,10 +198,10 @@
 *  =====================================================================
       SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dlatrs.f b/lib/linalg/dlatrs.f
index b34795eb1579a205de813af80f9d86f1e4afe443..5ad5f66c55dfb227d89592fde75b8aa9f5a93033 100644
--- a/lib/linalg/dlatrs.f
+++ b/lib/linalg/dlatrs.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DLATRS + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlatrs.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlatrs.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlatrs.f"> 
+*> Download DLATRS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlatrs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlatrs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlatrs.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
 *                          CNORM, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
 *       INTEGER            INFO, LDA, N
@@ -29,7 +29,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), CNORM( * ), X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -153,12 +153,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
@@ -238,10 +238,10 @@
       SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
      $                   CNORM, INFO )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORMIN, TRANS, UPLO
diff --git a/lib/linalg/dnrm2.f b/lib/linalg/dnrm2.f
index 5ea257a2004bc8adc91ccf51112bc50e7d8cc2fa..30552e1d1dc252c2673438152706be549f208b7c 100644
--- a/lib/linalg/dnrm2.f
+++ b/lib/linalg/dnrm2.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -29,15 +29,35 @@
 *>    DNRM2 := sqrt( x'*x )
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] X
+*> \verbatim
+*>          X is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -54,10 +74,10 @@
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/dorg2l.f b/lib/linalg/dorg2l.f
index b95fa50fc52e0b4da682052cd46e400b6ad2bcee..36ff4e5d4b281c3f51ded4ae3f8b573dc2aab1c7 100644
--- a/lib/linalg/dorg2l.f
+++ b/lib/linalg/dorg2l.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORG2L + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorg2l.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorg2l.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorg2l.f"> 
+*> Download DORG2L + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorg2l.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorg2l.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorg2l.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/dorg2r.f b/lib/linalg/dorg2r.f
index 86df6dddc7fce64bb161b4b4451ff6365e0b3464..4b71011a9ff65aaf9c4cc5cd0aea36143b8115b8 100644
--- a/lib/linalg/dorg2r.f
+++ b/lib/linalg/dorg2r.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORG2R + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorg2r.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorg2r.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorg2r.f"> 
+*> Download DORG2R + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorg2r.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorg2r.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorg2r.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/dorgbr.f b/lib/linalg/dorgbr.f
index ddfa7262a0654872ec3e9f85e4e98455938f303f..cfebda5abdbb8303a07a7d26bea47b1b11e269f2 100644
--- a/lib/linalg/dorgbr.f
+++ b/lib/linalg/dorgbr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGBR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgbr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgbr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgbr.f"> 
+*> Download DORGBR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgbr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgbr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgbr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          VECT
 *       INTEGER            INFO, K, LDA, LWORK, M, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -145,10 +145,10 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
 *> \date April 2012
 *
@@ -157,7 +157,7 @@
 *  =====================================================================
       SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.1) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     April 2012
@@ -182,8 +182,7 @@
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
-      INTEGER            ILAENV
-      EXTERNAL           LSAME, ILAENV
+      EXTERNAL           LSAME
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           DORGLQ, DORGQR, XERBLA
diff --git a/lib/linalg/dorgl2.f b/lib/linalg/dorgl2.f
index 3e8398b73f872c1b8301900cf302a7d3383d49ce..5d8985d7589f104d067a4cabd0ae797f142b76ba 100644
--- a/lib/linalg/dorgl2.f
+++ b/lib/linalg/dorgl2.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGL2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgl2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgl2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgl2.f"> 
+*> Download DORGL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgl2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgl2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgl2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -101,22 +101,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/dorglq.f b/lib/linalg/dorglq.f
index 88aec15005ca7a708534a0ff7469224902d40f84..912b5de84ed345ac1845e87d2668eabf75bdaead 100644
--- a/lib/linalg/dorglq.f
+++ b/lib/linalg/dorglq.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGLQ + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorglq.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorglq.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorglq.f"> 
+*> Download DORGLQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorglq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorglq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorglq.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -115,22 +115,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, LWORK, M, N
diff --git a/lib/linalg/dorgql.f b/lib/linalg/dorgql.f
index ca4698d799dbb9ef7568a696210891aaece4ad10..ea12be91b11112fe1489629001f9a91572e48871 100644
--- a/lib/linalg/dorgql.f
+++ b/lib/linalg/dorgql.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGQL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgql.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgql.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgql.f"> 
+*> Download DORGQL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgql.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgql.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgql.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,22 +116,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, LWORK, M, N
diff --git a/lib/linalg/dorgqr.f b/lib/linalg/dorgqr.f
index 404ab184e655145c301fda51efa7a4c2f5c1f0fc..628eeacba702afe0b43b0b93ad7064274c0ecbcb 100644
--- a/lib/linalg/dorgqr.f
+++ b/lib/linalg/dorgqr.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgqr.f"> 
+*> Download DORGQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,22 +116,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, LWORK, M, N
diff --git a/lib/linalg/dorgtr.f b/lib/linalg/dorgtr.f
index 06a7b6cc1cdc5bd335ce9e50777a006ebfab3c06..72623eac06d1876429075783549b24c8011fb4cd 100644
--- a/lib/linalg/dorgtr.f
+++ b/lib/linalg/dorgtr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORGTR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtr.f"> 
+*> Download DORGTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -111,22 +111,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dorm2l.f b/lib/linalg/dorm2l.f
index 3ff25869a7524b58a6bd6fb6b6a437c8110060e0..1014cb2378398fd77d0c9c022e82faf2097e4701 100644
--- a/lib/linalg/dorm2l.f
+++ b/lib/linalg/dorm2l.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORM2L + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorm2l.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorm2l.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorm2l.f"> 
+*> Download DORM2L + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorm2l.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorm2l.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorm2l.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -146,12 +146,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -159,10 +159,10 @@
       SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
diff --git a/lib/linalg/dorm2r.f b/lib/linalg/dorm2r.f
index b13f12d53cb4bd14a5ca28333f2b5073cd1a720f..632b70e740caa5d70026bda87b0cc7953af8567f 100644
--- a/lib/linalg/dorm2r.f
+++ b/lib/linalg/dorm2r.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORM2R + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorm2r.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorm2r.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorm2r.f"> 
+*> Download DORM2R + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorm2r.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorm2r.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorm2r.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -146,12 +146,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -159,10 +159,10 @@
       SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
diff --git a/lib/linalg/dormbr.f b/lib/linalg/dormbr.f
index 7a0d9b9038527296cd528b5edf62cd3cdaa2c239..f035d0ae66fa2f9574bfe4ffc85e0865b9b8ea32 100644
--- a/lib/linalg/dormbr.f
+++ b/lib/linalg/dormbr.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORMBR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormbr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormbr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormbr.f"> 
+*> Download DORMBR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormbr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormbr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormbr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
 *                          LDC, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS, VECT
 *       INTEGER            INFO, K, LDA, LDC, LWORK, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -182,12 +182,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -195,10 +195,10 @@
       SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
      $                   LDC, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS, VECT
diff --git a/lib/linalg/dorml2.f b/lib/linalg/dorml2.f
index 9ae2396e12da7f83e9aa095f7e21ed1d9b6556a9..2c55c7f1fd380a03a831773bf01747dbdd5c25d3 100644
--- a/lib/linalg/dorml2.f
+++ b/lib/linalg/dorml2.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORML2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorml2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorml2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorml2.f"> 
+*> Download DORML2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorml2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorml2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorml2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -146,12 +146,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -159,10 +159,10 @@
       SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
diff --git a/lib/linalg/dormlq.f b/lib/linalg/dormlq.f
index ebbd4d26e2cb3e9dce593896a9d72f272d829d12..bb5469d273bbb38697c0d67eb78f09102896b6bb 100644
--- a/lib/linalg/dormlq.f
+++ b/lib/linalg/dormlq.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORMLQ + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormlq.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormlq.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormlq.f"> 
+*> Download DORMLQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormlq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormlq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormlq.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, LWORK, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -136,9 +136,7 @@
 *>          The dimension of the array WORK.
 *>          If SIDE = 'L', LWORK >= max(1,N);
 *>          if SIDE = 'R', LWORK >= max(1,M).
-*>          For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*>          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*>          blocksize.
+*>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
@@ -156,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -169,10 +167,10 @@
       SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
@@ -185,18 +183,16 @@
 *  =====================================================================
 *
 *     .. Parameters ..
-      INTEGER            NBMAX, LDT
-      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      INTEGER            NBMAX, LDT, TSIZE
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1,
+     $                     TSIZE = LDT*NBMAX )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LEFT, LQUERY, NOTRAN
       CHARACTER          TRANST
-      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
      $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
 *     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   T( LDT, NBMAX )
-*     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       INTEGER            ILAENV
@@ -246,12 +242,11 @@
 *
       IF( INFO.EQ.0 ) THEN
 *
-*        Determine the block size.  NB may be at most NBMAX, where NBMAX
-*        is used to define the local array T.
+*        Compute the workspace requirements
 *
          NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K,
      $        -1 ) )
-         LWKOPT = MAX( 1, NW )*NB
+         LWKOPT = MAX( 1, NW )*NB + TSIZE
          WORK( 1 ) = LWKOPT
       END IF
 *
@@ -272,14 +267,11 @@
       NBMIN = 2
       LDWORK = NW
       IF( NB.GT.1 .AND. NB.LT.K ) THEN
-         IWS = NW*NB
-         IF( LWORK.LT.IWS ) THEN
-            NB = LWORK / LDWORK
+         IF( LWORK.LT.NW*NB+TSIZE ) THEN
+            NB = (LWORK-TSIZE) / LDWORK
             NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K,
      $              -1 ) )
          END IF
-      ELSE
-         IWS = NW
       END IF
 *
       IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -292,6 +284,7 @@
 *
 *        Use blocked code
 *
+         IWT = 1 + NW*NB
          IF( ( LEFT .AND. NOTRAN ) .OR.
      $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
             I1 = 1
@@ -324,7 +317,7 @@
 *           H = H(i) H(i+1) . . . H(i+ib-1)
 *
             CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
-     $                   LDA, TAU( I ), T, LDT )
+     $                   LDA, TAU( I ), WORK( IWT ), LDT )
             IF( LEFT ) THEN
 *
 *              H or H**T is applied to C(i:m,1:n)
@@ -342,8 +335,8 @@
 *           Apply H or H**T
 *
             CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
-     $                   A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
-     $                   LDWORK )
+     $                   A( I, I ), LDA, WORK( IWT ), LDT,
+     $                   C( IC, JC ), LDC, WORK, LDWORK )
    10    CONTINUE
       END IF
       WORK( 1 ) = LWKOPT
diff --git a/lib/linalg/dormql.f b/lib/linalg/dormql.f
index 96c6f1958ed492c358454180e27e3e70d047b3ac..7d2b5d6c32bb618c026a305de7bca5d28a2cfd09 100644
--- a/lib/linalg/dormql.f
+++ b/lib/linalg/dormql.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORMQL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormql.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormql.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormql.f"> 
+*> Download DORMQL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormql.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormql.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormql.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, LWORK, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -136,9 +136,7 @@
 *>          The dimension of the array WORK.
 *>          If SIDE = 'L', LWORK >= max(1,N);
 *>          if SIDE = 'R', LWORK >= max(1,M).
-*>          For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*>          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*>          blocksize.
+*>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
@@ -156,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -169,10 +167,10 @@
       SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
@@ -185,17 +183,15 @@
 *  =====================================================================
 *
 *     .. Parameters ..
-      INTEGER            NBMAX, LDT
-      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      INTEGER            NBMAX, LDT, TSIZE
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1,
+     $                     TSIZE = LDT*NBMAX )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LEFT, LQUERY, NOTRAN
-      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
      $                   MI, NB, NBMIN, NI, NQ, NW
 *     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   T( LDT, NBMAX )
-*     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       INTEGER            ILAENV
@@ -239,25 +235,22 @@
          INFO = -7
       ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
          INFO = -10
+      ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+         INFO = -12
       END IF
 *
       IF( INFO.EQ.0 ) THEN
+*
+*        Compute the workspace requirements
+*
          IF( M.EQ.0 .OR. N.EQ.0 ) THEN
             LWKOPT = 1
          ELSE
-*
-*           Determine the block size.  NB may be at most NBMAX, where
-*           NBMAX is used to define the local array T.
-*
             NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N,
      $                               K, -1 ) )
-            LWKOPT = NW*NB
+            LWKOPT = NW*NB + TSIZE
          END IF
          WORK( 1 ) = LWKOPT
-*
-         IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
-            INFO = -12
-         END IF
       END IF
 *
       IF( INFO.NE.0 ) THEN
@@ -276,14 +269,11 @@
       NBMIN = 2
       LDWORK = NW
       IF( NB.GT.1 .AND. NB.LT.K ) THEN
-         IWS = NW*NB
-         IF( LWORK.LT.IWS ) THEN
-            NB = LWORK / LDWORK
+         IF( LWORK.LT.NW*NB+TSIZE ) THEN
+            NB = (LWORK-TSIZE) / LDWORK
             NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K,
      $              -1 ) )
          END IF
-      ELSE
-         IWS = NW
       END IF
 *
       IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -296,6 +286,7 @@
 *
 *        Use blocked code
 *
+         IWT = 1 + NW*NB
          IF( ( LEFT .AND. NOTRAN ) .OR.
      $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
             I1 = 1
@@ -320,7 +311,7 @@
 *           H = H(i+ib-1) . . . H(i+1) H(i)
 *
             CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
-     $                   A( 1, I ), LDA, TAU( I ), T, LDT )
+     $                   A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
             IF( LEFT ) THEN
 *
 *              H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -336,8 +327,8 @@
 *           Apply H or H**T
 *
             CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
-     $                   IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
-     $                   LDWORK )
+     $                   IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+     $                   WORK, LDWORK )
    10    CONTINUE
       END IF
       WORK( 1 ) = LWKOPT
diff --git a/lib/linalg/dormqr.f b/lib/linalg/dormqr.f
index c0767ecf61e2f07f4e7974df3f8a48b7fec5a4bf..7f2ebb9ace5f6116df657954cdd0591916376f91 100644
--- a/lib/linalg/dormqr.f
+++ b/lib/linalg/dormqr.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORMQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormqr.f"> 
+*> Download DORMQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 *                          WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS
 *       INTEGER            INFO, K, LDA, LDC, LWORK, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -136,9 +136,7 @@
 *>          The dimension of the array WORK.
 *>          If SIDE = 'L', LWORK >= max(1,N);
 *>          if SIDE = 'R', LWORK >= max(1,M).
-*>          For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*>          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*>          blocksize.
+*>          For good performance, LWORK should generally be larger.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal size of the WORK array, returns
@@ -156,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -169,10 +167,10 @@
       SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
      $                   WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS
@@ -185,17 +183,15 @@
 *  =====================================================================
 *
 *     .. Parameters ..
-      INTEGER            NBMAX, LDT
-      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      INTEGER            NBMAX, LDT, TSIZE
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1,
+     $                     TSIZE = LDT*NBMAX )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            LEFT, LQUERY, NOTRAN
-      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
      $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
 *     ..
-*     .. Local Arrays ..
-      DOUBLE PRECISION   T( LDT, NBMAX )
-*     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       INTEGER            ILAENV
@@ -245,12 +241,11 @@
 *
       IF( INFO.EQ.0 ) THEN
 *
-*        Determine the block size.  NB may be at most NBMAX, where NBMAX
-*        is used to define the local array T.
+*        Compute the workspace requirements
 *
          NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K,
      $        -1 ) )
-         LWKOPT = MAX( 1, NW )*NB
+         LWKOPT = MAX( 1, NW )*NB + TSIZE
          WORK( 1 ) = LWKOPT
       END IF
 *
@@ -271,14 +266,11 @@
       NBMIN = 2
       LDWORK = NW
       IF( NB.GT.1 .AND. NB.LT.K ) THEN
-         IWS = NW*NB
-         IF( LWORK.LT.IWS ) THEN
-            NB = LWORK / LDWORK
+         IF( LWORK.LT.NW*NB+TSIZE ) THEN
+            NB = (LWORK-TSIZE) / LDWORK
             NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K,
      $              -1 ) )
          END IF
-      ELSE
-         IWS = NW
       END IF
 *
       IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -291,6 +283,7 @@
 *
 *        Use blocked code
 *
+         IWT = 1 + NW*NB
          IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
      $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
             I1 = 1
@@ -317,7 +310,7 @@
 *           H = H(i) H(i+1) . . . H(i+ib-1)
 *
             CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
-     $                   LDA, TAU( I ), T, LDT )
+     $                   LDA, TAU( I ), WORK( IWT ), LDT )
             IF( LEFT ) THEN
 *
 *              H or H**T is applied to C(i:m,1:n)
@@ -335,8 +328,8 @@
 *           Apply H or H**T
 *
             CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
-     $                   IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
-     $                   WORK, LDWORK )
+     $                   IB, A( I, I ), LDA, WORK( IWT ), LDT,
+     $                   C( IC, JC ), LDC, WORK, LDWORK )
    10    CONTINUE
       END IF
       WORK( 1 ) = LWKOPT
diff --git a/lib/linalg/dormtr.f b/lib/linalg/dormtr.f
index 00fff4dda2a6546fccd1cfcb792956268813568d..d2443c1dac51b6957a34bcc3342e695578969f8b 100644
--- a/lib/linalg/dormtr.f
+++ b/lib/linalg/dormtr.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DORMTR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormtr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormtr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormtr.f"> 
+*> Download DORMTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dormtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dormtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dormtr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
 *                          WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE, TRANS, UPLO
 *       INTEGER            INFO, LDA, LDC, LWORK, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -158,12 +158,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
@@ -171,10 +171,10 @@
       SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
      $                   WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE, TRANS, UPLO
diff --git a/lib/linalg/dpotf2.f b/lib/linalg/dpotf2.f
index 6003e19b055ca2075781ebdabde72d0723d9c1ac..1fb60a903b42cae801ac8ca984b09c45140d392b 100644
--- a/lib/linalg/dpotf2.f
+++ b/lib/linalg/dpotf2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DPOTF2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dpotf2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dpotf2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dpotf2.f"> 
+*> Download DPOTF2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dpotf2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dpotf2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dpotf2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -97,22 +97,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doublePOcomputational
 *
 *  =====================================================================
       SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dpotrf.f b/lib/linalg/dpotrf.f
index 3457230b56724e0d8adb350fb34ba634a4a63d53..1fa75a465424f69d9df88f695c41fb79efe79729 100644
--- a/lib/linalg/dpotrf.f
+++ b/lib/linalg/dpotrf.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DPOTRF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dpotrf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dpotrf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dpotrf.f"> 
+*> Download DPOTRF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dpotrf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dpotrf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dpotrf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -95,22 +95,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doublePOcomputational
 *
 *  =====================================================================
       SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
@@ -136,7 +136,7 @@
       EXTERNAL           LSAME, ILAENV
 *     ..
 *     .. External Subroutines ..
-      EXTERNAL           DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+      EXTERNAL           DGEMM, DPOTRF2, DSYRK, DTRSM, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          MAX, MIN
@@ -171,7 +171,7 @@
 *
 *        Use unblocked code.
 *
-         CALL DPOTF2( UPLO, N, A, LDA, INFO )
+         CALL DPOTRF2( UPLO, N, A, LDA, INFO )
       ELSE
 *
 *        Use blocked code.
@@ -188,7 +188,7 @@
                JB = MIN( NB, N-J+1 )
                CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
      $                     A( 1, J ), LDA, ONE, A( J, J ), LDA )
-               CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               CALL DPOTRF2( 'Upper', JB, A( J, J ), LDA, INFO )
                IF( INFO.NE.0 )
      $            GO TO 30
                IF( J+JB.LE.N ) THEN
@@ -216,7 +216,7 @@
                JB = MIN( NB, N-J+1 )
                CALL DSYRK( 'Lower', 'No transpose', JB, J-1, -ONE,
      $                     A( J, 1 ), LDA, ONE, A( J, J ), LDA )
-               CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               CALL DPOTRF2( 'Lower', JB, A( J, J ), LDA, INFO )
                IF( INFO.NE.0 )
      $            GO TO 30
                IF( J+JB.LE.N ) THEN
diff --git a/lib/linalg/dpotrf2.f b/lib/linalg/dpotrf2.f
new file mode 100644
index 0000000000000000000000000000000000000000..0d419c4f008595bc8e86d8e128a789d138d1b25d
--- /dev/null
+++ b/lib/linalg/dpotrf2.f
@@ -0,0 +1,237 @@
+*> \brief \b DPOTRF2
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*  Definition:
+*  ===========
+*
+*       RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          UPLO
+*       INTEGER            INFO, LDA, N
+*       ..
+*       .. Array Arguments ..
+*       REAL               A( LDA, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DPOTRF2 computes the Cholesky factorization of a real symmetric
+*> positive definite matrix A using the recursive algorithm.
+*>
+*> The factorization has the form
+*>    A = U**T * U,  if UPLO = 'U', or
+*>    A = L  * L**T,  if UPLO = 'L',
+*> where U is an upper triangular matrix and L is lower triangular.
+*>
+*> This is the recursive version of the algorithm. It divides
+*> the matrix into four submatrices:
+*>
+*>        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2
+*>    A = [ -----|----- ]  with n1 = n/2
+*>        [  A21 | A22  ]       n2 = n-n1
+*>
+*> The subroutine calls itself to factor A11. Update and scale A21
+*> or A12, update A22 then calls itself to factor A22.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>          = 'U':  Upper triangle of A is stored;
+*>          = 'L':  Lower triangle of A is stored.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*>          N-by-N upper triangular part of A contains the upper
+*>          triangular part of the matrix A, and the strictly lower
+*>          triangular part of A is not referenced.  If UPLO = 'L', the
+*>          leading N-by-N lower triangular part of A contains the lower
+*>          triangular part of the matrix A, and the strictly upper
+*>          triangular part of A is not referenced.
+*>
+*>          On exit, if INFO = 0, the factor U or L from the Cholesky
+*>          factorization A = U**T*U or A = L*L**T.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,N).
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*>          > 0:  if INFO = i, the leading minor of order i is not
+*>                positive definite, and the factorization could not be
+*>                completed.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doublePOcomputational
+*
+*  =====================================================================
+      RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            N1, N2, IINFO
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME, DISNAN
+      EXTERNAL           LSAME, DISNAN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSYRK, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     N=1 case
+*
+      IF( N.EQ.1 ) THEN
+*
+*        Test for non-positive-definiteness
+*
+         IF( A( 1, 1 ).LE.ZERO.OR.DISNAN( A( 1, 1 ) ) ) THEN
+            INFO = 1
+            RETURN
+         END IF
+*
+*        Factor
+*
+         A( 1, 1 ) = SQRT( A( 1, 1 ) )
+*
+*     Use recursive code
+*
+      ELSE
+         N1 = N/2
+         N2 = N-N1
+*
+*        Factor A11
+*
+         CALL DPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO )
+         IF ( IINFO.NE.0 ) THEN
+            INFO = IINFO
+            RETURN
+         END IF
+*
+*        Compute the Cholesky factorization A = U**T*U
+*
+         IF( UPPER ) THEN
+*
+*           Update and scale A12
+*
+            CALL DTRSM( 'L', 'U', 'T', 'N', N1, N2, ONE,
+     $                  A( 1, 1 ), LDA, A( 1, N1+1 ), LDA )
+*
+*           Update and factor A22
+*
+            CALL DSYRK( UPLO, 'T', N2, N1, -ONE, A( 1, N1+1 ), LDA,
+     $                  ONE, A( N1+1, N1+1 ), LDA )
+            CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO )
+            IF ( IINFO.NE.0 ) THEN
+               INFO = IINFO + N1
+               RETURN
+            END IF
+*
+*        Compute the Cholesky factorization A = L*L**T
+*
+         ELSE
+*
+*           Update and scale A21
+*
+            CALL DTRSM( 'R', 'L', 'T', 'N', N2, N1, ONE,
+     $                  A( 1, 1 ), LDA, A( N1+1, 1 ), LDA )
+*
+*           Update and factor A22
+*
+            CALL DSYRK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA,
+     $                  ONE, A( N1+1, N1+1 ), LDA )
+            CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO )
+            IF ( IINFO.NE.0 ) THEN
+               INFO = IINFO + N1
+               RETURN
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of DPOTRF2
+*
+      END
diff --git a/lib/linalg/drot.f b/lib/linalg/drot.f
index 1615ef6a875c9f44d8c4a399dc7ed784283b0669..0d33ea76c859d424e6d7208498415ba811c22b39 100644
--- a/lib/linalg/drot.f
+++ b/lib/linalg/drot.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION C,S
 *       INTEGER INCX,INCY,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -27,15 +27,56 @@
 *>    DROT applies a plane rotation.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in,out] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*>
+*> \param[in,out] DY
+*> \verbatim
+*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of DY
+*> \endverbatim
+*>
+*> \param[in] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*> \endverbatim
+*>
+*> \param[in] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -51,10 +92,10 @@
 *  =====================================================================
       SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION C,S
diff --git a/lib/linalg/drscl.f b/lib/linalg/drscl.f
index 21ba19c11afd8b42b1dda66b1b8e378d3df9a6de..92511436801917b86c7c32ba8012c57b4a971867 100644
--- a/lib/linalg/drscl.f
+++ b/lib/linalg/drscl.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DRSCL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/drscl.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/drscl.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/drscl.f"> 
+*> Download DRSCL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/drscl.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/drscl.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/drscl.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DRSCL( N, SA, SX, INCX )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       DOUBLE PRECISION   SA
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   SX( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -72,22 +72,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DRSCL( N, SA, SX, INCX )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/dscal.f b/lib/linalg/dscal.f
index 3337de8e63c498715f5d9e90284dab9c6607a170..e0a92de6ba652a525c356439326bf3f2813d287a 100644
--- a/lib/linalg/dscal.f
+++ b/lib/linalg/dscal.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSCAL(N,DA,DX,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION DA
 *       INTEGER INCX,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -25,18 +25,44 @@
 *> \verbatim
 *>
 *>    DSCAL scales a vector by a constant.
-*>    uses unrolled loops for increment equal to one.
+*>    uses unrolled loops for increment equal to 1.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DA
+*> \verbatim
+*>          DA is DOUBLE PRECISION
+*>           On entry, DA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in,out] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -53,10 +79,10 @@
 *  =====================================================================
       SUBROUTINE DSCAL(N,DA,DX,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION DA
diff --git a/lib/linalg/dstedc.f b/lib/linalg/dstedc.f
index b59e1c3bbd56de572cfb3e7ce6bbcf759a14c3df..61b44bc06b5180fe7c12cd02c384291756a4ae66 100644
--- a/lib/linalg/dstedc.f
+++ b/lib/linalg/dstedc.f
@@ -1,26 +1,26 @@
-*> \brief \b DSTEBZ
+*> \brief \b DSTEDC
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSTEDC + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dstedc.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dstedc.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dstedc.f"> 
+*> Download DSTEDC + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dstedc.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dstedc.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dstedc.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
 *                          LIWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          COMPZ
 *       INTEGER            INFO, LDZ, LIWORK, LWORK, N
@@ -29,7 +29,7 @@
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -105,8 +105,7 @@
 *>
 *> \param[out] WORK
 *> \verbatim
-*>          WORK is DOUBLE PRECISION array,
-*>                                         dimension (LWORK)
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
@@ -169,12 +168,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date June 2017
 *
 *> \ingroup auxOTHERcomputational
 *
@@ -189,10 +188,10 @@
       SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
      $                   LIWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     June 2017
 *
 *     .. Scalar Arguments ..
       CHARACTER          COMPZ
@@ -443,38 +442,32 @@
 *
 *        endwhile
 *
-*        If the problem split any number of times, then the eigenvalues
-*        will not be properly ordered.  Here we permute the eigenvalues
-*        (and the associated eigenvectors) into ascending order.
-*
-         IF( M.NE.N ) THEN
-            IF( ICOMPZ.EQ.0 ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
 *
-*              Use Quick Sort
+*          Use Quick Sort
 *
-               CALL DLASRT( 'I', N, D, INFO )
+           CALL DLASRT( 'I', N, D, INFO )
 *
-            ELSE
+         ELSE
 *
-*              Use Selection Sort to minimize swaps of eigenvectors
-*
-               DO 40 II = 2, N
-                  I = II - 1
-                  K = I
-                  P = D( I )
-                  DO 30 J = II, N
-                     IF( D( J ).LT.P ) THEN
-                        K = J
-                        P = D( J )
-                     END IF
-   30             CONTINUE
-                  IF( K.NE.I ) THEN
-                     D( K ) = D( I )
-                     D( I ) = P
-                     CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
-                  END IF
-   40          CONTINUE
-            END IF
+*          Use Selection Sort to minimize swaps of eigenvectors
+*
+           DO 40 II = 2, N
+              I = II - 1
+              K = I
+              P = D( I )
+              DO 30 J = II, N
+                 IF( D( J ).LT.P ) THEN
+                    K = J
+                    P = D( J )
+                 END IF
+   30         CONTINUE
+              IF( K.NE.I ) THEN
+                 D( K ) = D( I )
+                 D( I ) = P
+                 CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+              END IF
+   40      CONTINUE
          END IF
       END IF
 *
diff --git a/lib/linalg/dsteqr.f b/lib/linalg/dsteqr.f
index 9e165bb6bbeb523e05e0859d67cbfb60eab7c779..c34a548984aeaf252dd29040ccf33a2616d84b10 100644
--- a/lib/linalg/dsteqr.f
+++ b/lib/linalg/dsteqr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSTEQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsteqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsteqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsteqr.f"> 
+*> Download DSTEQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsteqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsteqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsteqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          COMPZ
 *       INTEGER            INFO, LDZ, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -119,22 +119,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          COMPZ
diff --git a/lib/linalg/dsterf.f b/lib/linalg/dsterf.f
index b93cc13dd6ada6eb41882a143898eb7ebf8fe4c2..3401894819aee18d6ab0742c7b7327e25efc2a7c 100644
--- a/lib/linalg/dsterf.f
+++ b/lib/linalg/dsterf.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSTERF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsterf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsterf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsterf.f"> 
+*> Download DSTERF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsterf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsterf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsterf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSTERF( N, D, E, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   D( * ), E( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -74,22 +74,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup auxOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DSTERF( N, D, E, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, N
@@ -190,7 +190,7 @@
       ANORM = DLANST( 'M', LEND-L+1, D( L ), E( L ) )
       ISCALE = 0
       IF( ANORM.EQ.ZERO )
-     $   GO TO 10      
+     $   GO TO 10
       IF( (ANORM.GT.SSFMAX) ) THEN
          ISCALE = 1
          CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
diff --git a/lib/linalg/dswap.f b/lib/linalg/dswap.f
index e567bd93ec3e5078dcfc86a19dcf6e1e0cdf37e3..94dfea3bb919eb115fea75777a27403da3ac8af6 100644
--- a/lib/linalg/dswap.f
+++ b/lib/linalg/dswap.f
@@ -2,40 +2,71 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSWAP(N,DX,INCX,DY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*),DY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
-*>    interchanges two vectors.
-*>    uses unrolled loops for increments equal one.
+*>    DSWAP interchanges two vectors.
+*>    uses unrolled loops for increments equal to 1.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in,out] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of DX
+*> \endverbatim
+*>
+*> \param[in,out] DY
+*> \verbatim
+*>          DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of DY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -51,10 +82,10 @@
 *  =====================================================================
       SUBROUTINE DSWAP(N,DX,INCX,DY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
diff --git a/lib/linalg/dsyev.f b/lib/linalg/dsyev.f
index 64b39ed84783e6eaa8df5220437c433ad3153afe..ee8c479abea1995ba16bd3a3930db3ab5205a906 100644
--- a/lib/linalg/dsyev.f
+++ b/lib/linalg/dsyev.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYEV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyev.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyev.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyev.f"> 
+*> Download DSYEV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyev.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyev.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyev.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBZ, UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -120,22 +120,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleSYeigen
 *
 *  =====================================================================
       SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.0) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ, UPLO
diff --git a/lib/linalg/dsyevd.f b/lib/linalg/dsyevd.f
index 3c9545ac31a5f85ee054d46d8be9eeea2e66079e..2db67846dc309fcb81380c7ccedf53b9197b6eaa 100644
--- a/lib/linalg/dsyevd.f
+++ b/lib/linalg/dsyevd.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYEVD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd.f"> 
+*> Download DSYEVD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK,
 *                          LIWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBZ, UPLO
 *       INTEGER            INFO, LDA, LIWORK, LWORK, N
@@ -29,7 +29,7 @@
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -162,12 +162,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleSYeigen
 *
@@ -185,10 +185,10 @@
       SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK,
      $                   LIWORK, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.2) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ, UPLO
diff --git a/lib/linalg/dsygs2.f b/lib/linalg/dsygs2.f
index 644dcfff1b79223dbd5e27830c943de7b906450f..a54955c01e3cec4e3a20165fc3d469470804dd4c 100644
--- a/lib/linalg/dsygs2.f
+++ b/lib/linalg/dsygs2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYGS2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygs2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygs2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygs2.f"> 
+*> Download DSYGS2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygs2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygs2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygs2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, ITYPE, LDA, LDB, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -115,22 +115,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleSYcomputational
 *
 *  =====================================================================
       SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dsygst.f b/lib/linalg/dsygst.f
index f1d5311c9afdee54560f1ce71f0b613228a952bb..5055acdf1de62b512f2ab294e06a7e3dff27d21d 100644
--- a/lib/linalg/dsygst.f
+++ b/lib/linalg/dsygst.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYGST + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygst.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygst.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygst.f"> 
+*> Download DSYGST + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygst.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygst.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygst.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, ITYPE, LDA, LDB, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -115,22 +115,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleSYcomputational
 *
 *  =====================================================================
       SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dsygv.f b/lib/linalg/dsygv.f
index e55631851869147a167a53aeeda1f72247794f1a..651abc5c7b18b7265c0fbf7ae26329c3cf892ba7 100644
--- a/lib/linalg/dsygv.f
+++ b/lib/linalg/dsygv.f
@@ -1,26 +1,26 @@
-*> \brief \b DSYGST
+*> \brief \b DSYGV
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYGV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygv.f"> 
+*> Download DSYGV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygv.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
 *                         LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBZ, UPLO
 *       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -162,12 +162,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleSYeigen
 *
@@ -175,10 +175,10 @@
       SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
      $                  LWORK, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.0) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ, UPLO
diff --git a/lib/linalg/dsygvd.f b/lib/linalg/dsygvd.f
index 171aa175ff11fdf0ae813221c348b0fd6e5be28e..29c78283a703986f750bd5b6ddaa3bfa82722af6 100644
--- a/lib/linalg/dsygvd.f
+++ b/lib/linalg/dsygvd.f
@@ -1,26 +1,26 @@
-*> \brief \b DSYGST
+*> \brief \b DSYGVD
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYGVD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygvd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygvd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygvd.f"> 
+*> Download DSYGVD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygvd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygvd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygvd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
 *                          LWORK, IWORK, LIWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBZ, UPLO
 *       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N
@@ -29,7 +29,7 @@
 *       INTEGER            IWORK( * )
 *       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -198,12 +198,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleSYeigen
 *
@@ -227,10 +227,10 @@
       SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
      $                   LWORK, IWORK, LIWORK, INFO )
 *
-*  -- LAPACK driver routine (version 3.4.0) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ, UPLO
diff --git a/lib/linalg/dsymm.f b/lib/linalg/dsymm.f
index ee8df4df4b2a616c861a859c79e8ba13f35d92a2..622d2469f15a6ee192a8e0ae458e614d69ec976e 100644
--- a/lib/linalg/dsymm.f
+++ b/lib/linalg/dsymm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER LDA,LDB,LDC,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -87,7 +87,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
 *>           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
 *>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
 *>           the array  A  must contain the  symmetric matrix,  such that
@@ -122,7 +122,7 @@
 *>
 *> \param[in] B
 *> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*>          B is DOUBLE PRECISION array, dimension ( LDB, N )
 *>           Before entry, the leading  m by n part of the array  B  must
 *>           contain the matrix B.
 *> \endverbatim
@@ -144,7 +144,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*>          C is DOUBLE PRECISION array, dimension ( LDC, N )
 *>           Before entry, the leading  m by n  part of the array  C must
 *>           contain the matrix  C,  except when  beta  is zero, in which
 *>           case C need not be set on entry.
@@ -163,12 +163,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -189,10 +189,10 @@
 *  =====================================================================
       SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
diff --git a/lib/linalg/dsymv.f b/lib/linalg/dsymv.f
index 552202383471a392774ccd6a6b37be9096755080..4bf973f10a443cc7629c594279dfc6260387d333 100644
--- a/lib/linalg/dsymv.f
+++ b/lib/linalg/dsymv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER INCX,INCY,LDA,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -65,7 +65,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular part of the symmetric matrix and the strictly
@@ -86,7 +86,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x.
@@ -108,7 +108,7 @@
 *>
 *> \param[in,out] Y
 *> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
+*>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y. On exit, Y is overwritten by the updated
@@ -125,12 +125,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level2
 *
@@ -152,10 +152,10 @@
 *  =====================================================================
       SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
diff --git a/lib/linalg/dsyr2.f b/lib/linalg/dsyr2.f
index 05e148105cb1ac3c3764d73ea0da67d76c3e27fa..8970c4dcfd932a247a7384dca53225dfd8f7e235 100644
--- a/lib/linalg/dsyr2.f
+++ b/lib/linalg/dsyr2.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER INCX,INCY,LDA,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -65,7 +65,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x.
@@ -80,7 +80,7 @@
 *>
 *> \param[in] Y
 *> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
+*>          Y is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y.
@@ -95,7 +95,7 @@
 *>
 *> \param[in,out] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular part of the symmetric matrix and the strictly
@@ -121,12 +121,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level2
 *
@@ -147,10 +147,10 @@
 *  =====================================================================
       SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA
diff --git a/lib/linalg/dsyr2k.f b/lib/linalg/dsyr2k.f
index 2dde293eae5d72a6e321140b567d23e5df19970f..f3a5940c7f7f88b311ff751787cdafbcf30b6271 100644
--- a/lib/linalg/dsyr2k.f
+++ b/lib/linalg/dsyr2k.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER K,LDA,LDB,LDC,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -95,7 +95,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
 *>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
@@ -114,7 +114,7 @@
 *>
 *> \param[in] B
 *> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*>          B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
 *>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
 *>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
 *>           part of the array  B  must contain the matrix  B,  otherwise
@@ -139,7 +139,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*>          C is DOUBLE PRECISION array, dimension ( LDC, N )
 *>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
 *>           upper triangular part of the array C must contain the upper
 *>           triangular part  of the  symmetric matrix  and the strictly
@@ -165,12 +165,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -192,10 +192,10 @@
 *  =====================================================================
       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
diff --git a/lib/linalg/dsyrk.f b/lib/linalg/dsyrk.f
index d91c3369f620c8ade13017920d3c690a6319f62c..4be4d8d3c4f53148d6b9371a39d5aed996c979e1 100644
--- a/lib/linalg/dsyrk.f
+++ b/lib/linalg/dsyrk.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA,BETA
 *       INTEGER K,LDA,LDC,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -92,7 +92,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
 *>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
@@ -117,7 +117,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*>          C is DOUBLE PRECISION array, dimension ( LDC, N )
 *>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
 *>           upper triangular part of the array C must contain the upper
 *>           triangular part  of the  symmetric matrix  and the strictly
@@ -143,12 +143,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -169,10 +169,10 @@
 *  =====================================================================
       SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA,BETA
diff --git a/lib/linalg/dsytd2.f b/lib/linalg/dsytd2.f
index a238f9ab3b830d267555ff8966ffb31cf5b568ca..6fb4d5507e7b204620438880003cb8c86a4a7f7f 100644
--- a/lib/linalg/dsytd2.f
+++ b/lib/linalg/dsytd2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYTD2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytd2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytd2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytd2.f"> 
+*> Download DSYTD2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytd2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytd2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytd2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAU( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -115,12 +115,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleSYcomputational
 *
@@ -173,10 +173,10 @@
 *  =====================================================================
       SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dsytrd.f b/lib/linalg/dsytrd.f
index b268f4c1e4d7ef46cc73c91f1f8c050995661711..d330b241fa27cdba2eba435cc4f4c28e1cb84b03 100644
--- a/lib/linalg/dsytrd.f
+++ b/lib/linalg/dsytrd.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DSYTRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd.f"> 
+*> Download DSYTRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAU( * ),
 *      $                   WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -134,12 +134,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleSYcomputational
 *
@@ -192,10 +192,10 @@
 *  =====================================================================
       SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/dtrmm.f b/lib/linalg/dtrmm.f
index cbd5ce7034a4444656c21b2169cd4ba7e66f909e..0241c4d1465bb3ea05dca4de1a5dade03f32ed54 100644
--- a/lib/linalg/dtrmm.f
+++ b/lib/linalg/dtrmm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER LDA,LDB,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -109,7 +109,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>           A is DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+*>           A is DOUBLE PRECISION array, dimension ( LDA, k ), where k is m
 *>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
 *>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
 *>           upper triangular part of the array  A must contain the upper
@@ -134,7 +134,7 @@
 *>
 *> \param[in,out] B
 *> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*>          B is DOUBLE PRECISION array, dimension ( LDB, N )
 *>           Before entry,  the leading  m by n part of the array  B must
 *>           contain the matrix  B,  and  on exit  is overwritten  by the
 *>           transformed matrix.
@@ -151,12 +151,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -177,10 +177,10 @@
 *  =====================================================================
       SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA
diff --git a/lib/linalg/dtrmv.f b/lib/linalg/dtrmv.f
index 71459fe7c87bb9ba7ea18063c81b9825a483f3f6..11c12ac724468ff59b017fd6afd995496781e85f 100644
--- a/lib/linalg/dtrmv.f
+++ b/lib/linalg/dtrmv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,LDA,N
 *       CHARACTER DIAG,TRANS,UPLO
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -80,7 +80,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular matrix and the strictly lower triangular part of
@@ -103,11 +103,11 @@
 *>
 *> \param[in,out] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x. On exit, X is overwritten with the
-*>           tranformed vector x.
+*>           transformed vector x.
 *> \endverbatim
 *>
 *> \param[in] INCX
@@ -120,12 +120,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level2
 *
@@ -147,10 +147,10 @@
 *  =====================================================================
       SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,LDA,N
diff --git a/lib/linalg/dtrsm.f b/lib/linalg/dtrsm.f
index 065df9a15332106e98b0ab5179e74c369bf1f6cd..5a92bcafd09557e07ddbe19124551b2bc46cbe6a 100644
--- a/lib/linalg/dtrsm.f
+++ b/lib/linalg/dtrsm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER LDA,LDB,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),B(LDB,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -111,8 +111,8 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'  
+*>          A is DOUBLE PRECISION array, dimension ( LDA, k ),
+*>           where k is m when SIDE = 'L' or 'l'
 *>             and k is n when SIDE = 'R' or 'r'.
 *>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
 *>           upper triangular part of the array  A must contain the upper
@@ -137,7 +137,7 @@
 *>
 *> \param[in,out] B
 *> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*>          B is DOUBLE PRECISION array, dimension ( LDB, N )
 *>           Before entry,  the leading  m by n part of the array  B must
 *>           contain  the  right-hand  side  matrix  B,  and  on exit  is
 *>           overwritten by the solution matrix  X.
@@ -154,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level3
 *
@@ -181,10 +181,10 @@
 *  =====================================================================
       SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA
diff --git a/lib/linalg/dtrsv.f b/lib/linalg/dtrsv.f
index e54303a93a0d88f44f0c37aee57e08668defd813..331f1d431180edfae8258a714b1a65e7775ca9f2 100644
--- a/lib/linalg/dtrsv.f
+++ b/lib/linalg/dtrsv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,LDA,N
 *       CHARACTER DIAG,TRANS,UPLO
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION A(LDA,*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -83,7 +83,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*>          A is DOUBLE PRECISION array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular matrix and the strictly lower triangular part of
@@ -106,7 +106,7 @@
 *>
 *> \param[in,out] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
+*>          X is DOUBLE PRECISION array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element right-hand side vector b. On exit, X is overwritten
@@ -131,22 +131,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
 *  =====================================================================
       SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,LDA,N
diff --git a/lib/linalg/dtrti2.f b/lib/linalg/dtrti2.f
index edf1b5b003ad42458cb3308b0706eb064daa60d0..0a9d5b696ccc5240475067b68ad9768ce6e3b09f 100644
--- a/lib/linalg/dtrti2.f
+++ b/lib/linalg/dtrti2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DTRTI2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrti2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrti2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrti2.f"> 
+*> Download DTRTI2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrti2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrti2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrti2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, UPLO
 *       INTEGER            INFO, LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -98,22 +98,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, UPLO
diff --git a/lib/linalg/dtrtri.f b/lib/linalg/dtrtri.f
index 5d27ca56af1e081a39598e6006cc18ade457d6a3..d34b40bcc00b0d6d647d3a863a6302e662c27b2a 100644
--- a/lib/linalg/dtrtri.f
+++ b/lib/linalg/dtrtri.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download DTRTRI + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f"> 
+*> Download DTRTRI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, UPLO
 *       INTEGER            INFO, LDA, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -97,22 +97,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup doubleOTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, UPLO
diff --git a/lib/linalg/dznrm2.f b/lib/linalg/dznrm2.f
index b5713a2bfaf0b92dd3e27e8a007eb91130c2195a..e5a71d98f6709b81f8d2484822c196810285f0ee 100644
--- a/lib/linalg/dznrm2.f
+++ b/lib/linalg/dznrm2.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16 X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -29,15 +29,36 @@
 *>    DZNRM2 := sqrt( x**H*x )
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] X
+*> \verbatim
+*>          X is COMPLEX*16 array, dimension (N)
+*>         complex vector with N elements
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of X
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup double_blas_level1
 *
@@ -54,10 +75,10 @@
 *  =====================================================================
       DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/idamax.f b/lib/linalg/idamax.f
index 4233fcc27350e97319f56818bee1031df0c7a94c..17041680a40c8953314a32a7a164b220a8900f26 100644
--- a/lib/linalg/idamax.f
+++ b/lib/linalg/idamax.f
@@ -2,39 +2,59 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION IDAMAX(N,DX,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION DX(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
-*>    IDAMAX finds the index of element having max. absolute value.
+*>    IDAMAX finds the index of the first element having maximum absolute value.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DX
+*> \verbatim
+*>          DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of SX
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup aux_blas
 *
@@ -51,10 +71,10 @@
 *  =====================================================================
       INTEGER FUNCTION IDAMAX(N,DX,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/ieeeck.f b/lib/linalg/ieeeck.f
index 132e436770774827878009d82d957052816cf2ae..2655958b4a2d94179e9691f9c9fe696e9bdef134 100644
--- a/lib/linalg/ieeeck.f
+++ b/lib/linalg/ieeeck.f
@@ -2,29 +2,29 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download IEEECK + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ieeeck.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ieeeck.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ieeeck.f"> 
+*> Download IEEECK + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ieeeck.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ieeeck.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ieeeck.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER          FUNCTION IEEECK( ISPEC, ZERO, ONE )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            ISPEC
 *       REAL               ONE, ZERO
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -70,22 +70,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       INTEGER          FUNCTION IEEECK( ISPEC, ZERO, ONE )
 *
-*  -- LAPACK auxiliary routine (version 3.4.0) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            ISPEC
diff --git a/lib/linalg/iladlc.f b/lib/linalg/iladlc.f
index b56387d320f716ea3c41cad85303de8baf21ca6a..c6476113d1550202c781f019651ab7643f4f0cc5 100644
--- a/lib/linalg/iladlc.f
+++ b/lib/linalg/iladlc.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ILADLC + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iladlc.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iladlc.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iladlc.f"> 
+*> Download ILADLC + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iladlc.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iladlc.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iladlc.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION ILADLC( M, N, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            M, N, LDA
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -66,22 +66,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       INTEGER FUNCTION ILADLC( M, N, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            M, N, LDA
diff --git a/lib/linalg/iladlr.f b/lib/linalg/iladlr.f
index fe155af075fb9f1eb7ac989d8ea329855e033f88..e8951d86cc288494faa54879b96ea50f187b0207 100644
--- a/lib/linalg/iladlr.f
+++ b/lib/linalg/iladlr.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ILADLR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iladlr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iladlr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iladlr.f"> 
+*> Download ILADLR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iladlr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iladlr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iladlr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION ILADLR( M, N, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            M, N, LDA
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -66,22 +66,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       INTEGER FUNCTION ILADLR( M, N, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            M, N, LDA
diff --git a/lib/linalg/ilaenv.f b/lib/linalg/ilaenv.f
index 867464de357e36fd1b19db47f80c1b40d47262ac..2be05815179215335f31713eae454135e97fdde3 100644
--- a/lib/linalg/ilaenv.f
+++ b/lib/linalg/ilaenv.f
@@ -2,29 +2,29 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ILAENV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilaenv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilaenv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilaenv.f"> 
+*> Download ILAENV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilaenv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilaenv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilaenv.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER*( * )    NAME, OPTS
 *       INTEGER            ISPEC, N1, N2, N3, N4
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -82,7 +82,7 @@
 *>          =10: ieee NaN arithmetic can be trusted not to trap
 *>          =11: infinity arithmetic can be trusted not to trap
 *>          12 <= ISPEC <= 16:
-*>               xHSEQR or one of its subroutines,
+*>               xHSEQR or related subroutines,
 *>               see IPARMQ for detailed explanation
 *> \endverbatim
 *>
@@ -127,14 +127,14 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -162,10 +162,10 @@
 *  =====================================================================
       INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
 *
-*  -- LAPACK auxiliary routine (version 3.4.0) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER*( * )    NAME, OPTS
@@ -183,13 +183,14 @@
       INTRINSIC          CHAR, ICHAR, INT, MIN, REAL
 *     ..
 *     .. External Functions ..
-      INTEGER            IEEECK, IPARMQ
-      EXTERNAL           IEEECK, IPARMQ
+      INTEGER            IEEECK, IPARMQ, IPARAM2STAGE
+      EXTERNAL           IEEECK, IPARMQ, IPARAM2STAGE
 *     ..
 *     .. Executable Statements ..
 *
       GO TO ( 10, 10, 10, 80, 90, 100, 110, 120,
-     $        130, 140, 150, 160, 160, 160, 160, 160 )ISPEC
+     $        130, 140, 150, 160, 160, 160, 160, 160,
+     $        170, 170, 170, 170, 170 )ISPEC
 *
 *     Invalid value for ISPEC
 *
@@ -283,6 +284,52 @@
             ELSE
                NB = 32
             END IF
+         ELSE IF( C3.EQ.'QR ') THEN
+            IF( N3 .EQ. 1) THEN
+               IF( SNAME ) THEN
+*     M*N
+                  IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN
+                     NB = N1
+                  ELSE
+                     NB = 32768/N2
+                  END IF
+               ELSE
+                  IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN
+                     NB = N1
+                  ELSE
+                     NB = 32768/N2
+                  END IF
+               END IF
+            ELSE
+               IF( SNAME ) THEN
+                  NB = 1
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         ELSE IF( C3.EQ.'LQ ') THEN
+            IF( N3 .EQ. 2) THEN
+               IF( SNAME ) THEN
+*     M*N
+                  IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN
+                     NB = N1
+                  ELSE
+                     NB = 32768/N2
+                  END IF
+               ELSE
+                  IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN
+                     NB = N1
+                  ELSE
+                     NB = 32768/N2
+                  END IF
+               END IF
+            ELSE
+               IF( SNAME ) THEN
+                  NB = 1
+               ELSE
+                  NB = 1
+               END IF
+            END IF
          ELSE IF( C3.EQ.'HRD' ) THEN
             IF( SNAME ) THEN
                NB = 32
@@ -397,6 +444,12 @@
             ELSE
                NB = 64
             END IF
+         ELSE IF ( C3.EQ.'EVC' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
          END IF
       ELSE IF( C2.EQ.'LA' ) THEN
          IF( C3.EQ.'UUM' ) THEN
@@ -410,6 +463,15 @@
          IF( C3.EQ.'EBZ' ) THEN
             NB = 1
          END IF
+      ELSE IF( C2.EQ.'GG' ) THEN
+         NB = 32
+         IF( C3.EQ.'HD3' ) THEN
+            IF( SNAME ) THEN
+               NB = 32
+            ELSE
+               NB = 32
+            END IF
+         END IF
       END IF
       ILAENV = NB
       RETURN
@@ -488,6 +550,11 @@
                NBMIN = 2
             END IF
          END IF
+      ELSE IF( C2.EQ.'GG' ) THEN
+         NBMIN = 2
+         IF( C3.EQ.'HD3' ) THEN
+            NBMIN = 2
+         END IF
       END IF
       ILAENV = NBMIN
       RETURN
@@ -542,6 +609,11 @@
                NX = 128
             END IF
          END IF
+      ELSE IF( C2.EQ.'GG' ) THEN
+         NX = 128
+         IF( C3.EQ.'HD3' ) THEN
+            NX = 128
+         END IF
       END IF
       ILAENV = NX
       RETURN
@@ -614,10 +686,17 @@
 *
   160 CONTINUE
 *
-*     12 <= ISPEC <= 16: xHSEQR or one of its subroutines. 
+*     12 <= ISPEC <= 16: xHSEQR or related subroutines.
 *
       ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
       RETURN
+*
+  170 CONTINUE
+*
+*     17 <= ISPEC <= 21: 2stage eigenvalues and SVD or related subroutines.
+*
+      ILAENV = IPARAM2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
+      RETURN
 *
 *     End of ILAENV
 *
diff --git a/lib/linalg/ilazlc.f b/lib/linalg/ilazlc.f
index 718b277dfa6596e95fcbe37972f5af9d1fb79fb3..07dfc93e31af02affe9c3f3ed601d1a144b6dfc2 100644
--- a/lib/linalg/ilazlc.f
+++ b/lib/linalg/ilazlc.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ILAZLC + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlc.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlc.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlc.f"> 
+*> Download ILAZLC + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlc.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlc.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlc.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION ILAZLC( M, N, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            M, N, LDA
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -66,22 +66,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       INTEGER FUNCTION ILAZLC( M, N, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            M, N, LDA
diff --git a/lib/linalg/ilazlr.f b/lib/linalg/ilazlr.f
index 44697214c75b0358a1568e2f46aa3d2b449f7b5c..4ca4ed1a44fe585f534375701fc5fd8945ba7b79 100644
--- a/lib/linalg/ilazlr.f
+++ b/lib/linalg/ilazlr.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ILAZLR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlr.f"> 
+*> Download ILAZLR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION ILAZLR( M, N, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            M, N, LDA
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -66,22 +66,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       INTEGER FUNCTION ILAZLR( M, N, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            M, N, LDA
diff --git a/lib/linalg/iparam2stage.F b/lib/linalg/iparam2stage.F
new file mode 100644
index 0000000000000000000000000000000000000000..60bd0b696b8398b2a7bb7aed329441e8432bb26d
--- /dev/null
+++ b/lib/linalg/iparam2stage.F
@@ -0,0 +1,386 @@
+*> \brief \b IPARAM2STAGE
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at 
+*            http://www.netlib.org/lapack/explore-html/ 
+*
+*> \htmlonly
+*> Download IPARAM2STAGE + dependencies 
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> 
+*> [TGZ]</a> 
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> 
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> 
+*> [TXT]</a>
+*> \endhtmlonly 
+*
+*  Definition:
+*  ===========
+*
+*       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
+*                                    NI, NBI, IBI, NXI )
+*       #if defined(_OPENMP)
+*           use omp_lib
+*       #endif
+*       IMPLICIT NONE
+*
+*       .. Scalar Arguments ..
+*       CHARACTER*( * )    NAME, OPTS
+*       INTEGER            ISPEC, NI, NBI, IBI, NXI
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*>      This program sets problem and machine dependent parameters
+*>      useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST,
+*>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
+*>      and related subroutines for eigenvalue problems. 
+*>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ISPEC
+*> \verbatim
+*>          ISPEC is integer scalar
+*>              ISPEC specifies which tunable parameter IPARAM2STAGE should
+*>              return.
+*>
+*>              ISPEC=17: the optimal blocksize nb for the reduction to
+*                         BAND
+*>
+*>              ISPEC=18: the optimal blocksize ib for the eigenvectors
+*>                        singular vectors update routine
+*>
+*>              ISPEC=19: The length of the array that store the Housholder 
+*>                        representation for the second stage 
+*>                        Band to Tridiagonal or Bidiagonal
+*>
+*>              ISPEC=20: The workspace needed for the routine in input.
+*>
+*>              ISPEC=21: For future release.
+*> \endverbatim
+*>
+*> \param[in] NAME
+*> \verbatim
+*>          NAME is character string
+*>               Name of the calling subroutine
+*> \endverbatim
+*>
+*> \param[in] OPTS
+*> \verbatim
+*>          OPTS is CHARACTER*(*)
+*>          The character options to the subroutine NAME, concatenated
+*>          into a single character string.  For example, UPLO = 'U',
+*>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
+*>          be specified as OPTS = 'UTN'.
+*> \endverbatim
+*>
+*> \param[in] NI
+*> \verbatim
+*>          NI is INTEGER which is the size of the matrix
+*> \endverbatim
+*>
+*> \param[in] NBI
+*> \verbatim
+*>          NBI is INTEGER which is the used in the reduciton, 
+*           (e.g., the size of the band), needed to compute workspace
+*           and LHOUS2.
+*> \endverbatim
+*>
+*> \param[in] IBI
+*> \verbatim
+*>          IBI is INTEGER which represent the IB of the reduciton,
+*           needed to compute workspace and LHOUS2.
+*> \endverbatim
+*>
+*> \param[in] NXI
+*> \verbatim
+*>          NXI is INTEGER needed in the future release.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee 
+*> \author Univ. of California Berkeley 
+*> \author Univ. of Colorado Denver 
+*> \author NAG Ltd. 
+*
+*> \date June 2016
+*
+*> \ingroup auxOTHERauxiliary
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  Implemented by Azzam Haidar.
+*>
+*>  All detail are available on technical report, SC11, SC13 papers.
+*>
+*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
+*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
+*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
+*>  of 2011 International Conference for High Performance Computing,
+*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
+*>  Article 8 , 11 pages.
+*>  http://doi.acm.org/10.1145/2063384.2063394
+*>
+*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
+*>  An improved parallel singular value algorithm and its implementation 
+*>  for multicore hardware, In Proceedings of 2013 International Conference
+*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
+*>  Denver, Colorado, USA, 2013.
+*>  Article 90, 12 pages.
+*>  http://doi.acm.org/10.1145/2503210.2503292
+*>
+*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
+*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
+*>  calculations based on fine-grained memory aware tasks.
+*>  International Journal of High Performance Computing Applications.
+*>  Volume 28 Issue 2, Pages 196-209, May 2014.
+*>  http://hpc.sagepub.com/content/28/2/196 
+*>
+*> \endverbatim
+*>
+*  =====================================================================
+      INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
+     $                              NI, NBI, IBI, NXI )
+#if defined(_OPENMP)
+      use omp_lib
+#endif
+      IMPLICIT NONE
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER*( * )    NAME, OPTS
+      INTEGER            ISPEC, NI, NBI, IBI, NXI
+*
+*  ================================================================
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
+     $                   FACTOPTNB, QROPTNB, LQOPTNB
+      LOGICAL            RPREC, CPREC
+      CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CHAR, ICHAR, MAX
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Invalid value for ISPEC
+*
+      IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
+          IPARAM2STAGE = -1
+          RETURN
+      ENDIF
+*
+*     Get the number of threads
+*      
+      NTHREADS = 1
+#if defined(_OPENMP)
+!$OMP PARALLEL 
+      NTHREADS = OMP_GET_NUM_THREADS()
+!$OMP END PARALLEL
+#endif
+*      WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
+*
+      IF( ISPEC .NE. 19 ) THEN
+*
+*        Convert NAME to upper case if the first character is lower case.
+*
+         IPARAM2STAGE = -1
+         SUBNAM = NAME
+         IC = ICHAR( SUBNAM( 1: 1 ) )
+         IZ = ICHAR( 'Z' )
+         IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
+*
+*           ASCII character set
+*
+            IF( IC.GE.97 .AND. IC.LE.122 ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC-32 )
+               DO 100 I = 2, 12
+                  IC = ICHAR( SUBNAM( I: I ) )
+                  IF( IC.GE.97 .AND. IC.LE.122 )
+     $               SUBNAM( I: I ) = CHAR( IC-32 )
+  100          CONTINUE
+            END IF
+*
+         ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
+*
+*           EBCDIC character set
+*
+            IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC+64 )
+               DO 110 I = 2, 12
+                  IC = ICHAR( SUBNAM( I: I ) )
+                  IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
+     $                I ) = CHAR( IC+64 )
+  110          CONTINUE
+            END IF
+*
+         ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
+*
+*           Prime machines:  ASCII+128
+*
+            IF( IC.GE.225 .AND. IC.LE.250 ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC-32 )
+               DO 120 I = 2, 12
+                 IC = ICHAR( SUBNAM( I: I ) )
+                 IF( IC.GE.225 .AND. IC.LE.250 )
+     $             SUBNAM( I: I ) = CHAR( IC-32 )
+  120          CONTINUE
+            END IF
+         END IF
+*
+         PREC  = SUBNAM( 1: 1 )
+         ALGO  = SUBNAM( 4: 6 )
+         STAG  = SUBNAM( 8:12 )
+         RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
+         CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
+*
+*        Invalid value for PRECISION
+*      
+         IF( .NOT.( RPREC .OR. CPREC ) ) THEN
+             IPARAM2STAGE = -1
+             RETURN
+         ENDIF
+      ENDIF
+*      WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
+*     $           '   ALGO ',ALGO,'    STAGE ',STAG
+*      
+*
+      IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN 
+*
+*     ISPEC = 17, 18:  block size KD, IB
+*     Could be also dependent from N but for now it
+*     depend only on sequential or parallel
+*
+         IF( NTHREADS.GT.4 ) THEN
+            IF( CPREC ) THEN
+               KD = 128
+               IB = 32
+            ELSE
+               KD = 160
+               IB = 40
+            ENDIF
+         ELSE IF( NTHREADS.GT.1 ) THEN
+            IF( CPREC ) THEN
+               KD = 64
+               IB = 32
+            ELSE
+               KD = 64
+               IB = 32
+            ENDIF
+         ELSE
+            IF( CPREC ) THEN
+               KD = 16
+               IB = 16
+            ELSE
+               KD = 32
+               IB = 16
+            ENDIF
+         ENDIF
+         IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
+         IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
+*
+      ELSE IF ( ISPEC .EQ. 19 ) THEN
+*
+*     ISPEC = 19:  
+*     LHOUS length of the Houselholder representation
+*     matrix (V,T) of the second stage. should be >= 1.
+*
+*     Will add the VECT OPTION HERE next release
+         VECT  = OPTS(1:1)
+         IF( VECT.EQ.'N' ) THEN
+            LHOUS = MAX( 1, 4*NI )
+         ELSE
+*           This is not correct, it need to call the ALGO and the stage2
+            LHOUS = MAX( 1, 4*NI ) + IBI
+         ENDIF
+         IF( LHOUS.GE.0 ) THEN
+            IPARAM2STAGE = LHOUS
+         ELSE
+            IPARAM2STAGE = -1
+         ENDIF
+*
+      ELSE IF ( ISPEC .EQ. 20 ) THEN
+*
+*     ISPEC = 20: (21 for future use)  
+*     LWORK length of the workspace for 
+*     either or both stages for TRD and BRD. should be >= 1.
+*     TRD:
+*     TRD_stage 1: = LT + LW + LS1 + LS2
+*                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD 
+*                    where LDT=LDS2=KD
+*                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
+*     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
+*     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
+*                  = N*KD + N*max(KD+1,FACTOPTNB) 
+*                    + max(2*KD*KD, KD*NTHREADS) 
+*                    + (KD+1)*N
+         LWORK        = -1
+         SUBNAM(1:1)  = PREC
+         SUBNAM(2:6)  = 'GEQRF'
+         QROPTNB      = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
+         SUBNAM(2:6)  = 'GELQF'
+         LQOPTNB      = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
+*        Could be QR or LQ for TRD and the max for BRD
+         FACTOPTNB    = MAX(QROPTNB, LQOPTNB)
+         IF( ALGO.EQ.'TRD' ) THEN
+            IF( STAG.EQ.'2STAG' ) THEN
+               LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
+     $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
+     $              + (NBI+1)*NI
+            ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
+               LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
+            ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
+               LWORK = (2*NBI+1)*NI + NBI*NTHREADS
+            ENDIF
+         ELSE IF( ALGO.EQ.'BRD' ) THEN
+            IF( STAG.EQ.'2STAG' ) THEN
+               LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
+     $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
+     $              + (NBI+1)*NI
+            ELSE IF( STAG.EQ.'GE2GB' ) THEN
+               LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
+            ELSE IF( STAG.EQ.'GB2BD' ) THEN
+               LWORK = (3*NBI+1)*NI + NBI*NTHREADS
+            ENDIF
+         ENDIF
+         LWORK = MAX ( 1, LWORK )
+
+         IF( LWORK.GT.0 ) THEN
+            IPARAM2STAGE = LWORK
+         ELSE
+            IPARAM2STAGE = -1
+         ENDIF
+*
+      ELSE IF ( ISPEC .EQ. 21 ) THEN
+*
+*     ISPEC = 21 for future use 
+         IPARAM2STAGE = NXI
+      ENDIF
+*
+*     ==== End of IPARAM2STAGE ====
+*
+      END
diff --git a/lib/linalg/iparmq.f b/lib/linalg/iparmq.f
index bd5bd7a0db7202870f90950162b9bc84230ed7f5..e576e0db01ee375ec82dabda47eca89800e0aba7 100644
--- a/lib/linalg/iparmq.f
+++ b/lib/linalg/iparmq.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download IPARMQ + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparmq.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparmq.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparmq.f"> 
+*> Download IPARMQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparmq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparmq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparmq.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            IHI, ILO, ISPEC, LWORK, N
 *       CHARACTER          NAME*( * ), OPTS*( * )
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -31,8 +31,9 @@
 *> \verbatim
 *>
 *>      This program sets problem and machine dependent parameters
-*>      useful for xHSEQR and its subroutines. It is called whenever 
-*>      ILAENV is called with 12 <= ISPEC <= 16
+*>      useful for xHSEQR and related subroutines for eigenvalue
+*>      problems. It is called whenever
+*>      IPARMQ is called with 12 <= ISPEC <= 16
 *> \endverbatim
 *
 *  Arguments:
@@ -40,7 +41,7 @@
 *
 *> \param[in] ISPEC
 *> \verbatim
-*>          ISPEC is integer scalar
+*>          ISPEC is INTEGER
 *>              ISPEC specifies which tunable parameter IPARMQ should
 *>              return.
 *>
@@ -75,19 +76,26 @@
 *>
 *>              ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the
 *>                        following meanings.
-*>                        0:  During the multi-shift QR sweep,
-*>                            xLAQR5 does not accumulate reflections and
-*>                            does not use matrix-matrix multiply to
-*>                            update the far-from-diagonal matrix
-*>                            entries.
-*>                        1:  During the multi-shift QR sweep,
-*>                            xLAQR5 and/or xLAQRaccumulates reflections and uses
-*>                            matrix-matrix multiply to update the
+*>                        0:  During the multi-shift QR/QZ sweep,
+*>                            blocked eigenvalue reordering, blocked
+*>                            Hessenberg-triangular reduction,
+*>                            reflections and/or rotations are not
+*>                            accumulated when updating the
 *>                            far-from-diagonal matrix entries.
-*>                        2:  During the multi-shift QR sweep.
-*>                            xLAQR5 accumulates reflections and takes
-*>                            advantage of 2-by-2 block structure during
-*>                            matrix-matrix multiplies.
+*>                        1:  During the multi-shift QR/QZ sweep,
+*>                            blocked eigenvalue reordering, blocked
+*>                            Hessenberg-triangular reduction,
+*>                            reflections and/or rotations are
+*>                            accumulated, and matrix-matrix
+*>                            multiplication is used to update the
+*>                            far-from-diagonal matrix entries.
+*>                        2:  During the multi-shift QR/QZ sweep,
+*>                            blocked eigenvalue reordering, blocked
+*>                            Hessenberg-triangular reduction,
+*>                            reflections and/or rotations are
+*>                            accumulated, and 2-by-2 block structure
+*>                            is exploited during matrix-matrix
+*>                            multiplies.
 *>                        (If xTRMM is slower than xGEMM, then
 *>                        IPARMQ(ISPEC=16)=1 may be more efficient than
 *>                        IPARMQ(ISPEC=16)=2 despite the greater level of
@@ -109,7 +117,7 @@
 *>
 *> \param[in] N
 *> \verbatim
-*>          N is integer scalar
+*>          N is INTEGER
 *>               N is the order of the Hessenberg matrix H.
 *> \endverbatim
 *>
@@ -127,21 +135,21 @@
 *>
 *> \param[in] LWORK
 *> \verbatim
-*>          LWORK is integer scalar
+*>          LWORK is INTEGER
 *>               The amount of workspace available.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date June 2017
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *> \par Further Details:
 *  =====================
@@ -214,10 +222,10 @@
 *  =====================================================================
       INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.0) --
+*  -- LAPACK auxiliary routine (version 3.7.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     June 2017
 *
 *     .. Scalar Arguments ..
       INTEGER            IHI, ILO, ISPEC, LWORK, N
@@ -236,6 +244,8 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            NH, NS
+      INTEGER            I, IC, IZ
+      CHARACTER          SUBNAM*6
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          LOG, MAX, MOD, NINT, REAL
@@ -304,12 +314,75 @@
 *        .     doing it.  A small amount of work could be saved
 *        .     by making this choice dependent also upon the
 *        .     NH=IHI-ILO+1.
+*
+*
+*        Convert NAME to upper case if the first character is lower case.
 *
          IPARMQ = 0
-         IF( NS.GE.KACMIN )
-     $      IPARMQ = 1
-         IF( NS.GE.K22MIN )
-     $      IPARMQ = 2
+         SUBNAM = NAME
+         IC = ICHAR( SUBNAM( 1: 1 ) )
+         IZ = ICHAR( 'Z' )
+         IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
+*
+*           ASCII character set
+*
+            IF( IC.GE.97 .AND. IC.LE.122 ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC-32 )
+               DO I = 2, 6
+                  IC = ICHAR( SUBNAM( I: I ) )
+                  IF( IC.GE.97 .AND. IC.LE.122 )
+     $               SUBNAM( I: I ) = CHAR( IC-32 )
+               END DO
+            END IF
+*
+         ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
+*
+*           EBCDIC character set
+*
+            IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC+64 )
+               DO I = 2, 6
+                  IC = ICHAR( SUBNAM( I: I ) )
+                  IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
+     $                I ) = CHAR( IC+64 )
+               END DO
+            END IF
+*
+         ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
+*
+*           Prime machines:  ASCII+128
+*
+            IF( IC.GE.225 .AND. IC.LE.250 ) THEN
+               SUBNAM( 1: 1 ) = CHAR( IC-32 )
+               DO I = 2, 6
+                  IC = ICHAR( SUBNAM( I: I ) )
+                  IF( IC.GE.225 .AND. IC.LE.250 )
+     $               SUBNAM( I: I ) = CHAR( IC-32 )
+               END DO
+            END IF
+         END IF
+*
+         IF( SUBNAM( 2:6 ).EQ.'GGHRD' .OR.
+     $       SUBNAM( 2:6 ).EQ.'GGHD3' ) THEN
+            IPARMQ = 1
+            IF( NH.GE.K22MIN )
+     $         IPARMQ = 2
+         ELSE IF ( SUBNAM( 4:6 ).EQ.'EXC' ) THEN
+            IF( NH.GE.KACMIN )
+     $         IPARMQ = 1
+            IF( NH.GE.K22MIN )
+     $         IPARMQ = 2
+         ELSE IF ( SUBNAM( 2:6 ).EQ.'HSEQR' .OR.
+     $             SUBNAM( 2:5 ).EQ.'LAQR' ) THEN
+            IF( NS.GE.KACMIN )
+     $         IPARMQ = 1
+            IF( NS.GE.K22MIN )
+     $         IPARMQ = 2
+         END IF
 *
       ELSE
 *        ===== invalid value of ispec =====
diff --git a/lib/linalg/lsame.f b/lib/linalg/lsame.f
index f19f9cda9e6859f0ac84cacdead1dabe5e144c0c..d8194786966a58489326d905c1e538cb9480bdf1 100644
--- a/lib/linalg/lsame.f
+++ b/lib/linalg/lsame.f
@@ -2,18 +2,18 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       LOGICAL FUNCTION LSAME(CA,CB)
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER CA,CB
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -41,12 +41,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup aux_blas
 *
@@ -56,7 +56,7 @@
 *  -- Reference BLAS level1 routine (version 3.1) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER CA,CB
diff --git a/lib/linalg/xerbla.f b/lib/linalg/xerbla.f
index 3e93bc4e0e467454b3e357dd1f815024e3e76d32..4a0350988c1fbdb20655d809c95082015cb9c30b 100644
--- a/lib/linalg/xerbla.f
+++ b/lib/linalg/xerbla.f
@@ -2,29 +2,29 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download XERBLA + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/xerbla.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/xerbla.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/xerbla.f"> 
+*> Download XERBLA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/xerbla.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/xerbla.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/xerbla.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE XERBLA( SRNAME, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER*(*)      SRNAME
 *       INTEGER            INFO
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -58,22 +58,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
-*> \ingroup auxOTHERauxiliary
+*> \ingroup OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE XERBLA( SRNAME, INFO )
 *
-*  -- LAPACK auxiliary routine (version 3.4.0) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER*(*)      SRNAME
diff --git a/lib/linalg/zaxpy.f b/lib/linalg/zaxpy.f
index e6f5e1f6dbfe289ad666ffb6652387be9a808666..b7b9ee69e43838f1da325d311b4bbb98e890cc01 100644
--- a/lib/linalg/zaxpy.f
+++ b/lib/linalg/zaxpy.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ZA
 *       INTEGER INCX,INCY,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*),ZY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -27,15 +27,52 @@
 *>    ZAXPY constant times a vector plus a vector.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] ZA
+*> \verbatim
+*>          ZA is COMPLEX*16
+*>           On entry, ZA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in] ZX
+*> \verbatim
+*>          ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*>
+*> \param[in,out] ZY
+*> \verbatim
+*>          ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of ZY
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -51,10 +88,10 @@
 *  =====================================================================
       SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ZA
diff --git a/lib/linalg/zcopy.f b/lib/linalg/zcopy.f
index baeafd5c3b211b62e3dd415508e861579461fcc9..3777079730d9f0d311016554849e3801f70607d3 100644
--- a/lib/linalg/zcopy.f
+++ b/lib/linalg/zcopy.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*),ZY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -26,15 +26,46 @@
 *>    ZCOPY copies a vector, x, to a vector, y.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] ZX
+*> \verbatim
+*>          ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*>
+*> \param[out] ZY
+*> \verbatim
+*>          ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of ZY
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -50,10 +81,10 @@
 *  =====================================================================
       SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
diff --git a/lib/linalg/zdotc.f b/lib/linalg/zdotc.f
index 660648bbe1d61efca33f0d5c66d41577720bb18b..e6cd11b21db42a4cbbb97628089b70c76806273f 100644
--- a/lib/linalg/zdotc.f
+++ b/lib/linalg/zdotc.f
@@ -2,39 +2,72 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*),ZY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
-*> ZDOTC forms the dot product of a vector.
+*> ZDOTC forms the dot product of two complex vectors
+*>      ZDOTC = X^H * Y
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] ZX
+*> \verbatim
+*>          ZX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*>
+*> \param[in] ZY
+*> \verbatim
+*>          ZY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of ZY
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -50,10 +83,10 @@
 *  =====================================================================
       COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
diff --git a/lib/linalg/zdscal.f b/lib/linalg/zdscal.f
index 57a949023767cc8442b706b25e11f098bbc43bb0..71d4da55be40446f49faf11e4e65cfc09f963e8c 100644
--- a/lib/linalg/zdscal.f
+++ b/lib/linalg/zdscal.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZDSCAL(N,DA,ZX,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION DA
 *       INTEGER INCX,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -27,15 +27,41 @@
 *>    ZDSCAL scales a vector by a constant.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] DA
+*> \verbatim
+*>          DA is DOUBLE PRECISION
+*>           On entry, DA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in,out] ZX
+*> \verbatim
+*>          ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -52,10 +78,10 @@
 *  =====================================================================
       SUBROUTINE ZDSCAL(N,DA,ZX,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION DA
diff --git a/lib/linalg/zgemm.f b/lib/linalg/zgemm.f
index f423315508a0de2fc5ce314db47bab240da9d7fa..c3ac7551d1c793c050c39c8c0f542e6459db8a13 100644
--- a/lib/linalg/zgemm.f
+++ b/lib/linalg/zgemm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA,BETA
 *       INTEGER K,LDA,LDB,LDC,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -97,7 +97,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
+*>          A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
 *>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
@@ -116,7 +116,7 @@
 *>
 *> \param[in] B
 *> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
+*>          B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is
 *>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
 *>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
 *>           part of the array  B  must contain the matrix  B,  otherwise
@@ -142,7 +142,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
+*>          C is COMPLEX*16 array, dimension ( LDC, N )
 *>           Before entry, the leading  m by n  part of the array  C must
 *>           contain the matrix  C,  except when  beta  is zero, in which
 *>           case C need not be set on entry.
@@ -161,12 +161,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level3
 *
@@ -187,10 +187,10 @@
 *  =====================================================================
       SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA,BETA
@@ -317,12 +317,10 @@
    60                 CONTINUE
                   END IF
                   DO 80 L = 1,K
-                      IF (B(L,J).NE.ZERO) THEN
-                          TEMP = ALPHA*B(L,J)
-                          DO 70 I = 1,M
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                     CONTINUE
-                      END IF
+                      TEMP = ALPHA*B(L,J)
+                      DO 70 I = 1,M
+                          C(I,J) = C(I,J) + TEMP*A(I,L)
+   70                 CONTINUE
    80             CONTINUE
    90         CONTINUE
           ELSE IF (CONJA) THEN
@@ -376,17 +374,15 @@
   170                 CONTINUE
                   END IF
                   DO 190 L = 1,K
-                      IF (B(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*DCONJG(B(J,L))
-                          DO 180 I = 1,M
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  180                     CONTINUE
-                      END IF
+                      TEMP = ALPHA*DCONJG(B(J,L))
+                      DO 180 I = 1,M
+                          C(I,J) = C(I,J) + TEMP*A(I,L)
+  180                 CONTINUE
   190             CONTINUE
   200         CONTINUE
           ELSE
 *
-*           Form  C := alpha*A*B**T          + beta*C
+*           Form  C := alpha*A*B**T + beta*C
 *
               DO 250 J = 1,N
                   IF (BETA.EQ.ZERO) THEN
@@ -399,12 +395,10 @@
   220                 CONTINUE
                   END IF
                   DO 240 L = 1,K
-                      IF (B(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*B(J,L)
-                          DO 230 I = 1,M
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  230                     CONTINUE
-                      END IF
+                      TEMP = ALPHA*B(J,L)
+                      DO 230 I = 1,M
+                          C(I,J) = C(I,J) + TEMP*A(I,L)
+  230                 CONTINUE
   240             CONTINUE
   250         CONTINUE
           END IF
diff --git a/lib/linalg/zgemv.f b/lib/linalg/zgemv.f
index 4e174c956c9ee8cac85b1d5d765e92f838c4d777..7088d383f449fbb52ddf608fdea128d1f5fb3a9f 100644
--- a/lib/linalg/zgemv.f
+++ b/lib/linalg/zgemv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA,BETA
 *       INTEGER INCX,INCY,LDA,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -73,7 +73,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
+*>          A is COMPLEX*16 array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients.
 *> \endverbatim
@@ -88,7 +88,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
@@ -112,7 +112,7 @@
 *>
 *> \param[in,out] Y
 *> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
+*>          Y is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
 *>           and at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
@@ -131,12 +131,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -158,10 +158,10 @@
 *  =====================================================================
       SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA,BETA
@@ -285,24 +285,20 @@
           JX = KX
           IF (INCY.EQ.1) THEN
               DO 60 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      DO 50 I = 1,M
-                          Y(I) = Y(I) + TEMP*A(I,J)
-   50                 CONTINUE
-                  END IF
+                  TEMP = ALPHA*X(JX)
+                  DO 50 I = 1,M
+                      Y(I) = Y(I) + TEMP*A(I,J)
+   50             CONTINUE
                   JX = JX + INCX
    60         CONTINUE
           ELSE
               DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IY = KY
-                      DO 70 I = 1,M
-                          Y(IY) = Y(IY) + TEMP*A(I,J)
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
+                  TEMP = ALPHA*X(JX)
+                  IY = KY
+                  DO 70 I = 1,M
+                      Y(IY) = Y(IY) + TEMP*A(I,J)
+                      IY = IY + INCY
+   70             CONTINUE
                   JX = JX + INCX
    80         CONTINUE
           END IF
diff --git a/lib/linalg/zgerc.f b/lib/linalg/zgerc.f
index accfeafc053ad42c844281de2739d62148d1a602..058dccfc1ccb941aabafb3de6a68d95cc933faae 100644
--- a/lib/linalg/zgerc.f
+++ b/lib/linalg/zgerc.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA
 *       INTEGER INCX,INCY,LDA,M,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -57,7 +57,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( m - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the m
 *>           element vector x.
@@ -72,7 +72,7 @@
 *>
 *> \param[in] Y
 *> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
+*>          Y is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y.
@@ -87,7 +87,7 @@
 *>
 *> \param[in,out] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
+*>          A is COMPLEX*16 array, dimension ( LDA, N )
 *>           Before entry, the leading m by n part of the array A must
 *>           contain the matrix of coefficients. On exit, A is
 *>           overwritten by the updated matrix.
@@ -104,12 +104,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -130,10 +130,10 @@
 *  =====================================================================
       SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA
diff --git a/lib/linalg/zheev.f b/lib/linalg/zheev.f
index adba990f0a9d396198cf99711d38364e90f4e514..3e87778740750dab190b8180cfe12b92c245c964 100644
--- a/lib/linalg/zheev.f
+++ b/lib/linalg/zheev.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZHEEV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev.f"> 
+*> Download ZHEEV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
 *                         INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          JOBZ, UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -29,7 +29,7 @@
 *       DOUBLE PRECISION   RWORK( * ), W( * )
 *       COMPLEX*16         A( LDA, * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -127,12 +127,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16HEeigen
 *
@@ -140,10 +140,10 @@
       SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
      $                  INFO )
 *
-*  -- LAPACK driver routine (version 3.4.0) --
+*  -- LAPACK driver routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          JOBZ, UPLO
diff --git a/lib/linalg/zhemv.f b/lib/linalg/zhemv.f
index 34216fbfff8a12b8d4c18cbdb2a7aa70a2d275e3..3ea0753f40b9d81ea154074b06131cd1491d18ab 100644
--- a/lib/linalg/zhemv.f
+++ b/lib/linalg/zhemv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA,BETA
 *       INTEGER INCX,INCY,LDA,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -65,7 +65,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
+*>          A is COMPLEX*16 array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular part of the hermitian matrix and the strictly
@@ -88,7 +88,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x.
@@ -110,7 +110,7 @@
 *>
 *> \param[in,out] Y
 *> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
+*>          Y is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y. On exit, Y is overwritten by the updated
@@ -127,12 +127,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -154,10 +154,10 @@
 *  =====================================================================
       SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA,BETA
diff --git a/lib/linalg/zher2.f b/lib/linalg/zher2.f
index e2a02c3c68fb3d705aa2c734d4e21fc722908def..e3a383189d92f46e2400d48b6247a22aadfd6d1e 100644
--- a/lib/linalg/zher2.f
+++ b/lib/linalg/zher2.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA
 *       INTEGER INCX,INCY,LDA,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),X(*),Y(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -65,7 +65,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x.
@@ -80,7 +80,7 @@
 *>
 *> \param[in] Y
 *> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
+*>          Y is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCY ) ).
 *>           Before entry, the incremented array Y must contain the n
 *>           element vector y.
@@ -95,7 +95,7 @@
 *>
 *> \param[in,out] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
+*>          A is COMPLEX*16 array, dimension ( LDA, N )
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular part of the hermitian matrix and the strictly
@@ -124,12 +124,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -150,10 +150,10 @@
 *  =====================================================================
       SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA
diff --git a/lib/linalg/zher2k.f b/lib/linalg/zher2k.f
index 0b91bd2cbbf09f79d782ac7b1b05313ca55c9f7e..474c65e5755eb6a910dba45896bbef33b3a5e219 100644
--- a/lib/linalg/zher2k.f
+++ b/lib/linalg/zher2k.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA
 *       DOUBLE PRECISION BETA
@@ -19,7 +19,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -95,7 +95,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
+*>          A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is
 *>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
 *>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
 *>           part of the array  A  must contain the matrix  A,  otherwise
@@ -114,7 +114,7 @@
 *>
 *> \param[in] B
 *> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
+*>          B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is
 *>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
 *>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
 *>           part of the array  B  must contain the matrix  B,  otherwise
@@ -140,7 +140,7 @@
 *>
 *> \param[in,out] C
 *> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
+*>          C is COMPLEX*16 array, dimension ( LDC, N )
 *>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
 *>           upper triangular part of the array C must contain the upper
 *>           triangular part  of the  hermitian matrix  and the strictly
@@ -169,12 +169,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level3
 *
@@ -198,10 +198,10 @@
 *  =====================================================================
       SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA
diff --git a/lib/linalg/zhetd2.f b/lib/linalg/zhetd2.f
index dd8f9cf0145642ee5db89462d087c668bb6a6bf7..6c5b8aae3dd4602faf5be92f8b325b8b81b1d8e3 100644
--- a/lib/linalg/zhetd2.f
+++ b/lib/linalg/zhetd2.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZHETD2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetd2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetd2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetd2.f"> 
+*> Download ZHETD2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetd2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetd2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetd2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   D( * ), E( * )
 *       COMPLEX*16         A( LDA, * ), TAU( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -117,12 +117,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16HEcomputational
 *
@@ -175,10 +175,10 @@
 *  =====================================================================
       SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zhetrd.f b/lib/linalg/zhetrd.f
index c6074846379f79f0ff837209d81e2d0fc28cecb2..51c9fc2ec9d0ffd42261631105dcac4744795fc2 100644
--- a/lib/linalg/zhetrd.f
+++ b/lib/linalg/zhetrd.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZHETRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.f"> 
+*> Download ZHETRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   D( * ), E( * )
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -134,12 +134,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16HEcomputational
 *
@@ -192,10 +192,10 @@
 *  =====================================================================
       SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zhpr.f b/lib/linalg/zhpr.f
index 42e61196baad39aec6bd80f05a1e433e4947c07c..af82dfbd8c291c204ecae2d82d4234357be4d7de 100644
--- a/lib/linalg/zhpr.f
+++ b/lib/linalg/zhpr.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP)
-* 
+*
 *       .. Scalar Arguments ..
 *       DOUBLE PRECISION ALPHA
 *       INTEGER INCX,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 AP(*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -65,7 +65,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x.
@@ -80,7 +80,7 @@
 *>
 *> \param[in,out] AP
 *> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
+*>          AP is COMPLEX*16 array, dimension at least
 *>           ( ( n*( n + 1 ) )/2 ).
 *>           Before entry with  UPLO = 'U' or 'u', the array AP must
 *>           contain the upper triangular part of the hermitian matrix
@@ -104,12 +104,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -130,10 +130,10 @@
 *  =====================================================================
       SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       DOUBLE PRECISION ALPHA
diff --git a/lib/linalg/zlacgv.f b/lib/linalg/zlacgv.f
index 315c4de5ce103048eeab7d103a20a8978de13005..1e3ca6e73ff918a5a1d358c4d8bc03b4dff832e9 100644
--- a/lib/linalg/zlacgv.f
+++ b/lib/linalg/zlacgv.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLACGV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlacgv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlacgv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlacgv.f"> 
+*> Download ZLACGV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlacgv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlacgv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlacgv.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLACGV( N, X, INCX )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -62,22 +62,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLACGV( N, X, INCX )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/zladiv.f b/lib/linalg/zladiv.f
index 8f01fe3e63b2296c728d402a7d776f40b2c27539..0bf6ea87d5f08210a2dae8174a6fa758af91ad6c 100644
--- a/lib/linalg/zladiv.f
+++ b/lib/linalg/zladiv.f
@@ -2,28 +2,28 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLADIV + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zladiv.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zladiv.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zladiv.f"> 
+*> Download ZLADIV + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zladiv.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zladiv.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zladiv.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       COMPLEX*16     FUNCTION ZLADIV( X, Y )
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16         X, Y
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -52,22 +52,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       COMPLEX*16     FUNCTION ZLADIV( X, Y )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16         X, Y
diff --git a/lib/linalg/zlanhe.f b/lib/linalg/zlanhe.f
index 3093a151afe516731e2b2ccfb438407b0da3dbce..7c7f7f3be4c1751e08a510ee62cd001a422921d9 100644
--- a/lib/linalg/zlanhe.f
+++ b/lib/linalg/zlanhe.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLANHE + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlanhe.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlanhe.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlanhe.f"> 
+*> Download ZLANHE + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlanhe.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlanhe.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlanhe.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          NORM, UPLO
 *       INTEGER            LDA, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   WORK( * )
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -112,22 +112,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16HEauxiliary
 *
 *  =====================================================================
       DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
diff --git a/lib/linalg/zlarf.f b/lib/linalg/zlarf.f
index f51e1d73831544937bfb8f5f66c83bbb0edf6a8e..f1be80d37bdd56e329efa9304cf1907f8653ba62 100644
--- a/lib/linalg/zlarf.f
+++ b/lib/linalg/zlarf.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLARF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarf.f"> 
+*> Download ZLARF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          SIDE
 *       INTEGER            INCV, LDC, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         C( LDC, * ), V( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,22 +116,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          SIDE
diff --git a/lib/linalg/zlarfb.f b/lib/linalg/zlarfb.f
index 99490f5827ffad2e19c79cf4da285dd6e4f8b681..b4a2b4d1a0474d4af19e412b82161eb3591343fe 100644
--- a/lib/linalg/zlarfb.f
+++ b/lib/linalg/zlarfb.f
@@ -2,25 +2,25 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLARFB + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfb.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfb.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfb.f"> 
+*> Download ZLARFB + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfb.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfb.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfb.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
 *                          T, LDT, C, LDC, WORK, LDWORK )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, SIDE, STOREV, TRANS
 *       INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
@@ -29,7 +29,7 @@
 *       COMPLEX*16         C( LDC, * ), T( LDT, * ), V( LDV, * ),
 *      $                   WORK( LDWORK, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -154,12 +154,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2013
 *
 *> \ingroup complex16OTHERauxiliary
 *
@@ -195,10 +195,10 @@
       SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
      $                   T, LDT, C, LDC, WORK, LDWORK )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2013
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, SIDE, STOREV, TRANS
@@ -217,12 +217,11 @@
 *     ..
 *     .. Local Scalars ..
       CHARACTER          TRANST
-      INTEGER            I, J, LASTV, LASTC
+      INTEGER            I, J
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
-      INTEGER            ILAZLR, ILAZLC
-      EXTERNAL           LSAME, ILAZLR, ILAZLC
+      EXTERNAL           LSAME
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           ZCOPY, ZGEMM, ZLACGV, ZTRMM
@@ -255,36 +254,33 @@
 *
 *              Form  H * C  or  H**H * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTV = MAX( K, ILAZLR( M, K, V, LDV ) )
-               LASTC = ILAZLC( LASTV, N, C, LDC )
 *
 *              W := C**H * V  =  (C1**H * V1 + C2**H * V2)  (stored in WORK)
 *
 *              W := C1**H
 *
                DO 10 J = 1, K
-                  CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
-                  CALL ZLACGV( LASTC, WORK( 1, J ), 1 )
+                  CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
    10          CONTINUE
 *
 *              W := W * V1
 *
-               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
 *
-*                 W := W + C2**H *V2
+*                 W := W + C2**H * V2
 *
-                  CALL ZGEMM( 'Conjugate transpose', 'No transpose',
-     $                 LASTC, K, LASTV-K, ONE, C( K+1, 1 ), LDC,
-     $                 V( K+1, 1 ), LDV, ONE, WORK, LDWORK )
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C( K+1, 1 ), LDC,
+     $                        V( K+1, 1 ), LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T**H  or  W * T
 *
-               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V * W**H
 *
@@ -293,20 +289,19 @@
 *                 C2 := C2 - V2 * W**H
 *
                   CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 LASTV-K, LASTC, K,
-     $                 -ONE, V( K+1, 1 ), LDV, WORK, LDWORK,
-     $                 ONE, C( K+1, 1 ), LDC )
+     $                        M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK,
+     $                        LDWORK, ONE, C( K+1, 1 ), LDC )
                END IF
 *
 *              W := W * V1**H
 *
                CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK )
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W**H
 *
                DO 30 J = 1, K
-                  DO 20 I = 1, LASTC
+                  DO 20 I = 1, N
                      C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) )
    20             CONTINUE
    30          CONTINUE
@@ -314,58 +309,53 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**H  where  C = ( C1  C2 )
-*
-               LASTV = MAX( K, ILAZLR( N, K, V, LDV ) )
-               LASTC = ILAZLR( M, LASTV, C, LDC )
 *
 *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
 *
 *              W := C1
 *
                DO 40 J = 1, K
-                  CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
+                  CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
    40          CONTINUE
 *
 *              W := W * V1
 *
-               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
 *
 *                 W := W + C2 * V2
 *
-                  CALL ZGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, K, LASTV-K,
-     $                 ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**H
 *
-               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V**H
 *
-               IF( LASTV.GT.K ) THEN
+               IF( N.GT.K ) THEN
 *
 *                 C2 := C2 - W * V2**H
 *
-                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 LASTC, LASTV-K, K,
-     $                 -ONE, WORK, LDWORK, V( K+1, 1 ), LDV,
-     $                 ONE, C( 1, K+1 ), LDC )
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ),
+     $                        LDV, ONE, C( 1, K+1 ), LDC )
                END IF
 *
 *              W := W * V1**H
 *
                CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK )
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 60 J = 1, K
-                  DO 50 I = 1, LASTC
+                  DO 50 I = 1, M
                      C( I, J ) = C( I, J ) - WORK( I, J )
    50             CONTINUE
    60          CONTINUE
@@ -381,38 +371,33 @@
 *
 *              Form  H * C  or  H**H * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTC = ILAZLC( M, N, C, LDC )
 *
 *              W := C**H * V  =  (C1**H * V1 + C2**H * V2)  (stored in WORK)
 *
 *              W := C2**H
 *
                DO 70 J = 1, K
-                  CALL ZCOPY( LASTC, C( M-K+J, 1 ), LDC,
-     $                 WORK( 1, J ), 1 )
-                  CALL ZLACGV( LASTC, WORK( 1, J ), 1 )
+                  CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
    70          CONTINUE
 *
 *              W := W * V2
 *
-               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( M-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
                IF( M.GT.K ) THEN
 *
-*                 W := W + C1**H*V1
+*                 W := W + C1**H * V1
 *
-                  CALL ZGEMM( 'Conjugate transpose', 'No transpose',
-     $                 LASTC, K, M-K,
-     $                 ONE, C, LDC, V, LDV,
-     $                 ONE, WORK, LDWORK )
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
                END IF
 *
 *              W := W * T**H  or  W * T
 *
-               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V * W**H
 *
@@ -421,21 +406,20 @@
 *                 C1 := C1 - V1 * W**H
 *
                   CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 M-K, LASTC, K,
-     $                 -ONE, V, LDV, WORK, LDWORK,
-     $                 ONE, C, LDC )
+     $                        M-K, N, K, -ONE, V, LDV, WORK, LDWORK,
+     $                        ONE, C, LDC )
                END IF
 *
 *              W := W * V2**H
 *
                CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V( M-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+     $                     'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
 *
 *              C2 := C2 - W**H
 *
                DO 90 J = 1, K
-                  DO 80 I = 1, LASTC
+                  DO 80 I = 1, N
                      C( M-K+J, I ) = C( M-K+J, I ) -
      $                               DCONJG( WORK( I, J ) )
    80             CONTINUE
@@ -444,36 +428,31 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**H  where  C = ( C1  C2 )
-*
-               LASTC = ILAZLR( M, N, C, LDC )
 *
 *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
 *
 *              W := C2
 *
                DO 100 J = 1, K
-                  CALL ZCOPY( LASTC, C( 1, N-K+J ), 1,
-     $                 WORK( 1, J ), 1 )
+                  CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
   100          CONTINUE
 *
 *              W := W * V2
 *
-               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( N-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
                IF( N.GT.K ) THEN
 *
 *                 W := W + C1 * V1
 *
-                  CALL ZGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, K, N-K,
-     $                 ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**H
 *
-               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V**H
 *
@@ -481,23 +460,22 @@
 *
 *                 C1 := C1 - W * V1**H
 *
-                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV,
-     $                 ONE, C, LDC )
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE,
+     $                        C, LDC )
                END IF
 *
 *              W := W * V2**H
 *
                CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V( N-K+1, 1 ), LDV,
-     $              WORK, LDWORK )
+     $                     'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
 *
 *              C2 := C2 - W
 *
                DO 120 J = 1, K
-                  DO 110 I = 1, LASTC
-                     C( I, N-K+J ) = C( I, N-K+J )
-     $                    - WORK( I, J )
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
   110             CONTINUE
   120          CONTINUE
             END IF
@@ -514,59 +492,56 @@
 *
 *              Form  H * C  or  H**H * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTV = MAX( K, ILAZLC( K, M, V, LDV ) )
-               LASTC = ILAZLC( LASTV, N, C, LDC )
 *
 *              W := C**H * V**H  =  (C1**H * V1**H + C2**H * V2**H) (stored in WORK)
 *
 *              W := C1**H
 *
                DO 130 J = 1, K
-                  CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
-                  CALL ZLACGV( LASTC, WORK( 1, J ), 1 )
+                  CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
   130          CONTINUE
 *
 *              W := W * V1**H
 *
                CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
-     $                     'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
 *
-*                 W := W + C2**H*V2**H
+*                 W := W + C2**H * V2**H
 *
                   CALL ZGEMM( 'Conjugate transpose',
-     $                 'Conjugate transpose', LASTC, K, LASTV-K,
-     $                 ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV,
-     $                 ONE, WORK, LDWORK )
+     $                        'Conjugate transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
                END IF
 *
 *              W := W * T**H  or  W * T
 *
-               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V**H * W**H
 *
-               IF( LASTV.GT.K ) THEN
+               IF( M.GT.K ) THEN
 *
 *                 C2 := C2 - V2**H * W**H
 *
                   CALL ZGEMM( 'Conjugate transpose',
-     $                 'Conjugate transpose', LASTV-K, LASTC, K,
-     $                 -ONE, V( 1, K+1 ), LDV, WORK, LDWORK,
-     $                 ONE, C( K+1, 1 ), LDC )
+     $                        'Conjugate transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
                END IF
 *
 *              W := W * V1
 *
-               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W**H
 *
                DO 150 J = 1, K
-                  DO 140 I = 1, LASTC
+                  DO 140 I = 1, N
                      C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) )
   140             CONTINUE
   150          CONTINUE
@@ -574,57 +549,53 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**H  where  C = ( C1  C2 )
-*
-               LASTV = MAX( K, ILAZLC( K, N, V, LDV ) )
-               LASTC = ILAZLR( M, LASTV, C, LDC )
 *
 *              W := C * V**H  =  (C1*V1**H + C2*V2**H)  (stored in WORK)
 *
 *              W := C1
 *
                DO 160 J = 1, K
-                  CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
+                  CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
   160          CONTINUE
 *
 *              W := W * V1**H
 *
                CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
-     $                     'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK )
-               IF( LASTV.GT.K ) THEN
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
 *
 *                 W := W + C2 * V2**H
 *
-                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 LASTC, K, LASTV-K, ONE, C( 1, K+1 ), LDC,
-     $                 V( 1, K+1 ), LDV, ONE, WORK, LDWORK )
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C( 1, K+1 ), LDC,
+     $                        V( 1, K+1 ), LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**H
 *
-               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V
 *
-               IF( LASTV.GT.K ) THEN
+               IF( N.GT.K ) THEN
 *
 *                 C2 := C2 - W * V2
 *
-                  CALL ZGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, LASTV-K, K,
-     $                 -ONE, WORK, LDWORK, V( 1, K+1 ), LDV,
-     $                 ONE, C( 1, K+1 ), LDC )
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
                END IF
 *
 *              W := W * V1
 *
-               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V, LDV, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 180 J = 1, K
-                  DO 170 I = 1, LASTC
+                  DO 170 I = 1, M
                      C( I, J ) = C( I, J ) - WORK( I, J )
   170             CONTINUE
   180          CONTINUE
@@ -640,37 +611,34 @@
 *
 *              Form  H * C  or  H**H * C  where  C = ( C1 )
 *                                                    ( C2 )
-*
-               LASTC = ILAZLC( M, N, C, LDC )
 *
 *              W := C**H * V**H  =  (C1**H * V1**H + C2**H * V2**H) (stored in WORK)
 *
 *              W := C2**H
 *
                DO 190 J = 1, K
-                  CALL ZCOPY( LASTC, C( M-K+J, 1 ), LDC,
-     $                 WORK( 1, J ), 1 )
-                  CALL ZLACGV( LASTC, WORK( 1, J ), 1 )
+                  CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
   190          CONTINUE
 *
 *              W := W * V2**H
 *
                CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V( 1, M-K+1 ), LDV,
-     $              WORK, LDWORK )
+     $                     'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK,
+     $                     LDWORK )
                IF( M.GT.K ) THEN
 *
 *                 W := W + C1**H * V1**H
 *
                   CALL ZGEMM( 'Conjugate transpose',
-     $                 'Conjugate transpose', LASTC, K, M-K,
-     $                 ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+     $                        'Conjugate transpose', N, K, M-K, ONE, C,
+     $                        LDC, V, LDV, ONE, WORK, LDWORK )
                END IF
 *
 *              W := W * T**H  or  W * T
 *
-               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - V**H * W**H
 *
@@ -679,20 +647,19 @@
 *                 C1 := C1 - V1**H * W**H
 *
                   CALL ZGEMM( 'Conjugate transpose',
-     $                 'Conjugate transpose', M-K, LASTC, K,
-     $                 -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC )
+     $                        'Conjugate transpose', M-K, N, K, -ONE, V,
+     $                        LDV, WORK, LDWORK, ONE, C, LDC )
                END IF
 *
 *              W := W * V2
 *
-               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, M-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
 *
 *              C2 := C2 - W**H
 *
                DO 210 J = 1, K
-                  DO 200 I = 1, LASTC
+                  DO 200 I = 1, N
                      C( M-K+J, I ) = C( M-K+J, I ) -
      $                               DCONJG( WORK( I, J ) )
   200             CONTINUE
@@ -701,36 +668,33 @@
             ELSE IF( LSAME( SIDE, 'R' ) ) THEN
 *
 *              Form  C * H  or  C * H**H  where  C = ( C1  C2 )
-*
-               LASTC = ILAZLR( M, N, C, LDC )
 *
 *              W := C * V**H  =  (C1*V1**H + C2*V2**H)  (stored in WORK)
 *
 *              W := C2
 *
                DO 220 J = 1, K
-                  CALL ZCOPY( LASTC, C( 1, N-K+J ), 1,
-     $                 WORK( 1, J ), 1 )
+                  CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
   220          CONTINUE
 *
 *              W := W * V2**H
 *
                CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
-     $              'Unit', LASTC, K, ONE, V( 1, N-K+1 ), LDV,
-     $              WORK, LDWORK )
+     $                     'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK,
+     $                     LDWORK )
                IF( N.GT.K ) THEN
 *
 *                 W := W + C1 * V1**H
 *
-                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
-     $                 LASTC, K, N-K, ONE, C, LDC, V, LDV, ONE,
-     $                 WORK, LDWORK )
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
                END IF
 *
 *              W := W * T  or  W * T**H
 *
-               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
-     $              LASTC, K, ONE, T, LDT, WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
 *
 *              C := C - W * V
 *
@@ -738,21 +702,19 @@
 *
 *                 C1 := C1 - W * V1
 *
-                  CALL ZGEMM( 'No transpose', 'No transpose',
-     $                 LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV,
-     $                 ONE, C, LDC )
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
                END IF
 *
 *              W := W * V2
 *
-               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
-     $              LASTC, K, ONE, V( 1, N-K+1 ), LDV,
-     $              WORK, LDWORK )
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
 *
 *              C1 := C1 - W
 *
                DO 240 J = 1, K
-                  DO 230 I = 1, LASTC
+                  DO 230 I = 1, M
                      C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
   230             CONTINUE
   240          CONTINUE
diff --git a/lib/linalg/zlarfg.f b/lib/linalg/zlarfg.f
index e37c683fc9acbf85612ff8fb338c2fa3a64944e3..f8a795d547cf925a2d12429e3f82798aed83a887 100644
--- a/lib/linalg/zlarfg.f
+++ b/lib/linalg/zlarfg.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLARFG + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfg.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfg.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfg.f"> 
+*> Download ZLARFG + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfg.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfg.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfg.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       COMPLEX*16         ALPHA, TAU
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -94,22 +94,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/zlarft.f b/lib/linalg/zlarft.f
index 2278d11d2b3d89098c2f3296766a3e0ecd6ea485..78ad2f1481d54304eca6927939864560e1df7d84 100644
--- a/lib/linalg/zlarft.f
+++ b/lib/linalg/zlarft.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLARFT + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f"> 
+*> Download ZLARFT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -125,12 +125,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
@@ -163,10 +163,10 @@
 *  =====================================================================
       SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, STOREV
@@ -187,7 +187,7 @@
       INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *     .. External Subroutines ..
-      EXTERNAL           ZGEMV, ZLACGV, ZTRMV
+      EXTERNAL           ZGEMV, ZTRMV, ZGEMM
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
@@ -222,13 +222,13 @@
                   END DO
                   DO J = 1, I-1
                      T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
-                  END DO                     
+                  END DO
                   J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
 *
                   CALL ZGEMV( 'Conjugate transpose', J-I, I-1,
-     $                        -TAU( I ), V( I+1, 1 ), LDV, 
+     $                        -TAU( I ), V( I+1, 1 ), LDV,
      $                        V( I+1, I ), 1, ONE, T( 1, I ), 1 )
                ELSE
 *                 Skip any trailing zeros.
@@ -237,14 +237,14 @@
                   END DO
                   DO J = 1, I-1
                      T( J, I ) = -TAU( I ) * V( J , I )
-                  END DO                     
+                  END DO
                   J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
 *
                   CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
      $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
-     $                        ONE, T( 1, I ), LDT )                  
+     $                        ONE, T( 1, I ), LDT )
                END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
@@ -281,7 +281,7 @@
                      END DO
                      DO J = I+1, K
                         T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
-                     END DO                        
+                     END DO
                      J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
@@ -296,14 +296,14 @@
                      END DO
                      DO J = I+1, K
                         T( J, I ) = -TAU( I ) * V( J, N-K+I )
-                     END DO                                           
+                     END DO
                      J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
 *
                      CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ),
      $                           V( I+1, J ), LDV, V( I, J ), LDV,
-     $                           ONE, T( I+1, I ), LDT )                     
+     $                           ONE, T( I+1, I ), LDT )
                   END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
diff --git a/lib/linalg/zlascl.f b/lib/linalg/zlascl.f
index 51a4f0f61494c3507dde135c77a2efa21c1d8053..c53c6f5ad7bc1c823d4b821ca7d05c682037c0d5 100644
--- a/lib/linalg/zlascl.f
+++ b/lib/linalg/zlascl.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLASCL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlascl.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlascl.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlascl.f"> 
+*> Download ZLASCL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlascl.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlascl.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlascl.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          TYPE
 *       INTEGER            INFO, KL, KU, LDA, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -114,7 +114,11 @@
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
-*>          The leading dimension of the array A.  LDA >= max(1,M).
+*>          The leading dimension of the array A.
+*>          If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M);
+*>             TYPE = 'B', LDA >= KL+1;
+*>             TYPE = 'Q', LDA >= KU+1;
+*>             TYPE = 'Z', LDA >= 2*KL+KU+1.
 *> \endverbatim
 *>
 *> \param[out] INFO
@@ -127,22 +131,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date June 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     June 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          TYPE
diff --git a/lib/linalg/zlaset.f b/lib/linalg/zlaset.f
index 11f82361b741c14ee49356042759aadddc4f3b34..796678217b50613596a416e9c0730f6410d16903 100644
--- a/lib/linalg/zlaset.f
+++ b/lib/linalg/zlaset.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLASET + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaset.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaset.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaset.f"> 
+*> Download ZLASET + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaset.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaset.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaset.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, M, N
@@ -28,7 +28,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -77,7 +77,7 @@
 *>          All the diagonal array elements are set to BETA.
 *> \endverbatim
 *>
-*> \param[in,out] A
+*> \param[out] A
 *> \verbatim
 *>          A is COMPLEX*16 array, dimension (LDA,N)
 *>          On entry, the m by n matrix A.
@@ -94,22 +94,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zlasr.f b/lib/linalg/zlasr.f
index 5243d8304a953a7d9b47cfdaa41be83b2646907a..69891ba522aeef87189f6bd7257dd5cb4df37a52 100644
--- a/lib/linalg/zlasr.f
+++ b/lib/linalg/zlasr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLASR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlasr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlasr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlasr.f"> 
+*> Download ZLASR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlasr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlasr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlasr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, PIVOT, SIDE
 *       INTEGER            LDA, M, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   C( * ), S( * )
 *       COMPLEX*16         A( LDA, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -49,23 +49,23 @@
 *> where P is an orthogonal matrix consisting of a sequence of z plane
 *> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R',
 *> and P**T is the transpose of P.
-*> 
+*>
 *> When DIRECT = 'F' (Forward sequence), then
-*> 
+*>
 *>    P = P(z-1) * ... * P(2) * P(1)
-*> 
+*>
 *> and when DIRECT = 'B' (Backward sequence), then
-*> 
+*>
 *>    P = P(1) * P(2) * ... * P(z-1)
-*> 
+*>
 *> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation
-*> 
+*>
 *>    R(k) = (  c(k)  s(k) )
 *>         = ( -s(k)  c(k) ).
-*> 
+*>
 *> When PIVOT = 'V' (Variable pivot), the rotation is performed
 *> for the plane (k,k+1), i.e., P(k) has the form
-*> 
+*>
 *>    P(k) = (  1                                            )
 *>           (       ...                                     )
 *>           (              1                                )
@@ -74,13 +74,13 @@
 *>           (                                1              )
 *>           (                                     ...       )
 *>           (                                            1  )
-*> 
+*>
 *> where R(k) appears as a rank-2 modification to the identity matrix in
 *> rows and columns k and k+1.
-*> 
+*>
 *> When PIVOT = 'T' (Top pivot), the rotation is performed for the
 *> plane (1,k+1), so P(k) has the form
-*> 
+*>
 *>    P(k) = (  c(k)                    s(k)                 )
 *>           (         1                                     )
 *>           (              ...                              )
@@ -89,12 +89,12 @@
 *>           (                                 1             )
 *>           (                                      ...      )
 *>           (                                             1 )
-*> 
+*>
 *> where R(k) appears in rows and columns 1 and k+1.
-*> 
+*>
 *> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is
 *> performed for the plane (k,z), giving P(k) the form
-*> 
+*>
 *>    P(k) = ( 1                                             )
 *>           (      ...                                      )
 *>           (             1                                 )
@@ -103,7 +103,7 @@
 *>           (                              ...              )
 *>           (                                     1         )
 *>           (                 -s(k)                    c(k) )
-*> 
+*>
 *> where R(k) appears in rows and columns k and z.  The rotations are
 *> performed without ever forming P(k) explicitly.
 *> \endverbatim
@@ -188,22 +188,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIRECT, PIVOT, SIDE
diff --git a/lib/linalg/zlassq.f b/lib/linalg/zlassq.f
index 5b7e66c30bd421e41b836e4262903dc952022712..fd13811bd996cc8a548e7b9eb161f22ba9a0642f 100644
--- a/lib/linalg/zlassq.f
+++ b/lib/linalg/zlassq.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLASSQ + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlassq.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlassq.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlassq.f"> 
+*> Download ZLASSQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlassq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlassq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlassq.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INCX, N
 *       DOUBLE PRECISION   SCALE, SUMSQ
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         X( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -94,22 +94,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INCX, N
diff --git a/lib/linalg/zlatrd.f b/lib/linalg/zlatrd.f
index 619d7280c482270f6117235e941bf529e347ebc0..ccc040993f81a1fc23d3ede5c58e3b64154f1943 100644
--- a/lib/linalg/zlatrd.f
+++ b/lib/linalg/zlatrd.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZLATRD + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlatrd.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlatrd.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlatrd.f"> 
+*> Download ZLATRD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlatrd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlatrd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlatrd.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            LDA, LDW, N, NB
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   E( * )
 *       COMPLEX*16         A( LDA, * ), TAU( * ), W( LDW, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -135,12 +135,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERauxiliary
 *
@@ -199,10 +199,10 @@
 *  =====================================================================
       SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
 *
-*  -- LAPACK auxiliary routine (version 3.4.2) --
+*  -- LAPACK auxiliary routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zpptrf.f b/lib/linalg/zpptrf.f
index c34aff332a508c367cb881f5ae12e508f93506fc..6e50b4682859468ab43c4c10bdce954048f51f39 100644
--- a/lib/linalg/zpptrf.f
+++ b/lib/linalg/zpptrf.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZPPTRF + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zpptrf.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zpptrf.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zpptrf.f"> 
+*> Download ZPPTRF + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zpptrf.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zpptrf.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zpptrf.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZPPTRF( UPLO, N, AP, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         AP( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -87,12 +87,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
@@ -119,10 +119,10 @@
 *  =====================================================================
       SUBROUTINE ZPPTRF( UPLO, N, AP, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zpptri.f b/lib/linalg/zpptri.f
index 09467974502f8b57301352e23399988060b4e3ef..cde2f6dc72f6662baf82bac3753df36cee3af0a5 100644
--- a/lib/linalg/zpptri.f
+++ b/lib/linalg/zpptri.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZPPTRI + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zpptri.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zpptri.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zpptri.f"> 
+*> Download ZPPTRI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zpptri.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zpptri.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zpptri.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZPPTRI( UPLO, N, AP, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         AP( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -81,22 +81,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZPPTRI( UPLO, N, AP, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/linalg/zscal.f b/lib/linalg/zscal.f
index ad28a10a9b8e35641cc08e70560a8d6fa112e67e..9f6d4b1d3911851bfcb15a431d2ed64797d2e4ec 100644
--- a/lib/linalg/zscal.f
+++ b/lib/linalg/zscal.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZSCAL(N,ZA,ZX,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ZA
 *       INTEGER INCX,N
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -27,15 +27,41 @@
 *>    ZSCAL scales a vector by a constant.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in] ZA
+*> \verbatim
+*>          ZA is COMPLEX*16
+*>           On entry, ZA specifies the scalar alpha.
+*> \endverbatim
+*>
+*> \param[in,out] ZX
+*> \verbatim
+*>          ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -52,10 +78,10 @@
 *  =====================================================================
       SUBROUTINE ZSCAL(N,ZA,ZX,INCX)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ZA
diff --git a/lib/linalg/zsteqr.f b/lib/linalg/zsteqr.f
index 33af78e854425201ba713272e0532406d325ad8b..ac47890685db9e93e80bfff546ebfb1327c18500 100644
--- a/lib/linalg/zsteqr.f
+++ b/lib/linalg/zsteqr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZSTEQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsteqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsteqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsteqr.f"> 
+*> Download ZSTEQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsteqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsteqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsteqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          COMPZ
 *       INTEGER            INFO, LDZ, N
@@ -28,7 +28,7 @@
 *       DOUBLE PRECISION   D( * ), E( * ), WORK( * )
 *       COMPLEX*16         Z( LDZ, * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -120,22 +120,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          COMPZ
diff --git a/lib/linalg/zswap.f b/lib/linalg/zswap.f
index ca2f34721192c53ee11ec4e0dc05a722e5e4cfe8..6768d5e6e04e0c8e67065bbb79a7b53b299f6bff 100644
--- a/lib/linalg/zswap.f
+++ b/lib/linalg/zswap.f
@@ -2,21 +2,21 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,INCY,N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16 ZX(*),ZY(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -26,15 +26,46 @@
 *>    ZSWAP interchanges two vectors.
 *> \endverbatim
 *
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         number of elements in input vector(s)
+*> \endverbatim
+*>
+*> \param[in,out] ZX
+*> \verbatim
+*>          ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
+*> \endverbatim
+*>
+*> \param[in] INCX
+*> \verbatim
+*>          INCX is INTEGER
+*>         storage spacing between elements of ZX
+*> \endverbatim
+*>
+*> \param[in,out] ZY
+*> \verbatim
+*>          ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
+*> \endverbatim
+*>
+*> \param[in] INCY
+*> \verbatim
+*>          INCY is INTEGER
+*>         storage spacing between elements of ZY
+*> \endverbatim
+*
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level1
 *
@@ -50,10 +81,10 @@
 *  =====================================================================
       SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY)
 *
-*  -- Reference BLAS level1 routine (version 3.4.0) --
+*  -- Reference BLAS level1 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,INCY,N
diff --git a/lib/linalg/ztpmv.f b/lib/linalg/ztpmv.f
index e277ec1a6ed073a08a32c1da030760b6f0290776..65aa2a0abc131fe0ec4222f792ae4d8355c7d9ef 100644
--- a/lib/linalg/ztpmv.f
+++ b/lib/linalg/ztpmv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       CHARACTER DIAG,TRANS,UPLO
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 AP(*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -80,7 +80,7 @@
 *>
 *> \param[in] AP
 *> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
+*>          AP is COMPLEX*16 array, dimension at least
 *>           ( ( n*( n + 1 ) )/2 ).
 *>           Before entry with  UPLO = 'U' or 'u', the array AP must
 *>           contain the upper triangular matrix packed sequentially,
@@ -96,13 +96,13 @@
 *>           A are not referenced, but are assumed to be unity.
 *> \endverbatim
 *>
-*> \param[in] X
+*> \param[in,out] X
 *> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x. On exit, X is overwritten with the
-*>           tranformed vector x.
+*>           transformed vector x.
 *> \endverbatim
 *>
 *> \param[in] INCX
@@ -115,12 +115,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -142,10 +142,10 @@
 *  =====================================================================
       SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/ztpsv.f b/lib/linalg/ztpsv.f
index 0e75f9facf5641676feca1c69c364372d845f4fc..538888424afe8f4a20b5cb373051d0978fb65668 100644
--- a/lib/linalg/ztpsv.f
+++ b/lib/linalg/ztpsv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,N
 *       CHARACTER DIAG,TRANS,UPLO
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 AP(*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -83,7 +83,7 @@
 *>
 *> \param[in] AP
 *> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
+*>          AP is COMPLEX*16 array, dimension at least
 *>           ( ( n*( n + 1 ) )/2 ).
 *>           Before entry with  UPLO = 'U' or 'u', the array AP must
 *>           contain the upper triangular matrix packed sequentially,
@@ -101,7 +101,7 @@
 *>
 *> \param[in,out] X
 *> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element right-hand side vector b. On exit, X is overwritten
@@ -118,12 +118,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -144,10 +144,10 @@
 *  =====================================================================
       SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,N
diff --git a/lib/linalg/ztptri.f b/lib/linalg/ztptri.f
index 187c9ccac12875318e0ad719ef2912f6f64640aa..35388194c3aa03214a2fdd2c190bd95a338eb373 100644
--- a/lib/linalg/ztptri.f
+++ b/lib/linalg/ztptri.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZTPTRI + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ztptri.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ztptri.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ztptri.f"> 
+*> Download ZTPTRI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ztptri.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ztptri.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ztptri.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, UPLO
 *       INTEGER            INFO, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         AP( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -86,12 +86,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
@@ -117,10 +117,10 @@
 *  =====================================================================
       SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, UPLO
diff --git a/lib/linalg/ztrmm.f b/lib/linalg/ztrmm.f
index ba7aead68b5df58348242a8497ded795e1215168..0f445f52a7803fa2c5842d897f9897dc946fc3d1 100644
--- a/lib/linalg/ztrmm.f
+++ b/lib/linalg/ztrmm.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-* 
+*
 *       .. Scalar Arguments ..
 *       COMPLEX*16 ALPHA
 *       INTEGER LDA,LDB,M,N
@@ -18,7 +18,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),B(LDB,*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -109,7 +109,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m
+*>          A is COMPLEX*16 array, dimension ( LDA, k ), where k is m
 *>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
 *>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
 *>           upper triangular part of the array  A must contain the upper
@@ -132,9 +132,9 @@
 *>           then LDA must be at least max( 1, n ).
 *> \endverbatim
 *>
-*> \param[in] B
+*> \param[in,out] B
 *> \verbatim
-*>          B is (input/output) COMPLEX*16 array of DIMENSION ( LDB, n ).
+*>          B is COMPLEX*16 array, dimension ( LDB, N ).
 *>           Before entry,  the leading  m by n part of the array  B must
 *>           contain the matrix  B,  and  on exit  is overwritten  by the
 *>           transformed matrix.
@@ -151,12 +151,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level3
 *
@@ -177,10 +177,10 @@
 *  =====================================================================
       SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
 *
-*  -- Reference BLAS level3 routine (version 3.4.0) --
+*  -- Reference BLAS level3 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       COMPLEX*16 ALPHA
diff --git a/lib/linalg/ztrmv.f b/lib/linalg/ztrmv.f
index 8d7974a059112c0604fd63890060bf17a1ff446c..52d1ae6799c5d6dec60b810ba66de046d216227e 100644
--- a/lib/linalg/ztrmv.f
+++ b/lib/linalg/ztrmv.f
@@ -2,14 +2,14 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER INCX,LDA,N
 *       CHARACTER DIAG,TRANS,UPLO
@@ -17,7 +17,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16 A(LDA,*),X(*)
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -80,7 +80,7 @@
 *>
 *> \param[in] A
 *> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
+*>          A is COMPLEX*16 array, dimension ( LDA, N ).
 *>           Before entry with  UPLO = 'U' or 'u', the leading n by n
 *>           upper triangular part of the array A must contain the upper
 *>           triangular matrix and the strictly lower triangular part of
@@ -101,13 +101,13 @@
 *>           max( 1, n ).
 *> \endverbatim
 *>
-*> \param[in] X
+*> \param[in,out] X
 *> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
+*>          X is COMPLEX*16 array, dimension at least
 *>           ( 1 + ( n - 1 )*abs( INCX ) ).
 *>           Before entry, the incremented array X must contain the n
 *>           element vector x. On exit, X is overwritten with the
-*>           tranformed vector x.
+*>           transformed vector x.
 *> \endverbatim
 *>
 *> \param[in] INCX
@@ -120,12 +120,12 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16_blas_level2
 *
@@ -147,10 +147,10 @@
 *  =====================================================================
       SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
 *
-*  -- Reference BLAS level2 routine (version 3.4.0) --
+*  -- Reference BLAS level2 routine (version 3.7.0) --
 *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER INCX,LDA,N
diff --git a/lib/linalg/zung2l.f b/lib/linalg/zung2l.f
index f8fd3667d26cf7230a3402eafe3b4d6c2868ac7a..1a48c4d6bcc44c6aa4c79f91489334d0c31d0494 100644
--- a/lib/linalg/zung2l.f
+++ b/lib/linalg/zung2l.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNG2L + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2l.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2l.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2l.f"> 
+*> Download ZUNG2L + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2l.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2l.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2l.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/zung2r.f b/lib/linalg/zung2r.f
index 63783ac01b65fabbe27433788720a21e1ec7bf9a..4a3fed0f0d8de3e2479a452464b49507b77ac795 100644
--- a/lib/linalg/zung2r.f
+++ b/lib/linalg/zung2r.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNG2R + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2r.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2r.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2r.f"> 
+*> Download ZUNG2R + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2r.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2r.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2r.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -102,22 +102,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/zungl2.f b/lib/linalg/zungl2.f
index 44acba12a6e2885de1ae366063ec4174d1491301..0774cc440541d5db32a99212ec8aeb502e0d6948 100644
--- a/lib/linalg/zungl2.f
+++ b/lib/linalg/zungl2.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNGL2 + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungl2.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungl2.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungl2.f"> 
+*> Download ZUNGL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungl2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungl2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungl2.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -101,22 +101,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date September 2012
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.2) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     September 2012
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, M, N
diff --git a/lib/linalg/zungql.f b/lib/linalg/zungql.f
index 5c77abbd4621d85ac27c9bb672f2f298f720c140..c63a47db56ad0a8973982f7d86266d74ef9992ab 100644
--- a/lib/linalg/zungql.f
+++ b/lib/linalg/zungql.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNGQL + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungql.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungql.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungql.f"> 
+*> Download ZUNGQL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungql.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungql.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungql.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,22 +116,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, LWORK, M, N
diff --git a/lib/linalg/zungqr.f b/lib/linalg/zungqr.f
index 6b3e9220cd41560a85637d0cbceedf4a77a4f8f2..5f95b64e883f60a521dd694ac106eaed1b00ae9d 100644
--- a/lib/linalg/zungqr.f
+++ b/lib/linalg/zungqr.f
@@ -2,31 +2,31 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNGQR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungqr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungqr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungqr.f"> 
+*> Download ZUNGQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungqr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, K, LDA, LWORK, M, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -116,22 +116,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, K, LDA, LWORK, M, N
diff --git a/lib/linalg/zungtr.f b/lib/linalg/zungtr.f
index 422a55a921ffd166a06c76cd8f37d3da5420feb5..728854332f2aedeb2b84d0163268ffa4755fccc9 100644
--- a/lib/linalg/zungtr.f
+++ b/lib/linalg/zungtr.f
@@ -2,24 +2,24 @@
 *
 *  =========== DOCUMENTATION ===========
 *
-* Online html documentation available at 
-*            http://www.netlib.org/lapack/explore-html/ 
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
-*> Download ZUNGTR + dependencies 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungtr.f"> 
-*> [TGZ]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungtr.f"> 
-*> [ZIP]</a> 
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungtr.f"> 
+*> Download ZUNGTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungtr.f">
 *> [TXT]</a>
-*> \endhtmlonly 
+*> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
-* 
+*
 *       .. Scalar Arguments ..
 *       CHARACTER          UPLO
 *       INTEGER            INFO, LDA, LWORK, N
@@ -27,7 +27,7 @@
 *       .. Array Arguments ..
 *       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 *       ..
-*  
+*
 *
 *> \par Purpose:
 *  =============
@@ -111,22 +111,22 @@
 *  Authors:
 *  ========
 *
-*> \author Univ. of Tennessee 
-*> \author Univ. of California Berkeley 
-*> \author Univ. of Colorado Denver 
-*> \author NAG Ltd. 
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
 *
-*> \date November 2011
+*> \date December 2016
 *
 *> \ingroup complex16OTHERcomputational
 *
 *  =====================================================================
       SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.4.0) --
+*  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2011
+*     December 2016
 *
 *     .. Scalar Arguments ..
       CHARACTER          UPLO
diff --git a/lib/meam/meam_setup_done.F b/lib/meam/meam_setup_done.F
index 8c22f4ca89c4e85ae93a1a78e97155d750a7fc99..c94bce8b445602ca6a8d8d3b0b56028d425f67e4 100644
--- a/lib/meam/meam_setup_done.F
+++ b/lib/meam/meam_setup_done.F
@@ -232,7 +232,8 @@ c     pairs, but need to include NN2 contributions to those pairs as
 c     well.
               if (lattce_meam(a,b).eq.'b1'.or.
      $             lattce_meam(a,b).eq.'b2'.or.
-     $             lattce_meam(a,b).eq.'l12') then
+     $             lattce_meam(a,b).eq.'l12'.or.
+     $             lattce_meam(a,b).eq.'dia') then
                 rarat = r*arat
 
 c               phi_aa
@@ -262,7 +263,8 @@ c               phi_bb
                 endif
 
                 if (lattce_meam(a,b).eq.'b1'.
-     $               or.lattce_meam(a,b).eq.'b2') then
+     $               or.lattce_meam(a,b).eq.'b2'.
+     $               or.lattce_meam(a,b).eq.'dia') then
 c     Add contributions to the B1 or B2 potential
                   call get_Zij(Z1,lattce_meam(a,b))
                   call get_Zij2(Z2,arat,scrn,lattce_meam(a,b),
@@ -720,9 +722,9 @@ c neighbor screening function for lattice type "latt"
         a = sqrt(2.d0)
         numscr = 4
       else if (latt.eq.'dia') then
-        Zij2 = 0
+        Zij2 = 12
         a = sqrt(8.d0/3.d0)
-        numscr = 4
+        numscr = 1
         if (cmin.lt.0.500001) then
 c          call error('can not do 2NN MEAM for dia')
         endif
diff --git a/lib/mscg/Install.py b/lib/mscg/Install.py
index 76c986ef6d5f2409c448f7eeaec92d1e06d759b3..ec70f13f6e7fecc770c4d92ff04cf62b3f585a53 100644
--- a/lib/mscg/Install.py
+++ b/lib/mscg/Install.py
@@ -30,9 +30,10 @@ make lib-mscg args="-p /usr/local/mscg-release " # use existing MS-CG installati
 
 # settings
 
-url = "http://github.com/uchicago-voth/MSCG-release/archive/master.tar.gz"
-tarfile = "MS-CG-master.tar.gz"
-tardir = "MSCG-release-master"
+mscgver = "1.7.3.1"
+url = "https://github.com/uchicago-voth/MSCG-release/archive/%s.tar.gz" % mscgver
+tarfile = "MS-CG-%s.tar.gz" % mscgver
+tardir = "MSCG-release-%s" % mscgver
 
 # print error message or help
 
diff --git a/potentials/ffield.reax.lg b/potentials/ffield.reax.lg
index e8650d7a9a9a6a88dc7b0889e0d193208570c218..15f303ac12b593c55f2d560d2b9b123def2e3f57 100644
--- a/potentials/ffield.reax.lg
+++ b/potentials/ffield.reax.lg
@@ -39,7 +39,7 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
     5.0000 !Molecular energy (not used)           
     0.0000 !Molecular energy (not used)           
     6.9784 !Valency angle conjugation parameter   
-  7    ! Nr of atoms; cov.r; valency;a.m;Rvdw;Evdw;gammaEEM;cov.r2;#
+  5    ! Nr of atoms; cov.r; valency;a.m;Rvdw;Evdw;gammaEEM;cov.r2;#
             alfa;gammavdW;valency;Eunder;Eover;chiEEM;etaEEM;n.u.               
             cov r3;Elp;Heat inc.;n.u.;n.u.;n.u.;n.u.                            
             ov/un;val1;n.u.;val3,vval4                                          
@@ -68,17 +68,7 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
       1.4703   9.4922  70.0338   8.5146  28.0801   8.5010   0.9745   0.0000
     -10.0773   2.7466   1.0338   6.2998   2.8793   1.8000   0.0000  14.0000
     180.0000   2.0783
- Si   2.0276   4.0000  28.0600   2.2042   0.1322   0.8218   1.5758   4.0000
-     11.9413   2.0618   4.0000  11.8211 136.4845   1.8038   7.3852   0.0000
-     -1.0000   0.0000 126.5331   6.4918   8.5961   0.2368   0.8563   0.0000
-     -3.8112   3.1873   1.0338   4.0000   2.5791   0.0000   0.0000   0.0000
-    180.0000   2.2042
- X   -0.1000   2.0000   1.0080   2.0000   0.0000   1.0000  -0.1000   6.0000
-     10.0000   2.5000   4.0000   0.0000   0.0000   8.5000   1.5000   0.0000
-     -0.1000   0.0000  -2.3700   8.7410  13.3640   0.6690   0.9745   0.0000
-    -11.0000   2.7466   1.0338   4.0000   2.8793   0.0000   0.0000   0.0000
-    180.0000   2.0000
- 18      ! Nr of bonds; Edis1;LPpen;n.u.;pbe1;pbo5;13corr;pbo6      
+ 10      ! Nr of bonds; Edis1;LPpen;n.u.;pbe1;pbo5;13corr;pbo6      
                          pbe2;pbo3;pbo4;Etrip;pbo1;pbo2;ovcorr                  
   1  1 141.9346 113.4487  67.6027   0.1554  -0.3045   1.0000  30.4515   0.4283
          0.0801  -0.2113   8.5395   1.0000  -0.0933   6.6967   1.0000   0.0000
@@ -100,34 +90,14 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
          5.1146   1.0000   0.0000   1.0000  -0.0532   5.1189   0.0000   0.0000
   2  4 208.0443   0.0000   0.0000  -0.3923   0.0000   1.0000   6.0000   0.3221
         10.5505   1.0000   0.0000   1.0000  -0.0690   6.2949   0.0000   0.0000
-  1  5 128.7959  56.4134  39.0716   0.0688  -0.4463   1.0000  31.1766   0.4530
-         0.1955  -0.3587   6.2148   1.0000  -0.0770   6.6386   1.0000   0.0000
-  2  5 128.6090   0.0000   0.0000  -0.5555   0.0000   1.0000   6.0000   0.4721
-        10.8735   1.0000   0.0000   1.0000  -0.0242   9.1937   1.0000   0.0000
-  3  5   0.0000   0.0000   0.0000   0.5563  -0.4038   1.0000  49.5611   0.6000
-         0.4259  -0.4577  12.7569   1.0000  -0.1100   7.1145   1.0000   0.0000
-  4  5   0.0000   0.0000   0.0000   0.4438  -0.2034   1.0000  40.3399   0.6000
-         0.3296  -0.3153   9.1227   1.0000  -0.1805   5.6864   1.0000   0.0000
-  5  5  96.1871  93.7006  68.6860   0.0955  -0.4781   1.0000  17.8574   0.6000
-         0.2723  -0.2373   9.7875   1.0000  -0.0950   6.4757   1.0000   0.0000
-  6  6 109.1904  70.8314  30.0000   0.2765  -0.3000   1.0000  16.0000   0.1583
-         0.2804  -0.1994   8.1117   1.0000  -0.0675   8.2993   0.0000   0.0000
-  2  6 137.1002   0.0000   0.0000  -0.1902   0.0000   1.0000   6.0000   0.4256
-        17.7186   1.0000   0.0000   1.0000  -0.0377   6.4281   0.0000   0.0000
-  3  6 191.1743  52.0733  43.3991  -0.2584  -0.3000   1.0000  36.0000   0.8764
-         1.0248  -0.3658   4.2151   1.0000  -0.5004   4.2605   1.0000   0.0000
- 10    ! Nr of off-diagonal terms; Ediss;Ro;gamma;rsigma;rpi;rpi2   
+ 6     ! Nr of off-diagonal terms; Ediss;Ro;gamma;rsigma;rpi;rpi2   
   1  2   0.0464   1.8296   9.9214   1.0029  -1.0000  -1.0000   0.0000
   2  3   0.0403   1.6913  10.4801   0.8774  -1.0000  -1.0000   0.0000
   2  4   0.0524   1.7325  10.1306   0.9982  -1.0000  -1.0000 294.9500
   1  3   0.1028   1.9277   9.1521   1.3399   1.1104   1.1609 631.8500
   1  4   0.2070   1.7366   9.5916   1.2960   1.2008   1.1262 650.0000
   3  4   0.0491   1.7025  10.6101   1.3036   1.1276   1.0173 880.0000
-  2  6   0.0470   1.6738  11.6877   1.1931  -1.0000  -1.0000   0.0000
-  3  6   0.1263   1.8163  10.6833   1.6266   1.2052  -1.0000   0.0000
-  1  5   0.1408   1.8161   9.9393   1.7986   1.3021   1.4031   0.0000
-  2  5   0.0895   1.6239  10.0104   1.4640  -1.0000  -1.0000   0.0000
- 62    ! Nr of angles;at1;at2;at3;Thetao,o;ka;kb;pv1;pv2            
+ 41    ! Nr of angles;at1;at2;at3;Thetao,o;ka;kb;pv1;pv2            
   1  1  1  74.0317  32.2712   0.9501   0.0000   0.1780  10.5736   1.0400
   1  1  2  70.6558  14.3658   5.3224   0.0000   0.0058   0.0000   1.0400
   2  1  2  76.7339  14.4217   3.3631   0.0000   0.0127   0.0000   1.0400
@@ -164,33 +134,12 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
   2  4  2  55.8679  14.2331   2.9225   0.0000   0.2000   0.0000   2.9932
   1  2  3   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   1  2  4   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
-  1  2  5   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   3  2  3   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   3  2  4   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   4  2  4   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   2  2  3   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
   2  2  4   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
-  1  1  5  74.4180  33.4273   1.7018   0.1463   0.5000   0.0000   1.6178
-  1  5  1  79.7037  28.2036   1.7073   0.1463   0.5000   0.0000   1.6453
-  2  1  5  63.3289  29.4225   2.1326   0.0000   0.5000   0.0000   3.0000
-  1  5  2  85.9449  38.3109   1.2492   0.0000   0.5000   0.0000   1.1000
-  1  5  5  85.6645  40.0000   2.9274   0.1463   0.5000   0.0000   1.3830
-  2  5  2  83.8555   5.1317   0.4377   0.0000   0.5000   0.0000   3.0000
-  2  5  5  97.0064  32.1121   2.0242   0.0000   0.5000   0.0000   2.8568
-  6  6  6  69.3456  21.7361   1.4283   0.0000  -0.2101   0.0000   1.3241
-  2  6  6  75.6168  21.5317   1.0435   0.0000   2.5179   0.0000   1.0400
-  2  6  2  78.3939  20.9772   0.8630   0.0000   2.8421   0.0000   1.0400
-  3  6  6  70.3016  15.4081   1.3267   0.0000   2.1459   0.0000   1.0400
-  2  6  3  73.8232  16.6592   3.7425   0.0000   0.8613   0.0000   1.0400
-  3  6  3  90.0344   7.7656   1.7264   0.0000   0.7689   0.0000   1.0400
-  6  3  6  22.1715   3.6615   0.3160   0.0000   4.1125   0.0000   1.0400
-  2  3  6  83.7634   5.6693   2.7780   0.0000   1.6982   0.0000   1.0400
-  3  3  6  73.4663  25.0761   0.9143   0.0000   2.2466   0.0000   1.0400
-  2  2  6   0.0000  47.1300   6.0000   0.0000   1.6371   0.0000   1.0400
-  6  2  6   0.0000  31.5209   6.0000   0.0000   1.6371   0.0000   1.0400
-  3  2  6   0.0000  31.0427   4.5625   0.0000   1.6371   0.0000   1.0400
-  2  2  5   0.0000   0.0019   6.0000   0.0000   0.0000   0.0000   1.0400
- 31    ! Nr of torsions;at1;at2;at3;at4;;V1;V2;V3;V2(BO);vconj;n.u;n
+ 25    ! Nr of torsions;at1;at2;at3;at4;;V1;V2;V3;V2(BO);vconj;n.u;n
   1  1  1  1   0.0000  48.4194   0.3163  -8.6506  -1.7255   0.0000   0.0000
   1  1  1  2   0.0000  63.3484   0.2210  -8.8401  -1.8081   0.0000   0.0000
   2  1  1  2   0.0000  45.2741   0.4171  -6.9800  -1.2359   0.0000   0.0000
@@ -205,12 +154,6 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
   0  4  4  0   2.0000  75.3685  -0.7852  -9.0000  -2.0000   0.0000   0.0000
   0  1  1  0   0.0930  18.6070  -1.3191  -9.0000  -1.0000   0.0000   0.0000
   4  1  4  4  -2.0000  20.6655  -1.5000  -9.0000  -2.0000   0.0000   0.0000
-  0  1  5  0   4.0885  78.7058   0.1174  -2.1639   0.0000   0.0000   0.0000
-  0  5  5  0  -0.0170 -56.0786   0.6132  -2.2092   0.0000   0.0000   0.0000
-  0  2  5  0   0.0000   0.0000   0.0000   0.0000   0.0000   0.0000   0.0000
-  0  6  6  0   0.0000   0.0000   0.1200  -2.4426   0.0000   0.0000   0.0000
-  0  2  6  0   0.0000   0.0000   0.1200  -2.4847   0.0000   0.0000   0.0000
-  0  3  6  0   0.0000   0.0000   0.1200  -2.4703   0.0000   0.0000   0.0000
   1  1  3  3   1.2707  21.6200   1.5000  -9.0000  -2.0000   0.0000   0.0000
   1  3  3  1  -1.8804  79.9255  -1.5000  -4.1940  -2.0000   0.0000   0.0000
   3  1  3  3  -2.0000  22.5092   1.5000  -8.9500  -2.0000   0.0000   0.0000
@@ -222,13 +165,8 @@ DATE: 2012-01-06 CONTRIBUTOR: Aidan Thompson, athomps@sandia.gov CITATION: Liu,
   1  1  4  3  -2.0000  47.8326  -1.5000  -9.0000  -2.0000   0.0000   0.0000
   2  3  4  3  -0.2997 152.9040  -1.5000  -4.4564  -2.0000   0.0000   0.0000
   2  4  4  3   0.1040  70.1152   0.5284  -3.5026  -2.0000   0.0000   0.0000
-  9    ! Nr of hydrogen bonds;at1;at2;at3;Rhb;Dehb;vhb1             
+  4    ! Nr of hydrogen bonds;at1;at2;at3;Rhb;Dehb;vhb1             
   3  2  3   2.1845  -2.3549   3.0582  19.1627
   3  2  4   1.6658  -3.8907   3.0582  19.1627
   4  2  3   1.8738  -3.5421   3.0582  19.1627
   4  2  4   1.8075  -4.1846   3.0582  19.1627
-  3  2  5   2.6644  -3.0000   3.0000   3.0000
-  4  2  5   4.0476  -3.0000   3.0000   3.0000
-  5  2  3   2.1126  -4.5790   3.0000   3.0000
-  5  2  4   2.2066  -5.7038   3.0000   3.0000
-  5  2  5   1.9461  -4.0000   3.0000   3.0000
diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index a850a9b2b350c7a8c9de4b88fb9fda912f564a4c..20429009db948c5cf89b006c4b1807cb24f376f9 100755
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -35,8 +35,12 @@ action pair_beck_gpu.cpp
 action pair_beck_gpu.h 
 action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp
 action pair_born_coul_long_gpu.h pair_born_coul_long.cpp
+action pair_born_coul_long_cs_gpu.cpp pair_born_coul_long_cs.cpp
+action pair_born_coul_long_cs_gpu.h pair_born_coul_long_cs.cpp
 action pair_born_coul_wolf_gpu.cpp
 action pair_born_coul_wolf_gpu.h
+action pair_born_coul_wolf_cs_gpu.cpp pair_born_coul_wolf_cs.cpp
+action pair_born_coul_wolf_cs_gpu.h pair_born_coul_wolf_cs.cpp
 action pair_born_gpu.cpp
 action pair_born_gpu.h
 action pair_buck_coul_cut_gpu.cpp pair_buck_coul_cut.cpp
@@ -55,6 +59,8 @@ action pair_coul_dsf_gpu.cpp
 action pair_coul_dsf_gpu.h
 action pair_coul_long_gpu.cpp pair_coul_long.cpp
 action pair_coul_long_gpu.h pair_coul_long.cpp
+action pair_coul_long_cs_gpu.cpp pair_coul_long_cs.cpp
+action pair_coul_long_cs_gpu.h pair_coul_long_cs.cpp
 action pair_dpd_gpu.cpp
 action pair_dpd_gpu.h
 action pair_dpd_tstat_gpu.cpp
@@ -97,6 +103,8 @@ action pair_lj_cut_gpu.cpp
 action pair_lj_cut_gpu.h
 action pair_lj_expand_gpu.cpp
 action pair_lj_expand_gpu.h
+action pair_lj_expand_coul_long_gpu.cpp pair_lj_expand_coul_long.cpp
+action pair_lj_expand_coul_long_gpu.h pair_lj_expand_coul_long.cpp
 action pair_lj_gromacs_gpu.cpp
 action pair_lj_gromacs_gpu.h
 action pair_lj_sdk_coul_long_gpu.cpp pair_lj_sdk_coul_long.cpp
diff --git a/src/GPU/pair_born_coul_long_cs_gpu.cpp b/src/GPU/pair_born_coul_long_cs_gpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..13cf5f643abf8bb1b7003a64910ad6dc70fd371d
--- /dev/null
+++ b/src/GPU/pair_born_coul_long_cs_gpu.cpp
@@ -0,0 +1,340 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (Northwestern)
+------------------------------------------------------------------------- */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include "pair_born_coul_long_cs_gpu.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "integrate.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_request.h"
+#include "universe.h"
+#include "update.h"
+#include "domain.h"
+#include "kspace.h"
+#include "gpu_extra.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   9.95473818e-1
+#define B0       -0.1335096380159268
+#define B1       -2.57839507e-1
+#define B2       -1.37203639e-1
+#define B3       -8.88822059e-3
+#define B4       -5.80844129e-3
+#define B5        1.14652755e-1
+
+#define EPSILON 1.0e-20
+#define EPS_EWALD 1.0e-6
+#define EPS_EWALD_SQR 1.0e-12
+
+// External functions from cuda library for atom decomposition
+
+int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
+                    double **host_born1, double **host_born2,
+                    double **host_born3, double **host_a,
+                    double **host_c, double **host_d,
+                    double **sigma, double **offset, double *special_lj,
+                    const int inum, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size,
+                    int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                    double host_cut_coulsq, double *host_special_coul,
+                    const double qqrd2e, const double g_ewald);
+void bornclcs_gpu_clear();
+int** bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                           double **host_x, int *host_type, double *sublo,
+                           double *subhi, tagint *tag, int **nspecial,
+                           tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start,
+                           int **ilist, int **jnum,  const double cpu_time,
+                           bool &success, double *host_q, double *boxlo,
+                           double *prd);
+void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q,
+                        const int nlocal, double *boxlo, double *prd);
+double bornclcs_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairBornCoulLongCSGPU::PairBornCoulLongCSGPU(LAMMPS *lmp) :
+  PairBornCoulLongCS(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairBornCoulLongCSGPU::~PairBornCoulLongCSGPU()
+{
+  bornclcs_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    inum = atom->nlocal;
+    firstneigh = bornclcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+                                      atom->type, domain->sublo, domain->subhi,
+                                      atom->tag, atom->nspecial, atom->special,
+                                      eflag, vflag, eflag_atom, vflag_atom,
+                                      host_start, &ilist, &numneigh, cpu_time,
+                                      success, atom->q, domain->boxlo,
+                                      domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    bornclcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                       vflag_atom, host_start, cpu_time, success, atom->q,
+                       atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  if (host_start<inum) {
+    cpu_time = MPI_Wtime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = MPI_Wtime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairBornCoulLongCSGPU::init_style()
+{
+  if (!atom->q_flag)
+    error->all(FLERR,
+      "Pair style born/coul/long/cs/gpu requires atom attribute q");
+  if (force->newton_pair)
+    error->all(FLERR,
+       "Cannot use newton pair with born/coul/long/cs/gpu pair style");
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i,j);
+        cut *= cut;
+        if (cut > maxcut)
+          maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // insure use of KSpace long-range solver, set g_ewald
+
+  if (force->kspace == NULL)
+    error->all(FLERR,"Pair style requires a KSpace style");
+  g_ewald = force->kspace->g_ewald;
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int success = bornclcs_gpu_init(atom->ntypes+1, cutsq,  rhoinv,
+                                born1, born2, born3, a, c, d, sigma,
+                                offset, force->special_lj, atom->nlocal,
+                                  atom->nlocal+atom->nghost, 300, maxspecial,
+                                   cell_size, gpu_mode, screen, cut_ljsq,
+                                cut_coulsq, force->special_coul,
+                                force->qqrd2e, g_ewald);
+
+  GPU_EXTRA::check_flag(success,error,world);
+
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBornCoulLongCSGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + bornclcs_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
+                                      int vflag, int *ilist, int *numneigh,
+                                      int **firstneigh)
+{
+  int i,j,ii,jj,jnum,itable,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc,u;
+  int *jlist;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+        if (rsq < cut_coulsq) {
+          rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+          r2inv = 1.0/rsq;
+          if (!ncoultablebits || rsq <= tabinnersq) {
+            r = sqrt(rsq);
+            prefactor = qqrd2e * qtmp*q[j];
+            if (factor_coul < 1.0) {
+              // When bonded parts are being calculated a minimal distance (EPS_EWALD)
+              // has to be added to the prefactor and erfc in order to make the
+              // used approximation functions for the Ewald correction valid
+              grij = g_ewald * (r+EPS_EWALD);
+              expm2 = exp(-grij*grij);
+              t = 1.0 / (1.0 + EWALD_P*grij);
+              u = 1.0 - t;
+              erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+              prefactor /= (r+EPS_EWALD);
+              forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
+              // Additionally r2inv needs to be accordingly modified since the later
+              // scaling of the overall force shall be consistent
+              r2inv = 1.0/(rsq + EPS_EWALD_SQR);
+            } else {
+              grij = g_ewald * r;
+              expm2 = exp(-grij*grij);
+              t = 1.0 / (1.0 + EWALD_P*grij);
+              u = 1.0 - t;
+              erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+              prefactor /= r;
+              forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+            }
+          } else {
+            union_int_float_t rsq_lookup;
+            rsq_lookup.f = rsq;
+            itable = rsq_lookup.i & ncoulmask;
+            itable >>= ncoulshiftbits;
+            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+            table = ftable[itable] + fraction*dftable[itable];
+            forcecoul = qtmp*q[j] * table;
+            if (factor_coul < 1.0) {
+              table = ctable[itable] + fraction*dctable[itable];
+              prefactor = qtmp*q[j] * table;
+              forcecoul -= (1.0-factor_coul)*prefactor;
+            }
+          }
+
+          forcecoul *= r2inv;
+
+        } else forcecoul = 0;
+
+        r2inv = 1.0/rsq;
+        r = sqrt(rsq);
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r6inv = r2inv*r2inv*r2inv;
+          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+            + born3[itype][jtype]*r2inv*r6inv;
+        } else forceborn = 0.0;
+
+        fpair = forcecoul + factor_lj*forceborn * r2inv;
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            ecoul = prefactor*erfc;
+            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+          } else ecoul = 0.0;
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
+              + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else evdwl = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_born_coul_long_cs_gpu.h b/src/GPU/pair_born_coul_long_cs_gpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..65c80157f2a6c334e229d4d072cb3acfa02d5d75
--- /dev/null
+++ b/src/GPU/pair_born_coul_long_cs_gpu.h
@@ -0,0 +1,66 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/coul/long/cs/gpu,PairBornCoulLongCSGPU)
+
+#else
+
+#ifndef LMP_PAIR_BORN_COUL_LONG_CS_GPU_H
+#define LMP_PAIR_BORN_COUL_LONG_CS_GPU_H
+
+#include "pair_born_coul_long_cs.h"
+
+namespace LAMMPS_NS {
+
+class PairBornCoulLongCSGPU : public PairBornCoulLongCS {
+ public:
+  PairBornCoulLongCSGPU(LAMMPS *lmp);
+  ~PairBornCoulLongCSGPU();
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int);
+  void init_style();
+  double memory_usage();
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package
+
+E: Pair style born/coul/long/cs/gpu requires atom attribute q
+
+The atom style defined does not have this attribute.
+
+E: Cannot use newton pair with born/coul/long/cs/gpu pair style
+
+Self-explanatory.
+
+E: Pair style requires a KSpace style
+
+No kspace style is defined.
+
+*/
diff --git a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..90ddf825a96c42859d1876a4e502a1c5cf60ac61
--- /dev/null
+++ b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
@@ -0,0 +1,295 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Trung Dac Nguyen (Northwestern)
+------------------------------------------------------------------------- */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include "pair_born_coul_wolf_cs_gpu.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "integrate.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_request.h"
+#include "universe.h"
+#include "update.h"
+#include "domain.h"
+#include "gpu_extra.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EPSILON 1.0e-20
+
+// External functions from cuda library for atom decomposition
+
+int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
+                    double **host_born1, double **host_born2,
+                    double **host_born3, double **host_a, double **host_c,
+                    double **host_d, double **sigma, double **offset,
+                    double *special_lj, const int inum,
+                    const int nall, const int max_nbors, const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen,
+                    double **host_cut_ljsq, double host_cut_coulsq,
+                    double *host_special_coul, const double qqrd2e,
+                    const double alf, const double e_shift, const double f_shift);
+void borncwcs_gpu_clear();
+int ** borncwcs_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                            double **host_x, int *host_type, double *sublo,
+                            double *subhi, tagint *tag, int **nspecial,
+                            tagint **special, const bool eflag, const bool vflag,
+                            const bool eatom, const bool vatom, int &host_start,
+                            int **ilist, int **jnum, const double cpu_time,
+                            bool &success, double *host_q, double *boxlo,
+                            double *prd);
+void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, double *host_q,
+                        const int nlocal, double *boxlo, double *prd);
+double borncwcs_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairBornCoulWolfCSGPU::PairBornCoulWolfCSGPU(LAMMPS *lmp) : PairBornCoulWolfCS(lmp),
+                                                      gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairBornCoulWolfCSGPU::~PairBornCoulWolfCSGPU()
+{
+  borncwcs_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    inum = atom->nlocal;
+    firstneigh = borncwcs_gpu_compute_n(neighbor->ago, inum, nall,
+                                      atom->x, atom->type, domain->sublo,
+                                      domain->subhi, atom->tag, atom->nspecial,
+                                      atom->special, eflag, vflag, eflag_atom,
+                                      vflag_atom, host_start,
+                                      &ilist, &numneigh, cpu_time, success,
+                                      atom->q, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    borncwcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                       vflag_atom, host_start, cpu_time, success, atom->q,
+                       atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  if (host_start<inum) {
+    cpu_time = MPI_Wtime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = MPI_Wtime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolfCSGPU::init_style()
+{
+  if (force->newton_pair)
+    error->all(FLERR,
+      "Cannot use newton pair with born/coul/wolf/cs/gpu pair style");
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i,j);
+        cut *= cut;
+        if (cut > maxcut)
+          maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  double e_shift = erfc(alf*cut_coul)/cut_coul;
+  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
+    cut_coul;
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int success = borncwcs_gpu_init(atom->ntypes+1, cutsq, rhoinv,
+                                born1, born2, born3, a, c, d, sigma, offset,
+                                force->special_lj, atom->nlocal,
+                                atom->nlocal+atom->nghost, 300, maxspecial,
+                                cell_size, gpu_mode, screen, cut_ljsq,
+                                cut_coulsq, force->special_coul, force->qqrd2e,
+                                alf, e_shift, f_shift);
+  GPU_EXTRA::check_flag(success,error,world);
+
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBornCoulWolfCSGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + borncwcs_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag, int vflag,
+                                      int *ilist, int *numneigh,
+                                      int **firstneigh) {
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
+  double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
+  double prefactor;
+  double r,rexp;
+  int *jlist;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  double e_shift = erfc(alf*cut_coul)/cut_coul;
+  double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
+    cut_coul;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    qisq = qtmp*qtmp;
+    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
+    if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        rsq += EPSILON; // Add EPISLON for case: r = 0; Interaction must be removed by special bond
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_coulsq) {
+          r = sqrt(rsq);
+          prefactor = qqrd2e*qtmp*q[j]/r;
+          erfcc = erfc(alf*r);
+          erfcd = exp(-alf*alf*r*r);
+          v_sh = (erfcc - e_shift*r) * prefactor;
+          dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+          forcecoul = dvdrr*rsq*prefactor;
+          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+        } else forcecoul = 0.0;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r6inv = r2inv*r2inv*r2inv;
+          r = sqrt(rsq);
+          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
+            born3[itype][jtype]*r2inv*r6inv;
+        } else forceborn = 0.0;
+
+        fpair = (factor_coul*forcecoul + factor_lj*forceborn) * r2inv;
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            ecoul = v_sh;
+            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+          } else ecoul = 0.0;
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
+              d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else evdwl = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_born_coul_wolf_cs_gpu.h b/src/GPU/pair_born_coul_wolf_cs_gpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f4f4ea35c4e867c18cb52d3aacc7f901df7544d
--- /dev/null
+++ b/src/GPU/pair_born_coul_wolf_cs_gpu.h
@@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/coul/wolf/cs/gpu,PairBornCoulWolfCSGPU)
+
+#else
+
+#ifndef LMP_PAIR_BORN_COUL_WOLF_CS_GPU_H
+#define LMP_PAIR_BORN_COUL_WOLF_CS_GPU_H
+
+#include "pair_born_coul_wolf_cs.h"
+
+namespace LAMMPS_NS {
+
+class PairBornCoulWolfCSGPU : public PairBornCoulWolfCS {
+ public:
+  PairBornCoulWolfCSGPU(LAMMPS *lmp);
+  ~PairBornCoulWolfCSGPU();
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int);
+  void init_style();
+  double memory_usage();
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package
+
+E: Cannot use newton pair with born/coul/wolf/cs/gpu pair style
+
+Self-explanatory.
+
+*/
diff --git a/src/GPU/pair_coul_long_cs_gpu.cpp b/src/GPU/pair_coul_long_cs_gpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cbb9c73ceb0fa0cc371d62781647a427b338a130
--- /dev/null
+++ b/src/GPU/pair_coul_long_cs_gpu.cpp
@@ -0,0 +1,320 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (Northwestern)
+------------------------------------------------------------------------- */
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include "pair_coul_long_cs_gpu.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "integrate.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_request.h"
+#include "universe.h"
+#include "update.h"
+#include "domain.h"
+#include "kspace.h"
+#include "gpu_extra.h"
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   9.95473818e-1
+#define B0       -0.1335096380159268
+#define B1       -2.57839507e-1
+#define B2       -1.37203639e-1
+#define B3       -8.88822059e-3
+#define B4       -5.80844129e-3
+#define B5        1.14652755e-1
+
+#define EPSILON 1.0e-20
+#define EPS_EWALD 1.0e-6
+#define EPS_EWALD_SQR 1.0e-12
+
+// External functions from cuda library for atom decomposition
+
+int clcs_gpu_init(const int ntypes, double **scale,
+                const int nlocal, const int nall, const int max_nbors,
+                const int maxspecial, const double cell_size, int &gpu_mode,
+                FILE *screen, double host_cut_coulsq, double *host_special_coul,
+                const double qqrd2e, const double g_ewald);
+void clcs_gpu_reinit(const int ntypes, double **scale);
+void clcs_gpu_clear();
+int ** clcs_gpu_compute_n(const int ago, const int inum,
+                        const int nall, double **host_x, int *host_type,
+                        double *sublo, double *subhi, tagint *tag,
+                        int **nspecial, tagint **special, const bool eflag,
+                        const bool vflag, const bool eatom, const bool vatom,
+                        int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q,
+                        double *boxlo, double *prd);
+void clcs_gpu_compute(const int ago, const int inum, const int nall,
+                    double **host_x, int *host_type, int *ilist, int *numj,
+                    int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start,
+                    const double cpu_time, bool &success, double *host_q,
+                    const int nlocal, double *boxlo, double *prd);
+double clcs_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulLongCSGPU::PairCoulLongCSGPU(LAMMPS *lmp) :
+  PairCoulLongCS(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairCoulLongCSGPU::~PairCoulLongCSGPU()
+{
+  clcs_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulLongCSGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    inum = atom->nlocal;
+    firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+                                  atom->type, domain->sublo, domain->subhi,
+                                  atom->tag, atom->nspecial, atom->special,
+                                  eflag, vflag, eflag_atom, vflag_atom,
+                                  host_start, &ilist, &numneigh, cpu_time,
+                                  success, atom->q, domain->boxlo,
+                                  domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    clcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                   ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                   vflag_atom, host_start, cpu_time, success, atom->q,
+                   atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  if (host_start<inum) {
+    cpu_time = MPI_Wtime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = MPI_Wtime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairCoulLongCSGPU::init_style()
+{
+  cut_respa = NULL;
+
+  if (!atom->q_flag)
+    error->all(FLERR,"Pair style coul/long/cs/gpu requires atom attribute q");
+  if (force->newton_pair)
+    error->all(FLERR,"Cannot use newton pair with coul/long/cs/gpu pair style");
+
+  // Call init_one calculation make sure scale is correct
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        double cut = init_one(i,j);
+      }
+    }
+  }
+  double cell_size = cut_coul + neighbor->skin;
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // insure use of KSpace long-range solver, set g_ewald
+
+  if (force->kspace == NULL)
+    error->all(FLERR,"Pair style requires a KSpace style");
+  g_ewald = force->kspace->g_ewald;
+
+  // setup force tables
+
+  if (ncoultablebits) init_tables(cut_coul,cut_respa);
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int success = clcs_gpu_init(atom->ntypes+1, scale,
+                            atom->nlocal, atom->nlocal+atom->nghost, 300,
+                            maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
+                            force->special_coul, force->qqrd2e, g_ewald);
+
+  GPU_EXTRA::check_flag(success,error,world);
+
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulLongCSGPU::reinit()
+{
+  Pair::reinit();
+
+  clcs_gpu_reinit(atom->ntypes+1, scale);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCoulLongCSGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + clcs_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
+                                  int vflag, int *ilist, int *numneigh,
+                                  int **firstneigh)
+{
+  int i,j,ii,jj,jnum,itable,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double fraction,table;
+  double r,r2inv,forcecoul,factor_coul;
+  double grij,expm2,prefactor,t,erfc,u;
+  int *jlist;
+  double rsq;
+
+  ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  double *special_coul = force->special_coul;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cut_coulsq) {
+        rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
+        r2inv = 1.0/rsq;
+        if (!ncoultablebits || rsq <= tabinnersq) {
+          r = sqrt(rsq);
+          prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j];
+          if (factor_coul < 1.0) {
+            // When bonded parts are being calculated a minimal distance (EPS_EWALD)
+            // has to be added to the prefactor and erfc in order to make the
+            // used approximation functions for the Ewald correction valid
+            grij = g_ewald * (r+EPS_EWALD);
+            expm2 = exp(-grij*grij);
+            t = 1.0 / (1.0 + EWALD_P*grij);
+            u = 1.0 - t;
+            erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= (r+EPS_EWALD);
+            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
+            // Additionally r2inv needs to be accordingly modified since the later
+            // scaling of the overall force shall be consistent
+            r2inv = 1.0/(rsq + EPS_EWALD_SQR);
+          } else {
+            grij = g_ewald * r;
+            expm2 = exp(-grij*grij);
+            t = 1.0 / (1.0 + EWALD_P*grij);
+            u = 1.0 - t;
+            erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
+            prefactor /= r;
+            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+          }
+        } else {
+          union_int_float_t rsq_lookup;
+          rsq_lookup.f = rsq;
+          itable = rsq_lookup.i & ncoulmask;
+          itable >>= ncoulshiftbits;
+          fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+          table = ftable[itable] + fraction*dftable[itable];
+          forcecoul = scale[itype][jtype] * qtmp*q[j] * table;
+          if (factor_coul < 1.0) {
+            table = ctable[itable] + fraction*dctable[itable];
+            prefactor = scale[itype][jtype] * qtmp*q[j] * table;
+            forcecoul -= (1.0-factor_coul)*prefactor;
+          }
+        }
+
+        fpair = forcecoul * r2inv;
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            if (!ncoultablebits || rsq <= tabinnersq)
+              ecoul = prefactor*erfc;
+            else {
+              table = etable[itable] + fraction*detable[itable];
+              ecoul = scale[itype][jtype] * qtmp*q[j] * table;
+            }
+            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+          } else ecoul = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_coul_long_cs_gpu.h b/src/GPU/pair_coul_long_cs_gpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..95ed8d018227aa1246d85f3764b22cd38642059a
--- /dev/null
+++ b/src/GPU/pair_coul_long_cs_gpu.h
@@ -0,0 +1,67 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/long/cs/gpu,PairCoulLongCSGPU)
+
+#else
+
+#ifndef LMP_PAIR_COUL_LONG_CS_GPU_H
+#define LMP_PAIR_COUL_LONG_CS_GPU_H
+
+#include "pair_coul_long_cs.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulLongCSGPU : public PairCoulLongCS {
+ public:
+  PairCoulLongCSGPU(LAMMPS *lmp);
+  ~PairCoulLongCSGPU();
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int);
+  void init_style();
+  void reinit();
+  double memory_usage();
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package
+
+E: Pair style coul/long/cs/gpu requires atom attribute q
+
+The atom style defined does not have these attributes.
+
+E: Cannot use newton pair with coul/long/cs/gpu pair style
+
+Self-explanatory.
+
+E: Pair style requires a KSpace style
+
+No kspace style is defined.
+
+*/
diff --git a/src/GPU/pair_lj_expand_coul_long_gpu.cpp b/src/GPU/pair_lj_expand_coul_long_gpu.cpp
index 203128471d4cfaeaea3010513d89c03f31c64856..49e8be5879e3e607c6505982dc6e90d5436f2816 100644
--- a/src/GPU/pair_lj_expand_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_expand_coul_long_gpu.cpp
@@ -223,8 +223,9 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
+  double rsq,rshift,rshiftsq,rshift2inv;
+
   int *jlist;
-  double rsq;
 
   evdwl = ecoul = 0.0;
 
@@ -290,11 +291,16 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
-          r6inv = r2inv*r2inv*r2inv;
+          r = sqrt(rsq);
+          rshift = r - shift[itype][jtype];
+          rshiftsq = rshift*rshift;
+          rshift2inv = 1.0/rshiftsq;
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj = factor_lj*forcelj/rshift/r;
         } else forcelj = 0.0;
 
-        fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+        fpair = forcecoul*r2inv + forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
diff --git a/src/LATTE/fix_latte.cpp b/src/LATTE/fix_latte.cpp
index c2bfa1a5a1f1ba6500b4da5baa4633833abb6d1b..4a3fc81f518393351cafa9b0b50f051280c9f358 100644
--- a/src/LATTE/fix_latte.cpp
+++ b/src/LATTE/fix_latte.cpp
@@ -37,8 +37,8 @@ using namespace FixConst;
 extern "C" {
   void latte(int *, int *, double *, int *, int *,
              double *, double *, double *, double *,
-             double *, double *, double *, int*,
-             double *, double *, double *, double *, bool *);
+             double *, double *, double *, int *,
+             double *, double *, double *, double *, int * , bool *);
   int latte_abiversion();
 }
 
@@ -55,7 +55,7 @@ FixLatte::FixLatte(LAMMPS *lmp, int narg, char **arg) :
   if (comm->nprocs != 1)
     error->all(FLERR,"Fix latte currently runs only in serial");
 
-  if (20180207 != latte_abiversion())
+  if (20180622 != latte_abiversion())
     error->all(FLERR,"LAMMPS is linked against incompatible LATTE library");
 
   if (narg != 4) error->all(FLERR,"Illegal fix latte command");
@@ -191,14 +191,18 @@ void FixLatte::init_list(int id, NeighList *ptr)
 
 void FixLatte::setup(int vflag)
 {
+  newsystem = 1;
   post_force(vflag);
+  newsystem = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLatte::min_setup(int vflag)
 {
+  newsystem = 1;
   post_force(vflag);
+  newsystem = 0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -288,10 +292,10 @@ void FixLatte::post_force(int vflag)
   if (coulomb) forces = &flatte[0][0];
   else forces = &atom->f[0][0];
   int maxiter = -1;
-
+  
   latte(flags,&natoms,coords,type,&ntypes,mass,boxlo,boxhi,&domain->xy,
         &domain->xz,&domain->yz,forces,&maxiter,&latte_energy,
-        &atom->v[0][0],&update->dt,virial,&latteerror);
+        &atom->v[0][0],&update->dt,virial,&newsystem,&latteerror);
 
   if (latteerror) error->all(FLERR,"Internal LATTE problem");
 
diff --git a/src/LATTE/fix_latte.h b/src/LATTE/fix_latte.h
index 46c65b201696d2acdcb257d2182690e710a14115..6d30ced0a075c7a162b0865c0668f9f51485cada 100644
--- a/src/LATTE/fix_latte.h
+++ b/src/LATTE/fix_latte.h
@@ -48,7 +48,7 @@ class FixLatte : public Fix {
   int coulomb,pbcflag,pe_peratom,virial_global,virial_peratom,neighflag;
   int eflag_caller;
 
-  int nmax;
+  int nmax,newsystem;
   double *qpotential;
   double **flatte;
   double latte_energy;
diff --git a/src/MISC/pair_nm_cut.cpp b/src/MISC/pair_nm_cut.cpp
index 83d734de0f4e492f8ffdadf2c771aaaaf250b248..a778d7a5b6c839f4bce0dcf1f08af09be831e56e 100644
--- a/src/MISC/pair_nm_cut.cpp
+++ b/src/MISC/pair_nm_cut.cpp
@@ -282,10 +282,12 @@ double PairNMCut::init_one(int i, int j)
     double rrr1 = pow(r0[i][j],nn[i][j])*(1-nn[i][j]);
     double rrr2 = pow(r0[i][j],mm[i][j])*(1-mm[i][j]);
 
-    etail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
-      (rr1*pow(cut[i][j],p1)-rr2*pow(cut[i][j],p2));
-    ptail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
-      nn[i][j]*mm[i][j]*(rrr1*pow(cut[i][j],p1)-rrr2*pow(cut[i][j],p2));
+    double cut3 = cut[i][j]*cut[i][j]*cut[i][j];
+    ptail_ij = 2.*MY_PI/3.*all[0]*all[1]*e0nm[i][j]*nm[i][j]*cut3 *
+      (pow(r0[i][j]/cut[i][j],nn[i][j])/(nn[i][j]-3) - pow(r0[i][j]/cut[i][j],mm[i][j])/(mm[i][j]-3));
+    etail_ij = 2.*MY_PI*all[0]*all[1]*e0nm[i][j]*cut3 *
+      (mm[i][j]*pow(r0[i][j]/cut[i][j],nn[i][j])/(nn[i][j]-3) - nn[i][j]*pow(r0[i][j]/cut[i][j],mm[i][j])/(mm[i][j]-3));
+
   }
 
   return cut[i][j];
diff --git a/src/MISC/pair_nm_cut_coul_cut.cpp b/src/MISC/pair_nm_cut_coul_cut.cpp
index 0c9a43f4dc94fc5b4a8e9d2c705be0bad3cc2438..b807bc972bfd9c5ab9e620b2c8e88e763ecd1135 100644
--- a/src/MISC/pair_nm_cut_coul_cut.cpp
+++ b/src/MISC/pair_nm_cut_coul_cut.cpp
@@ -332,10 +332,11 @@ double PairNMCutCoulCut::init_one(int i, int j)
     double rrr1 = pow(r0[i][j],nn[i][j])*(1-nn[i][j]);
     double rrr2 = pow(r0[i][j],mm[i][j])*(1-mm[i][j]);
 
-    etail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
-      (rr1*pow(cut_lj[i][j],p1)-rr2*pow(cut_lj[i][j],p2));
-    ptail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
-      nn[i][j]*mm[i][j]*(rrr1*pow(cut_lj[i][j],p1)-rrr2*pow(cut_lj[i][j],p2));
+    double cut_lj3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
+    ptail_ij = 2.*MY_PI/3.*all[0]*all[1]*e0nm[i][j]*nm[i][j]*cut_lj3 *
+      (pow(r0[i][j]/cut_lj[i][j],nn[i][j])/(nn[i][j]-3) - pow(r0[i][j]/cut_lj[i][j],mm[i][j])/(mm[i][j]-3));
+    etail_ij = 2.*MY_PI*all[0]*all[1]*e0nm[i][j]*cut_lj3 *
+      (mm[i][j]*pow(r0[i][j]/cut_lj[i][j],nn[i][j])/(nn[i][j]-3) - nn[i][j]*pow(r0[i][j]/cut_lj[i][j],mm[i][j])/(mm[i][j]-3));
 
   }
 
diff --git a/src/MISC/pair_nm_cut_coul_long.cpp b/src/MISC/pair_nm_cut_coul_long.cpp
index 3296f8fd92b62ae567c7790ffbf0d98c62cd021a..325f3da0033c78dafd85e759112ccd0056dec4d6 100644
--- a/src/MISC/pair_nm_cut_coul_long.cpp
+++ b/src/MISC/pair_nm_cut_coul_long.cpp
@@ -379,10 +379,12 @@ double PairNMCutCoulLong::init_one(int i, int j)
     double rrr1 = pow(r0[i][j],nn[i][j])*(1-nn[i][j]);
     double rrr2 = pow(r0[i][j],mm[i][j])*(1-mm[i][j]);
 
-    etail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
-      (rr1*pow(cut_lj[i][j],p1)-rr2*pow(cut_lj[i][j],p2));
-    ptail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j]*nn[i][j]*mm[i][j] *
-      (rrr1*pow(cut_lj[i][j],p1)-rrr2*pow(cut_lj[i][j],p2));
+    double cut_lj3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
+    ptail_ij = 2.*MY_PI/3.*all[0]*all[1]*e0nm[i][j]*nm[i][j]*cut_lj3 *
+      (pow(r0[i][j]/cut_lj[i][j],nn[i][j])/(nn[i][j]-3) - pow(r0[i][j]/cut_lj[i][j],mm[i][j])/(mm[i][j]-3));
+    etail_ij = 2.*MY_PI*all[0]*all[1]*e0nm[i][j]*cut_lj3 *
+      (mm[i][j]*pow(r0[i][j]/cut_lj[i][j],nn[i][j])/(nn[i][j]-3) - nn[i][j]*pow(r0[i][j]/cut_lj[i][j],mm[i][j])/(mm[i][j]-3));
+
   }
 
   return cut;
diff --git a/src/USER-MANIFOLD/fix_manifoldforce.cpp b/src/USER-MANIFOLD/fix_manifoldforce.cpp
index ab33b5bc972627e1611bbc80bbbaa3f1e3c660b3..04172d5c92da874322344e33fcd0aa4189f184cd 100644
--- a/src/USER-MANIFOLD/fix_manifoldforce.cpp
+++ b/src/USER-MANIFOLD/fix_manifoldforce.cpp
@@ -31,12 +31,12 @@ using namespace user_manifold;
 
 
 // Helper functions for parameters/equal style variables in input script
-inline bool was_var( const char *arg )
+static bool was_var( const char *arg )
 {
   return strstr( arg, "v_" ) == arg;
 }
 
-inline bool str_eq( const char *str1, const char *str2 )
+static bool str_eq( const char *str1, const char *str2 )
 {
   return strcmp(str1,str2) == 0;
 }
@@ -50,13 +50,6 @@ FixManifoldForce::FixManifoldForce(LAMMPS *lmp, int narg, char **arg) :
   MPI_Comm_rank(world,&me);
 
 
-  // Check the min-style:
-  int good_minner = str_eq(update->minimize_style,"hftn") |
-                    str_eq(update->minimize_style,"quickmin");
-  if( !good_minner){
-    error->warning(FLERR,"Minimizing with fix manifoldforce without hftn or quickmin is fishy");
-  }
-
 
   // Command is given as
   // fix <name> <group> manifoldforce manifold_name manifold_args
@@ -121,6 +114,18 @@ int FixManifoldForce::setmask()
 
 /* ---------------------------------------------------------------------- */
 
+void FixManifoldForce::init()
+{
+  // Check the min-style:
+  const bool is_good_min_style = str_eq(update->minimize_style,"hftn")
+                                || str_eq(update->minimize_style,"quickmin");
+  if (!is_good_min_style) {
+    error->all(FLERR,"Fix manifoldforce requires min_style hftn or quickmin");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void FixManifoldForce::setup(int vflag)
 {
   if (strstr(update->integrate_style,"verlet"))
diff --git a/src/USER-MANIFOLD/fix_manifoldforce.h b/src/USER-MANIFOLD/fix_manifoldforce.h
index 89bd6bd378aeb0f4bb5e6241853812993d23a975..527bee3f86d33a976a20007b09470d318575e185 100644
--- a/src/USER-MANIFOLD/fix_manifoldforce.h
+++ b/src/USER-MANIFOLD/fix_manifoldforce.h
@@ -51,6 +51,7 @@ class FixManifoldForce : public Fix {
  public:
   FixManifoldForce(class LAMMPS *, int, char **);
   int setmask();
+  void init();
   void setup(int);
   void min_setup(int);
   void post_force(int);
diff --git a/src/USER-MEAMC/meam_funcs.cpp b/src/USER-MEAMC/meam_funcs.cpp
index 599e9019d930aedac8a7c84ba45bfb8496f7a639..29db27fc4b816f891f9ff4e3e102f5171710e278 100644
--- a/src/USER-MEAMC/meam_funcs.cpp
+++ b/src/USER-MEAMC/meam_funcs.cpp
@@ -277,9 +277,9 @@ MEAM::get_Zij2(const lattice_t latt, const double cmin, const double cmax, doubl
     break;
 
   case DIA:
-    Zij2 = 0;
+    Zij2 = 12;
     a = sqrt(8.0 / 3.0);
-    numscr = 4;
+    numscr = 1;
     if (cmin < 0.500001) {
         //          call error('can not do 2NN MEAM for dia')
     }
diff --git a/src/USER-MEAMC/meam_setup_done.cpp b/src/USER-MEAMC/meam_setup_done.cpp
index fa71dba23636ac46a214de7d9915c770463e9ff3..e18a391378aecc384e8eda503ced210ce0985673 100644
--- a/src/USER-MEAMC/meam_setup_done.cpp
+++ b/src/USER-MEAMC/meam_setup_done.cpp
@@ -213,14 +213,12 @@ MEAM::compute_pair_meam(void)
           Z2 = get_Zij2(this->lattce_meam[a][b], this->Cmin_meam[a][a][b],
                    this->Cmax_meam[a][a][b], arat, scrn);
 
-          //     The B1, B2,  and L12 cases with NN2 have a trick to them; we
-          //     need to
-          //     compute the contributions from second nearest neighbors, like
-          //     a-a
+          //     The B1, B2,  and L12 cases with NN2 have a trick to them; we need to
+          //     compute the contributions from second nearest neighbors, like a-a
           //     pairs, but need to include NN2 contributions to those pairs as
           //     well.
           if (this->lattce_meam[a][b] == B1 || this->lattce_meam[a][b] == B2 ||
-              this->lattce_meam[a][b] == L12) {
+              this->lattce_meam[a][b] == L12 || this->lattce_meam[a][b] == DIA) {
             rarat = r * arat;
 
             //               phi_aa
@@ -247,7 +245,8 @@ MEAM::compute_pair_meam(void)
               }
             }
 
-            if (this->lattce_meam[a][b] == B1 || this->lattce_meam[a][b] == B2) {
+            if (this->lattce_meam[a][b] == B1 || this->lattce_meam[a][b] == B2 ||
+                this->lattce_meam[a][b] == DIA) {
               //     Add contributions to the B1 or B2 potential
               Z1 = get_Zij(this->lattce_meam[a][b]);
               Z2 = get_Zij2(this->lattce_meam[a][b], this->Cmin_meam[a][a][b],
diff --git a/src/USER-MISC/fix_bond_react.cpp b/src/USER-MISC/fix_bond_react.cpp
index 4d4b536c36e2d53b4049d0fca54d78cf52e7b557..fb56945fd390b13de08d819cdaec9a9d4b967589 100644
--- a/src/USER-MISC/fix_bond_react.cpp
+++ b/src/USER-MISC/fix_bond_react.cpp
@@ -28,6 +28,7 @@ Contributing Author: Jacob Gissinger (jacob.gissinger@colorado.edu)
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
+#include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
@@ -78,10 +79,12 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   fix1 = NULL;
   fix2 = NULL;
 
-  if (narg < 8) error->all(FLERR,"Illegal fix bond/react command 0.0");
+  if (narg < 8) error->all(FLERR,"Illegal fix bond/react command: "
+                           "too few arguments");
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
+  newton_bond = force->newton_bond;
 
   attempted_rxn = 0;
   force_reneighbor = 1;
@@ -92,6 +95,13 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   rxnID = 0;
   status = PROCEED;
 
+  nxspecial = NULL;
+  onemol_nxspecial = NULL;
+  twomol_nxspecial = NULL;
+  xspecial = NULL;
+  onemol_xspecial = NULL;
+  twomol_xspecial = NULL;
+
   // these group names are reserved for use exclusively by bond/react
   master_group = (char *) "bond_react_MASTER_group";
 
@@ -105,24 +115,29 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
     if (strcmp(arg[i],"react") == 0) {
       nreacts++;
       i = i + 6; // skip past mandatory arguments
-      if (i > narg) error->all(FLERR,"Illegal fix bond/react command 0.1");
+      if (i > narg) error->all(FLERR,"Illegal fix bond/react command: "
+                               "'react' has too few arguments");
     }
   }
 
-  if (nreacts == 0) error->all(FLERR,"Illegal fix bond/react command: missing mandatory 'react' argument");
+  if (nreacts == 0) error->all(FLERR,"Illegal fix bond/react command: "
+                               "missing mandatory 'react' argument");
 
   size_vector = nreacts;
 
   int iarg = 3;
   stabilization_flag = 0;
-  while (strcmp(arg[iarg],"react") != 0) {
+  int num_common_keywords = 1;
+  for (int m = 0; m < num_common_keywords; m++) {
     if (strcmp(arg[iarg],"stabilization") == 0) {
       if (strcmp(arg[iarg+1],"no") == 0) {
-        if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command 0.2");
+        if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: "
+                                      "'stabilization' keyword has too few arguments");
         iarg += 2;
       }
       if (strcmp(arg[iarg+1],"yes") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix bond/react command 0.21");
+        if (iarg+4 > narg) error->all(FLERR,"Illegal fix bond/react command:"
+                                      "'stabilization' keyword has too few arguments");
         int n = strlen(arg[iarg+2]) + 1;
         exclude_group = new char[n];
         strcpy(exclude_group,arg[iarg+2]);
@@ -130,7 +145,9 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
         nve_limit_xmax = arg[iarg+3];
         iarg += 4;
       }
-    }
+    } else if (strcmp(arg[iarg],"react") == 0) {
+      break;
+    } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword");
   }
 
   // set up common variables as vectors of length 'nreacts'
@@ -172,8 +189,10 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   char **files;
   files = new char*[nreacts];
 
-  int rxn = 0;
-  while (iarg < narg && strcmp(arg[iarg],"react") == 0) {
+  for (int rxn = 0; rxn < nreacts; rxn++) {
+
+    if (strcmp(arg[iarg],"react") != 0) error->all(FLERR,"Illegal fix bond/react command: "
+                                                   "'react' or 'stabilization' has incorrect arguments");
 
     iarg++;
 
@@ -185,14 +204,17 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
     groupbits[rxn] = group->bitmask[igroup];
 
     nevery[rxn] = force->inumeric(FLERR,arg[iarg++]);
-    if (nevery[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command 0.4");
+    if (nevery[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: "
+                                     "'Nevery' must be a positive integer");
 
     double cutoff = force->numeric(FLERR,arg[iarg++]);
-    if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command 0.5");
+    if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command: "
+                                 "'Rmin' cannot be negative");
     cutsq[rxn][0] = cutoff*cutoff;
 
     cutoff = force->numeric(FLERR,arg[iarg++]);
-    if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command 0.55");
+    if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/react command:"
+                                 "'Rmax' cannot be negative");
     cutsq[rxn][1] = cutoff*cutoff;
 
     unreacted_mol[rxn] = atom->find_molecule(arg[iarg++]);
@@ -209,22 +231,26 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
 
     while (iarg < narg && strcmp(arg[iarg],"react") != 0 ) {
       if (strcmp(arg[iarg],"prob") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command 0.6");
+        if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/react command: "
+                                      "'prob' keyword has too few arguments");
         fraction[rxn] = force->numeric(FLERR,arg[iarg+1]);
         seed[rxn] = force->inumeric(FLERR,arg[iarg+2]);
         if (fraction[rxn] < 0.0 || fraction[rxn] > 1.0)
-          error->all(FLERR,"Illegal fix bond/react command");
-        if (seed[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command 0.7");
+          error->all(FLERR,"Illegal fix bond/react command: "
+                     "probability fraction must between 0 and 1, inclusive");
+        if (seed[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: "
+                                       "probability seed must be positive");
         iarg += 3;
       } else if (strcmp(arg[iarg],"stabilize_steps") == 0) {
-        if (stabilization_flag == 0) error->all(FLERR,"Stabilize_steps keyword used without stabilization keyword");
-        if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command 0.8");
+        if (stabilization_flag == 0) error->all(FLERR,"Stabilize_steps keyword "
+                                                "used without stabilization keyword");
+        if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: "
+                                      "'stabilize_steps' has too few arguments");
         limit_duration[rxn] = force->numeric(FLERR,arg[iarg+1]);
         stabilize_steps_flag[rxn] = 1;
         iarg += 2;
-      } else error->all(FLERR,"Illegal fix bond/react command 0.9");
+      } else error->all(FLERR,"Illegal fix bond/react command: unknown keyword");
     }
-    rxn++;
   }
 
   max_natoms = 0; // the number of atoms in largest molecule template
@@ -243,6 +269,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
     open(files[i]);
     onemol = atom->molecules[unreacted_mol[i]];
     twomol = atom->molecules[reacted_mol[i]];
+    get_molxspecials();
     read(i);
     fclose(fp);
     iatomtype[i] = onemol->type[ibonding[i]-1];
@@ -263,12 +290,13 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   for (int myrxn = 0; myrxn < nreacts; myrxn++) {
     closeneigh[myrxn] = -1; // indicates will search non-bonded neighbors
     onemol = atom->molecules[unreacted_mol[myrxn]];
-    for (int k = 0; k < onemol->nspecial[ibonding[myrxn]-1][2]; k++) {
-      if (onemol->special[ibonding[myrxn]-1][k] == jbonding[myrxn]) {
+    get_molxspecials();
+    for (int k = 0; k < onemol_nxspecial[ibonding[myrxn]-1][2]; k++) {
+      if (onemol_xspecial[ibonding[myrxn]-1][k] == jbonding[myrxn]) {
         closeneigh[myrxn] = 2; // index for 1-4 neighbor
-        if (k < onemol->nspecial[ibonding[myrxn]-1][1])
+        if (k < onemol_nxspecial[ibonding[myrxn]-1][1])
           closeneigh[myrxn] = 1; // index for 1-3 neighbor
-        if (k < onemol->nspecial[ibonding[myrxn]-1][0])
+        if (k < onemol_nxspecial[ibonding[myrxn]-1][0])
           closeneigh[myrxn] = 0; // index for 1-2 neighbor
         break;
       }
@@ -289,14 +317,12 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   comm_reverse = 2;
 
   // allocate arrays local to this fix
-
   nmax = 0;
   partner = finalpartner = NULL;
   distsq = NULL;
   probability = NULL;
   maxcreate = 0;
   created = NULL;
-  local_ncreate = NULL;
   ncreate = NULL;
   allncreate = 0;
   local_num_mega = 0;
@@ -334,7 +360,6 @@ FixBondReact::~FixBondReact()
 
   memory->destroy(partner);
   memory->destroy(finalpartner);
-  memory->destroy(local_ncreate);
   memory->destroy(ncreate);
   memory->destroy(distsq);
   memory->destroy(probability);
@@ -363,6 +388,15 @@ FixBondReact::~FixBondReact()
   memory->destroy(ghostly_rxn_count);
   memory->destroy(reaction_count_total);
 
+  if (newton_bond == 0) {
+    memory->destroy(xspecial);
+    memory->destroy(nxspecial);
+    memory->destroy(onemol_xspecial);
+    memory->destroy(onemol_nxspecial);
+    memory->destroy(twomol_xspecial);
+    memory->destroy(twomol_nxspecial);
+  }
+
   if (attempted_rxn == 1) {
     memory->destroy(restore_pt);
     memory->destroy(restore);
@@ -531,17 +565,6 @@ void FixBondReact::post_constructor()
 void FixBondReact::init()
 {
 
-  // warn if more than one bond/react fix
-
-  int count = 0;
-  for (int i = 0; i < modify->nfix; i++)
-    if (strcmp(modify->fix[i]->style,"bond/react") == 0) count++;
-  if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one fix bond/react");
-
-  if (force->newton_bond == 0 && comm->me == 0) error->warning(FLERR,"Fewer reactions may occur in some cases "
-                                                           "when 'newton off' is set for bonded interactions. "
-                                                           "The reccomended setting is 'newton on'.");
-
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
@@ -606,21 +629,18 @@ void FixBondReact::post_integrate()
     memory->destroy(partner);
     memory->destroy(finalpartner);
     memory->destroy(distsq);
-    memory->destroy(local_ncreate);
     memory->destroy(ncreate);
     memory->destroy(probability);
     nmax = atom->nmax;
     memory->create(partner,nmax,"bond/react:partner");
     memory->create(finalpartner,nmax,"bond/react:finalpartner");
     memory->create(distsq,nmax,2,"bond/react:distsq");
-    memory->create(local_ncreate,nreacts,"bond/react:local_ncreate");
     memory->create(ncreate,nreacts,"bond/react:ncreate");
     memory->create(probability,nmax,"bond/react:probability");
   }
 
   // reset create counts
   for (int i = 0; i < nreacts; i++) {
-    local_ncreate[i] = 0;
     ncreate[i] = 0;
   }
 
@@ -635,10 +655,32 @@ void FixBondReact::post_integrate()
 
   neighbor->build_one(list,1);
 
-  int j;
+  // here we define a full special list, independent of Newton setting
+  if (newton_bond == 1) {
+    nxspecial = atom->nspecial;
+    xspecial = atom->special;
+  } else {
+    int nall = atom->nlocal + atom->nghost;
+    memory->destroy(nxspecial);
+    memory->destroy(xspecial);
+    memory->create(nxspecial,nall,3,"bond/react:nxspecial");
+    memory->create(xspecial,nall,atom->maxspecial,"bond/react:xspecial");
+    for (int i = 0; i < atom->nlocal; i++) {
+      nxspecial[i][0] = atom->num_bond[i];
+      for (int j = 0; j < nxspecial[i][0]; j++) {
+        xspecial[i][j] = atom->bond_atom[i][j];
+      }
+      nxspecial[i][1] = atom->nspecial[i][1];
+      nxspecial[i][2] = atom->nspecial[i][2];
+      int joffset = nxspecial[i][0] - atom->nspecial[i][0];
+      for (int j = nxspecial[i][0]; j < nxspecial[i][2]; j++) {
+        xspecial[i][j+joffset] = atom->special[i][j];
+      }
+    }
+  }
 
+  int j;
   for (rxnID = 0; rxnID < nreacts; rxnID++) {
-
     for (int ii = 0; ii < nall; ii++) {
       partner[ii] = 0;
       finalpartner[ii] = 0;
@@ -647,14 +689,17 @@ void FixBondReact::post_integrate()
     }
 
     // fork between far and close_partner here
-    if (closeneigh[rxnID] < 0) far_partner();
-    else close_partner();
-
-    // reverse comm of distsq and partner
-    // not needed if newton_pair off since I,J pair was seen by both procs
-
-    commflag = 2;
-    if (force->newton_pair) comm->reverse_comm_fix(this);
+    if (closeneigh[rxnID] < 0) {
+      far_partner();
+      // reverse comm of distsq and partner
+      // not needed if newton_pair off since I,J pair was seen by both procs
+      commflag = 2;
+      if (force->newton_pair) comm->reverse_comm_fix(this);
+    } else {
+      close_partner();
+      commflag = 2;
+      comm->reverse_comm_fix(this);
+    }
 
     // each atom now knows its winning partner
     // for prob check, generate random value for each atom with a bond partner
@@ -678,6 +723,7 @@ void FixBondReact::post_integrate()
       if (partner[i] == 0) {
         continue;
       }
+
       j = atom->map(partner[i]);
       if (partner[j] != tag[i]) {
         continue;
@@ -701,8 +747,7 @@ void FixBondReact::post_integrate()
       if (tag[i] < tag[j]) temp_ncreate++;
     }
 
-    local_ncreate[rxnID] = temp_ncreate;
-    // break loop if no even eligible bonding atoms were found (on any proc)
+    // cycle loop if no even eligible bonding atoms were found (on any proc)
     int some_chance;
     MPI_Allreduce(&temp_ncreate,&some_chance,1,MPI_INT,MPI_SUM,world);
     if (!some_chance) continue;
@@ -740,7 +785,6 @@ void FixBondReact::post_integrate()
         ncreate[rxnID]++;
       }
     }
-    unlimit_bond(); //free atoms that have been relaxed
   }
 
   // break loop if no even eligible bonding atoms were found (on any proc)
@@ -778,8 +822,6 @@ void FixBondReact::far_partner()
 
   double **x = atom->x;
   tagint *tag = atom->tag;
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
   int *mask = atom->mask;
   int *type = atom->type;
 
@@ -809,10 +851,9 @@ void FixBondReact::far_partner()
     for (int jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
-
       if (!(mask[j] & groupbits[rxnID])) {
         continue;
-}
+      }
 
       if (i_limit_tags[j] != 0) {
         continue;
@@ -820,7 +861,6 @@ void FixBondReact::far_partner()
 
       jtype = type[j];
       possible = 0;
-
       if (itype == iatomtype[rxnID] && jtype == jatomtype[rxnID]) {
         possible = 1;
       } else if (itype == jatomtype[rxnID] && jtype == iatomtype[rxnID]) {
@@ -830,8 +870,8 @@ void FixBondReact::far_partner()
       if (possible == 0) continue;
 
       // do not allow bonding atoms within special list
-      for (int k = 0; k < nspecial[i][2]; k++)
-        if (special[i][k] == tag[j]) possible = 0;
+      for (int k = 0; k < nxspecial[i][2]; k++)
+        if (xspecial[i][k] == tag[j]) possible = 0;
       if (!possible) continue;
 
       delx = xtmp - x[j][0];
@@ -867,8 +907,6 @@ void FixBondReact::close_partner()
   tagint *tag = atom->tag;
   int *type = atom->type;
   int *mask = atom->mask;
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
 
   // per-atom property indicating if in bond/react master group
   int flag;
@@ -880,10 +918,10 @@ void FixBondReact::close_partner()
     itype = type[ii];
     n = 0;
     if (closeneigh[rxnID] != 0)
-      n = nspecial[ii][closeneigh[rxnID]-1];
-    for (; n < nspecial[ii][closeneigh[rxnID]]; n++) {
+      n = nxspecial[ii][closeneigh[rxnID]-1];
+    for (; n < nxspecial[ii][closeneigh[rxnID]]; n++) {
       i1 = ii;
-      i2 = atom->map(special[ii][n]);
+      i2 = atom->map(xspecial[ii][n]);
       jtype = type[i2];
       if (!(mask[i1] & groupbits[rxnID])) continue;
       if (!(mask[i2] & groupbits[rxnID])) continue;
@@ -894,6 +932,7 @@ void FixBondReact::close_partner()
       delx = x[i1][0] - x[i2][0];
       dely = x[i1][1] - x[i2][1];
       delz = x[i1][2] - x[i2][2];
+      domain->minimum_image(delx,dely,delz); // ghost location fix
       rsq = delx*delx + dely*dely + delz*delz;
       if (rsq >= cutsq[rxnID][1] || rsq <= cutsq[rxnID][0]) continue;
 
@@ -950,12 +989,13 @@ void FixBondReact::superimpose_algorithm()
     }
   }
 
-  //let's finally begin the superimpose loop
+  // let's finally begin the superimpose loop
   for (rxnID = 0; rxnID < nreacts; rxnID++) {
     for (lcl_inst = 0; lcl_inst < ncreate[rxnID]; lcl_inst++) {
 
       onemol = atom->molecules[unreacted_mol[rxnID]];
       twomol = atom->molecules[reacted_mol[rxnID]];
+      get_molxspecials();
 
       status = PROCEED;
 
@@ -985,11 +1025,11 @@ void FixBondReact::superimpose_algorithm()
       for (int i = 0; i < max_natoms; i++)
         pioneer_count[i] = 0;
 
-      for (int i = 0; i < onemol->nspecial[myibonding-1][0]; i++)
-        pioneer_count[onemol->special[myibonding-1][i]-1]++;
+      for (int i = 0; i < onemol_nxspecial[myibonding-1][0]; i++)
+        pioneer_count[onemol_xspecial[myibonding-1][i]-1]++;
 
-      for (int i = 0; i < onemol->nspecial[myjbonding-1][0]; i++)
-        pioneer_count[onemol->special[myjbonding-1][i]-1]++;
+      for (int i = 0; i < onemol_nxspecial[myjbonding-1][0]; i++)
+        pioneer_count[onemol_xspecial[myjbonding-1][i]-1]++;
 
 
       int hang_catch = 0;
@@ -1000,7 +1040,7 @@ void FixBondReact::superimpose_algorithm()
         }
 
         for (int i = 0; i < onemol->natoms; i++) {
-          if (glove[i][0] !=0 && pioneer_count[i] < onemol->nspecial[i][0] && edge[i][rxnID] == 0) {
+          if (glove[i][0] !=0 && pioneer_count[i] < onemol_nxspecial[i][0] && edge[i][rxnID] == 0) {
             pioneers[i] = 1;
           }
         }
@@ -1059,10 +1099,8 @@ void FixBondReact::superimpose_algorithm()
 
 void FixBondReact::make_a_guess()
 {
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
   int *type = atom->type;
-  int nfirst_neighs = onemol->nspecial[pion][0];
+  int nfirst_neighs = onemol_nxspecial[pion][0];
 
   // per-atom property indicating if in bond/react master group
   int flag;
@@ -1091,7 +1129,7 @@ void FixBondReact::make_a_guess()
     if (status != PROCEED) return;
   }
 
-  nfirst_neighs = onemol->nspecial[pion][0];
+  nfirst_neighs = onemol_nxspecial[pion][0];
 
   //  check if any of first neighbors are in bond_react_MASTER_group
   //  if so, this constitutes a fail
@@ -1099,18 +1137,18 @@ void FixBondReact::make_a_guess()
   //  could technically fail unnecessarily during a wrong guess if near edge atoms
   //  we accept this temporary and infrequent decrease in reaction occurences
 
-  for (int i = 0; i < nspecial[atom->map(glove[pion][1])][0]; i++) {
-    if (atom->map(special[atom->map(glove[pion][1])][i]) < 0) {
+  for (int i = 0; i < nxspecial[atom->map(glove[pion][1])][0]; i++) {
+    if (atom->map(xspecial[atom->map(glove[pion][1])][i]) < 0) {
       error->all(FLERR,"Fix bond/react needs ghost atoms from further away1"); // parallel issues.
     }
-    if (i_limit_tags[(int)atom->map(special[atom->map(glove[pion][1])][i])] != 0) {
+    if (i_limit_tags[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] != 0) {
       status = GUESSFAIL;
       return;
     }
   }
 
   // check for same number of neighbors between unreacted mol and simulation
-  if (nfirst_neighs != nspecial[atom->map(glove[pion][1])][0]) {
+  if (nfirst_neighs != nxspecial[atom->map(glove[pion][1])][0]) {
     status = GUESSFAIL;
     return;
   }
@@ -1120,7 +1158,7 @@ void FixBondReact::make_a_guess()
   int assigned_count = 0;
   for (int i = 0; i < nfirst_neighs; i++)
     for (int j = 0; j < onemol->natoms; j++)
-      if (special[atom->map(glove[pion][1])][i] == glove[j][1]) {
+      if (xspecial[atom->map(glove[pion][1])][i] == glove[j][1]) {
         assigned_count++;
         break;
       }
@@ -1137,8 +1175,8 @@ void FixBondReact::make_a_guess()
   }
 
   for (int i = 0; i < nfirst_neighs; i++) {
-    mol_ntypes[(int)onemol->type[(int)onemol->special[pion][i]-1]-1]++;
-    lcl_ntypes[(int)type[(int)atom->map(special[atom->map(glove[pion][1])][i])]-1]++; //added -1
+    mol_ntypes[(int)onemol->type[(int)onemol_xspecial[pion][i]-1]-1]++;
+    lcl_ntypes[(int)type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])]-1]++; //added -1
   }
 
   for (int i = 0; i < atom->ntypes; i++) {
@@ -1164,7 +1202,7 @@ void FixBondReact::make_a_guess()
 
 void FixBondReact::neighbor_loop()
 {
-  int nfirst_neighs = onemol->nspecial[pion][0];
+  int nfirst_neighs = onemol_nxspecial[pion][0];
 
   if (status == RESTORE) {
     check_a_neighbor();
@@ -1172,7 +1210,7 @@ void FixBondReact::neighbor_loop()
   }
 
   for (neigh = 0; neigh < nfirst_neighs; neigh++) {
-    if (glove[(int)onemol->special[pion][neigh]-1][0] == 0) {
+    if (glove[(int)onemol_xspecial[pion][neigh]-1][0] == 0) {
       check_a_neighbor();
     }
   }
@@ -1186,39 +1224,37 @@ void FixBondReact::neighbor_loop()
 
 void FixBondReact::check_a_neighbor()
 {
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
   int *type = atom->type;
-  int nfirst_neighs = onemol->nspecial[pion][0];
+  int nfirst_neighs = onemol_nxspecial[pion][0];
 
   if (status != RESTORE) {
     // special consideration for hydrogen atoms (and all first neighbors bonded to no other atoms) (and aren't edge atoms)
-    if (onemol->nspecial[(int)onemol->special[pion][neigh]-1][0] == 1 && edge[(int)onemol->special[pion][neigh]-1][rxnID] == 0) {
+    if (onemol_nxspecial[(int)onemol_xspecial[pion][neigh]-1][0] == 1 && edge[(int)onemol_xspecial[pion][neigh]-1][rxnID] == 0) {
 
       for (int i = 0; i < nfirst_neighs; i++) {
 
-        if (type[(int)atom->map(special[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol->special[pion][neigh]-1] &&
-            nspecial[(int)atom->map(special[(int)atom->map(glove[pion][1])][i])][0] == 1) {
+        if (type[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1] &&
+            nxspecial[(int)atom->map(xspecial[(int)atom->map(glove[pion][1])][i])][0] == 1) {
 
           int already_assigned = 0;
           for (int j = 0; j < onemol->natoms; j++) {
-            if (glove[j][1] == special[atom->map(glove[pion][1])][i]) {
+            if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) {
               already_assigned = 1;
               break;
             }
           }
 
           if (already_assigned == 0) {
-            glove[(int)onemol->special[pion][neigh]-1][0] = onemol->special[pion][neigh];
-            glove[(int)onemol->special[pion][neigh]-1][1] = special[(int)atom->map(glove[pion][1])][i];
+            glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh];
+            glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i];
 
             //another check for ghost atoms. perhaps remove the one in make_a_guess
-            if (atom->map(glove[(int)onemol->special[pion][neigh]-1][1]) < 0) {
+            if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) {
               error->all(FLERR,"Fix bond/react needs ghost atoms from further away2");
             }
 
-            for (int j = 0; j < onemol->nspecial[onemol->special[pion][neigh]-1][0]; j++) {
-              pioneer_count[onemol->special[onemol->special[pion][neigh]-1][j]-1]++;
+            for (int j = 0; j < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; j++) {
+              pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][j]-1]++;
             }
 
             glove_counter++;
@@ -1249,28 +1285,28 @@ void FixBondReact::check_a_neighbor()
 
   for (int i = 0; i < nfirst_neighs; i++) {
 
-    if (type[atom->map((int)special[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol->special[pion][neigh]-1]) {
+    if (type[atom->map((int)xspecial[(int)atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) {
       int already_assigned = 0;
 
       //check if a first neighbor of the pioneer is already assigned to pre-reacted template
       for (int j = 0; j < onemol->natoms; j++) {
-        if (glove[j][1] == special[atom->map(glove[pion][1])][i]) {
+        if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) {
           already_assigned = 1;
           break;
         }
       }
 
       if (already_assigned == 0) {
-        glove[(int)onemol->special[pion][neigh]-1][0] = onemol->special[pion][neigh];
-        glove[(int)onemol->special[pion][neigh]-1][1] = special[(int)atom->map(glove[pion][1])][i];
+        glove[(int)onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh];
+        glove[(int)onemol_xspecial[pion][neigh]-1][1] = xspecial[(int)atom->map(glove[pion][1])][i];
 
         //another check for ghost atoms. perhaps remove the one in make_a_guess
-        if (atom->map(glove[(int)onemol->special[pion][neigh]-1][1]) < 0) {
+        if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) {
           error->all(FLERR,"Fix bond/react needs ghost atoms from further away3");
         }
 
-        for (int ii = 0; ii < onemol->nspecial[onemol->special[pion][neigh]-1][0]; ii++) {
-          pioneer_count[onemol->special[onemol->special[pion][neigh]-1][ii]-1]++;
+        for (int ii = 0; ii < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; ii++) {
+          pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][ii]-1]++;
         }
 
         glove_counter++;
@@ -1296,7 +1332,7 @@ void FixBondReact::check_a_neighbor()
 
 void FixBondReact::crosscheck_the_neighbor()
 {
-  int nfirst_neighs = onemol->nspecial[pion][0];
+  int nfirst_neighs = onemol_nxspecial[pion][0];
 
   if (status == RESTORE) {
     inner_crosscheck_loop();
@@ -1304,8 +1340,8 @@ void FixBondReact::crosscheck_the_neighbor()
   }
 
   for (trace = 0; trace < nfirst_neighs; trace++) {
-    if (neigh!=trace && onemol->type[(int)onemol->special[pion][neigh]-1] == onemol->type[(int)onemol->special[pion][trace]-1] &&
-        glove[onemol->special[pion][trace]-1][0] == 0) {
+    if (neigh!=trace && onemol->type[(int)onemol_xspecial[pion][neigh]-1] == onemol->type[(int)onemol_xspecial[pion][trace]-1] &&
+        glove[onemol_xspecial[pion][trace]-1][0] == 0) {
 
       if (avail_guesses == MAXGUESS) {
         error->warning(FLERR,"Fix bond/react failed because MAXGUESS set too small. ask developer for info");
@@ -1338,30 +1374,29 @@ void FixBondReact::crosscheck_the_neighbor()
 
 void FixBondReact::inner_crosscheck_loop()
 {
-  tagint **special = atom->special;
   int *type = atom->type;
   // arbitrarily limited to 5 identical first neighbors
   tagint tag_choices[5];
-  int nfirst_neighs = onemol->nspecial[pion][0];
+  int nfirst_neighs = onemol_nxspecial[pion][0];
 
   int num_choices = 0;
   for (int i = 0; i < nfirst_neighs; i++) {
 
     int already_assigned = 0;
     for (int j = 0; j < onemol->natoms; j++) {
-      if (glove[j][1] == special[atom->map(glove[pion][1])][i]) {
+      if (glove[j][1] == xspecial[atom->map(glove[pion][1])][i]) {
         already_assigned = 1;
         break;
       }
     }
 
     if (already_assigned == 0 &&
-        type[(int)atom->map(special[atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol->special[pion][neigh]-1]) {
+        type[(int)atom->map(xspecial[atom->map(glove[pion][1])][i])] == onemol->type[(int)onemol_xspecial[pion][neigh]-1]) {
       if (num_choices > 5) { // here failed because too many identical first neighbors. but really no limit if situation arises
         status = GUESSFAIL;
         return;
       }
-      tag_choices[num_choices++] = special[atom->map(glove[pion][1])][i];
+      tag_choices[num_choices++] = xspecial[atom->map(glove[pion][1])][i];
     }
   }
 
@@ -1372,19 +1407,19 @@ void FixBondReact::inner_crosscheck_loop()
 
   //std::size_t size = sizeof(tag_choices) / sizeof(tag_choices[0]);
   std::sort(tag_choices, tag_choices + num_choices); //, std::greater<int>());
-  glove[onemol->special[pion][neigh]-1][0] = onemol->special[pion][neigh];
-  glove[onemol->special[pion][neigh]-1][1] = tag_choices[guess_branch[avail_guesses-1]-1];
+  glove[onemol_xspecial[pion][neigh]-1][0] = onemol_xspecial[pion][neigh];
+  glove[onemol_xspecial[pion][neigh]-1][1] = tag_choices[guess_branch[avail_guesses-1]-1];
   guess_branch[avail_guesses-1]--;
 
   //another check for ghost atoms. perhaps remove the one in make_a_guess
-  if (atom->map(glove[(int)onemol->special[pion][neigh]-1][1]) < 0) {
+  if (atom->map(glove[(int)onemol_xspecial[pion][neigh]-1][1]) < 0) {
     error->all(FLERR,"Fix bond/react needs ghost atoms from further away4");
   }
 
   if (guess_branch[avail_guesses-1] == 0) avail_guesses--;
 
-  for (int i = 0; i < onemol->nspecial[onemol->special[pion][neigh]-1][0]; i++) {
-    pioneer_count[onemol->special[onemol->special[pion][neigh]-1][i]-1]++;
+  for (int i = 0; i < onemol_nxspecial[onemol_xspecial[pion][neigh]-1][0]; i++) {
+    pioneer_count[onemol_xspecial[onemol_xspecial[pion][neigh]-1][i]-1]++;
   }
   glove_counter++;
   if (glove_counter == onemol->natoms) {
@@ -1404,15 +1439,13 @@ void FixBondReact::ring_check()
 {
   // ring_check can be made more efficient by re-introducing 'frozen' atoms
   // 'frozen' atoms have been assigned and also are no longer pioneers
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
 
   for (int i = 0; i < onemol->natoms; i++) {
-    for (int j = 0; j < onemol->nspecial[i][0]; j++) {
+    for (int j = 0; j < onemol_nxspecial[i][0]; j++) {
       int ring_fail = 1;
-      int ispecial = onemol->special[i][j];
-      for (int k = 0; k < nspecial[atom->map(glove[i][1])][0]; k++) {
-        if (special[atom->map(glove[i][1])][k] == glove[ispecial-1][1]) {
+      int ispecial = onemol_xspecial[i][j];
+      for (int k = 0; k < nxspecial[atom->map(glove[i][1])][0]; k++) {
+        if (xspecial[atom->map(glove[i][1])][k] == glove[ispecial-1][1]) {
           ring_fail = 0;
           break;
         }
@@ -1425,6 +1458,53 @@ void FixBondReact::ring_check()
   }
 }
 
+/* ----------------------------------------------------------------------
+  Get xspecials for current molecule templates
+------------------------------------------------------------------------- */
+
+void FixBondReact::get_molxspecials()
+{
+  if (newton_bond == 1) {
+    onemol_nxspecial = onemol->nspecial;
+    onemol_xspecial = onemol->special;
+    twomol_nxspecial = twomol->nspecial;
+    twomol_xspecial = twomol->special;
+  } else {
+    memory->destroy(onemol_nxspecial);
+    memory->destroy(onemol_xspecial);
+    memory->create(onemol_nxspecial,onemol->natoms,3,"bond/react:onemol_nxspecial");
+    memory->create(onemol_xspecial,onemol->natoms,atom->maxspecial,"bond/react:onemol_xspecial");
+    for (int i = 0; i < onemol->natoms; i++) {
+      onemol_nxspecial[i][0] = onemol->num_bond[i];
+      for (int j = 0; j < onemol_nxspecial[i][0]; j++) {
+        onemol_xspecial[i][j] = onemol->bond_atom[i][j];
+      }
+      onemol_nxspecial[i][1] = onemol->nspecial[i][1];
+      onemol_nxspecial[i][2] = onemol->nspecial[i][2];
+      int joffset = onemol_nxspecial[i][0] - onemol->nspecial[i][0];
+      for (int j = onemol_nxspecial[i][0]; j < onemol_nxspecial[i][2]; j++) {
+        onemol_xspecial[i][j+joffset] = onemol->special[i][j];
+      }
+    }
+    memory->destroy(twomol_nxspecial);
+    memory->destroy(twomol_xspecial);
+    memory->create(twomol_nxspecial,twomol->natoms,3,"bond/react:twomol_nxspecial");
+    memory->create(twomol_xspecial,twomol->natoms,atom->maxspecial,"bond/react:twomol_xspecial");
+    for (int i = 0; i < twomol->natoms; i++) {
+      twomol_nxspecial[i][0] = twomol->num_bond[i];
+      for (int j = 0; j < twomol_nxspecial[i][0]; j++) {
+        twomol_xspecial[i][j] = twomol->bond_atom[i][j];
+      }
+      twomol_nxspecial[i][1] = twomol->nspecial[i][1];
+      twomol_nxspecial[i][2] = twomol->nspecial[i][2];
+      int joffset = twomol_nxspecial[i][0] - twomol->nspecial[i][0];
+      for (int j = twomol_nxspecial[i][0]; j < twomol_nxspecial[i][2]; j++) {
+        twomol_xspecial[i][j+joffset] = twomol->special[i][j];
+      }
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
   Determine which pre-reacted template atoms are at least three bonds
   away from edge atoms.
@@ -1454,9 +1534,9 @@ void FixBondReact::find_landlocked_atoms(int myrxn)
 
   if (nspecial_limit != -1) {
     for (int i = 0; i < twomol->natoms; i++) {
-      for (int j = 0; j < twomol->nspecial[i][nspecial_limit]; j++) {
+      for (int j = 0; j < twomol_nxspecial[i][nspecial_limit]; j++) {
         for (int k = 0; k < onemol->natoms; k++) {
-          if (equivalences[twomol->special[i][j]-1][1][myrxn] == k+1 && edge[k][myrxn] == 1) {
+          if (equivalences[twomol_xspecial[i][j]-1][1][myrxn] == k+1 && edge[k][myrxn] == 1) {
             landlocked_atoms[i][myrxn] = 0;
           }
         }
@@ -1474,7 +1554,7 @@ void FixBondReact::find_landlocked_atoms(int myrxn)
   // also, if atoms change number of bonds, but aren't landlocked, that could be bad
   if (me == 0)
     for (int i = 0; i < twomol->natoms; i++) {
-      if (twomol->nspecial[i][0] != onemol->nspecial[equivalences[i][1][myrxn]-1][0] && landlocked_atoms[i][myrxn] == 0) {
+      if (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0] && landlocked_atoms[i][myrxn] == 0) {
         char str[128];
         sprintf(str,"An atom in 'react #%d' changes bond connectivity but not atom type",myrxn+1);
         error->warning(FLERR,str);
@@ -1721,7 +1801,9 @@ void FixBondReact::unlimit_bond()
   int *i_react_tags = atom->ivector[index3];
 
   for (int i = 0; i < atom->nlocal; i++) {
-    if (i_limit_tags[i] != 0 && (update->ntimestep + 1 - i_limit_tags[i]) > limit_duration[i_react_tags[i]]) {
+    // unlimit atoms for next step! this resolves # of procs disparity, mostly
+    // first '1': indexing offset, second '1': for next step
+    if (i_limit_tags[i] != 0 && (update->ntimestep + 1 - i_limit_tags[i]) > limit_duration[i_react_tags[i]]) { // + 1
       i_limit_tags[i] = 0;
       i_statted_tags[i] = 1;
       i_react_tags[i] = 0;
@@ -2535,17 +2617,14 @@ int FixBondReact::pack_forward_comm(int n, int *list, double *buf,
     return m;
   }
 
-  int **nspecial = atom->nspecial;
-  tagint **special = atom->special;
-
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     buf[m++] = ubuf(finalpartner[j]).d;
-    ns = nspecial[j][0];
+    ns = nxspecial[j][0];
     buf[m++] = ubuf(ns).d;
     for (k = 0; k < ns; k++)
-      buf[m++] = ubuf(special[j][k]).d;
+      buf[m++] = ubuf(xspecial[j][k]).d;
   }
   return m;
 }
@@ -2563,25 +2642,20 @@ void FixBondReact::unpack_forward_comm(int n, int first, double *buf)
     for (i = first; i < last; i++)
       printf("hello you shouldn't be here\n");
     // bondcount[i] = (int) ubuf(buf[m++]).i;
-
   } else if (commflag == 2) {
     for (i = first; i < last; i++) {
       partner[i] = (tagint) ubuf(buf[m++]).i;
       probability[i] = buf[m++];
     }
-
   } else {
-    int **nspecial = atom->nspecial;
-    tagint **special = atom->special;
-
     m = 0;
     last = first + n;
     for (i = first; i < last; i++) {
       finalpartner[i] = (tagint) ubuf(buf[m++]).i;
       ns = (int) ubuf(buf[m++]).i;
-      nspecial[i][0] = ns;
+      nxspecial[i][0] = ns;
       for (j = 0; j < ns; j++)
-        special[i][j] = (tagint) ubuf(buf[m++]).i;
+        xspecial[i][j] = (tagint) ubuf(buf[m++]).i;
     }
   }
 }
diff --git a/src/USER-MISC/fix_bond_react.h b/src/USER-MISC/fix_bond_react.h
index e60fff37fbaa93f6e4e09136a57282cd883d9fe3..8ff71141f8abf66e2ea221db207983ecc9b7eae5 100644
--- a/src/USER-MISC/fix_bond_react.h
+++ b/src/USER-MISC/fix_bond_react.h
@@ -48,6 +48,7 @@ class FixBondReact : public Fix {
 
  private:
   int me,nprocs;
+  int newton_bond;
   int nreacts;
   int *nevery;
   FILE *fp;
@@ -71,7 +72,6 @@ class FixBondReact : public Fix {
   int maxcreate;
   int allncreate;
   tagint ***created;
-  int *local_ncreate;
 
   class Molecule *onemol; // pre-reacted molecule template
   class Molecule *twomol; // post-reacted molecule template
@@ -112,6 +112,9 @@ class FixBondReact : public Fix {
   int ***reverse_equiv; // re-ordered equivalences
   int **landlocked_atoms; // all atoms at least three bonds away from edge atoms
 
+  int **nxspecial,**onemol_nxspecial,**twomol_nxspecial; // full number of 1-4 neighbors
+  tagint **xspecial,**onemol_xspecial,**twomol_xspecial; // full 1-4 neighbor list
+
   int pion,neigh,trace; // important indices for various loops. required for restore points
   int lcl_inst; // reaction instance
   tagint **glove; // 1st colmn: pre-reacted template, 2nd colmn: global IDs
@@ -144,6 +147,7 @@ class FixBondReact : public Fix {
 
   void far_partner();
   void close_partner();
+  void get_molxspecials();
   void find_landlocked_atoms(int);
   void glove_ghostcheck();
   void ghost_glovecast();
@@ -152,7 +156,7 @@ class FixBondReact : public Fix {
   void limit_bond(int);
   void dedup_mega_gloves(int); //dedup global mega_glove
 
-  // DEBUG (currently obsolete)
+  // DEBUG
 
   void print_bb();
 
diff --git a/src/USER-MISC/fix_smd.cpp b/src/USER-MISC/fix_smd.cpp
index fb22483a073d327e188c62ac9bc782c74000ae26..a42690c280f960d55ef482f4f7d9c85a04af0d31 100644
--- a/src/USER-MISC/fix_smd.cpp
+++ b/src/USER-MISC/fix_smd.cpp
@@ -270,12 +270,12 @@ void FixSMD::smd_tether()
         ftotal[2] -= fz*massfrac;
         if (evflag) {
           domain->unmap(x[i],image[i],unwrap);
-          v[0] = fx*massfrac*unwrap[0];
-          v[1] = fy*massfrac*unwrap[1];
-          v[2] = fz*massfrac*unwrap[2];
-          v[3] = fx*massfrac*unwrap[1];
-          v[4] = fx*massfrac*unwrap[2];
-          v[5] = fy*massfrac*unwrap[2];
+          v[0] = -fx*massfrac*unwrap[0];
+          v[1] = -fy*massfrac*unwrap[1];
+          v[2] = -fz*massfrac*unwrap[2];
+          v[3] = -fx*massfrac*unwrap[1];
+          v[4] = -fx*massfrac*unwrap[2];
+          v[5] = -fy*massfrac*unwrap[2];
           v_tally(i, v);
         }
       }
@@ -291,12 +291,12 @@ void FixSMD::smd_tether()
         ftotal[2] -= fz*massfrac;
         if (evflag) {
           domain->unmap(x[i],image[i],unwrap);
-          v[0] = fx*massfrac*unwrap[0];
-          v[1] = fy*massfrac*unwrap[1];
-          v[2] = fz*massfrac*unwrap[2];
-          v[3] = fx*massfrac*unwrap[1];
-          v[4] = fx*massfrac*unwrap[2];
-          v[5] = fy*massfrac*unwrap[2];
+          v[0] = -fx*massfrac*unwrap[0];
+          v[1] = -fy*massfrac*unwrap[1];
+          v[2] = -fz*massfrac*unwrap[2];
+          v[3] = -fx*massfrac*unwrap[1];
+          v[4] = -fx*massfrac*unwrap[2];
+          v[5] = -fy*massfrac*unwrap[2];
           v_tally(i, v);
         }
       }
diff --git a/src/USER-MISC/pair_lj_expand_coul_long.cpp b/src/USER-MISC/pair_lj_expand_coul_long.cpp
index 3335672ac3dd305e22e16b4631dc07039ff5ee47..79a3c2a49791346e5e22e5f3f5fbc9ca22294316 100644
--- a/src/USER-MISC/pair_lj_expand_coul_long.cpp
+++ b/src/USER-MISC/pair_lj_expand_coul_long.cpp
@@ -88,7 +88,7 @@ void PairLJExpandCoulLong::compute(int eflag, int vflag)
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
-  double rsq,rshift,rshiftsq;
+  double rsq,rshift,rshiftsq,rshift2inv;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
@@ -166,8 +166,8 @@ void PairLJExpandCoulLong::compute(int eflag, int vflag)
           r = sqrt(rsq);
           rshift = r - shift[itype][jtype];
           rshiftsq = rshift*rshift;
-          r2inv = 1.0/rshiftsq;
-          r6inv = r2inv*r2inv*r2inv;
+          rshift2inv = 1.0/rshiftsq;
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj = factor_lj*forcelj/rshift/r;
         } else forcelj = 0.0;
@@ -217,7 +217,7 @@ void PairLJExpandCoulLong::compute_inner()
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double rsw,r,rshift,rshiftsq;
+  double rsw,r,rshift,rshiftsq,rshift2inv;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
@@ -275,8 +275,8 @@ void PairLJExpandCoulLong::compute_inner()
           r = sqrt(rsq);
           rshift = r - shift[itype][jtype];
           rshiftsq = rshift*rshift;
-          r2inv = 1.0/rshiftsq;
-          r6inv = r2inv*r2inv*r2inv;
+          rshift2inv = 1.0/rshiftsq;
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj = factor_lj*forcelj/rshift/r;
         } else forcelj = 0.0;
@@ -306,8 +306,8 @@ void PairLJExpandCoulLong::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
-  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
-  double rsw,r,rshift,rshiftsq;
+  double rsq,r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double rsw,rshift,rshiftsq,rshift2inv;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
@@ -370,8 +370,8 @@ void PairLJExpandCoulLong::compute_middle()
           r = sqrt(rsq);
           rshift = r - shift[itype][jtype];
           rshiftsq = rshift*rshift;
-          r2inv = 1.0/rshiftsq;
-          r6inv = r2inv*r2inv*r2inv;
+          rshift2inv = 1.0/rshiftsq;
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj = factor_lj*forcelj/rshift/r;
         } else forcelj = 0.0;
@@ -408,9 +408,8 @@ void PairLJExpandCoulLong::compute_outer(int eflag, int vflag)
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
-  double rsw,rshift,rshiftsq;
+  double rsw,rsq,rshift,rshiftsq,rshift2inv;
   int *ilist,*jlist,*numneigh,**firstneigh;
-  double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
@@ -507,8 +506,8 @@ void PairLJExpandCoulLong::compute_outer(int eflag, int vflag)
           r = sqrt(rsq);
           rshift = r - shift[itype][jtype];
           rshiftsq = rshift*rshift;
-          r2inv = 1.0/rshiftsq;
-          r6inv = r2inv*r2inv*r2inv;
+          rshift2inv = 1.0/rshiftsq;
+          r6inv = rshift2inv*rshift2inv*rshift2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj = factor_lj*forcelj/rshift/r;
           if (rsq < cut_in_on_sq) {
@@ -680,7 +679,7 @@ void PairLJExpandCoulLong::init_style()
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
   int respa = 0;
@@ -729,12 +728,13 @@ double PairLJExpandCoulLong::init_one(int i, int j)
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
+    shift[i][j] = 0.5 * (shift[i][i] + shift[j][j]);
   }
 
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
-  double cut = MAX(cut_lj[i][j],cut_coul+2.0*qdist);
-  cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
+  double cut = MAX(cut_lj[i][j]+shift[i][j],cut_coul+2.0*qdist);
+  cut_ljsq[i][j] = (cut_lj[i][j]+shift[i][j]) *(cut_lj[i][j]+shift[i][j]);
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
diff --git a/src/USER-QUIP/pair_quip.cpp b/src/USER-QUIP/pair_quip.cpp
index 3bf12d19d2ab3217e44bea084f53523706328425..0c00a5ef59309b824e1c534d58a6fcff9df79ceb 100644
--- a/src/USER-QUIP/pair_quip.cpp
+++ b/src/USER-QUIP/pair_quip.cpp
@@ -40,6 +40,7 @@ using namespace LAMMPS_NS;
 PairQUIP::PairQUIP(LAMMPS *lmp) : Pair(lmp)
 {
    single_enable = 0;
+   restartinfo = 0;
    one_coeff = 1;
    no_virial_fdotr_compute = 1;
    manybody_flag = 1;
diff --git a/src/USER-REAXC/reaxc_ffield.cpp b/src/USER-REAXC/reaxc_ffield.cpp
index fcface79fd29718fb557e79ef1e1d37f18d89e3a..7932e9cbd8975596eeb9d3dbd161c153000ebb5d 100644
--- a/src/USER-REAXC/reaxc_ffield.cpp
+++ b/src/USER-REAXC/reaxc_ffield.cpp
@@ -157,6 +157,18 @@ char Read_Force_Field( FILE *fp, reax_interaction *reax,
     fgets( s, MAX_LINE, fp );
     c = Tokenize( s, &tmp );
 
+    /* Sanity checks */
+    if (c == 2 && !lgflag) {
+      if (me == 0)
+        fprintf(stderr, "Force field file requires using 'lgvdw yes'\n");
+      MPI_Abort( comm, FILE_NOT_FOUND );
+    }
+    if (c < 9) {
+      if (me == 0)
+        fprintf(stderr, "Inconsistent ffield file (reaxc_ffield.cpp) \n");
+      MPI_Abort( comm, FILE_NOT_FOUND );
+    }
+
     for( j = 0; j < (int)(strlen(tmp[0])); ++j )
       reax->sbp[i].name[j] = toupper( tmp[0][j] );
 
@@ -174,6 +186,13 @@ char Read_Force_Field( FILE *fp, reax_interaction *reax,
     fgets( s, MAX_LINE, fp );
     c = Tokenize( s, &tmp );
 
+    /* Sanity check */
+    if (c < 8) {
+      if (me == 0)
+        fprintf(stderr, "Inconsistent ffield file (reaxc_ffield.cpp) \n");
+      MPI_Abort( comm, FILE_NOT_FOUND );
+    }
+
     val = atof(tmp[0]); reax->sbp[i].alpha      = val;
     val = atof(tmp[1]); reax->sbp[i].gamma_w    = val;
     val = atof(tmp[2]); reax->sbp[i].valency_boc= val;
@@ -187,6 +206,13 @@ char Read_Force_Field( FILE *fp, reax_interaction *reax,
     fgets( s, MAX_LINE, fp );
     c = Tokenize( s, &tmp );
 
+    /* Sanity check */
+    if (c < 8) {
+      if (me == 0)
+        fprintf(stderr, "Inconsistent ffield file (reaxc_ffield.cpp) \n");
+      MPI_Abort( comm, FILE_NOT_FOUND );
+    }
+
     val = atof(tmp[0]); reax->sbp[i].r_pi_pi    = val;
     val = atof(tmp[1]); reax->sbp[i].p_lp2      = val;
     val = atof(tmp[2]);
@@ -201,7 +227,7 @@ char Read_Force_Field( FILE *fp, reax_interaction *reax,
     c = Tokenize( s, &tmp );
 
     /* Sanity check */
-    if (c < 3) {
+    if (c < 8) {
       if (me == 0)
         fprintf(stderr, "Inconsistent ffield file (reaxc_ffield.cpp) \n");
       MPI_Abort( comm, FILE_NOT_FOUND );
@@ -222,9 +248,9 @@ char Read_Force_Field( FILE *fp, reax_interaction *reax,
       c = Tokenize( s, &tmp );
 
       /* Sanity check */
-      if (c > 3) {
+      if (c > 2) {
         if (me == 0)
-          fprintf(stderr, "Inconsistent ffield file (reaxc_ffield.cpp) \n");
+          fprintf(stderr, "Force field file incompatible with 'lgvdw yes'\n");
         MPI_Abort( comm, FILE_NOT_FOUND );
       }
 
diff --git a/src/error.cpp b/src/error.cpp
index 2ab816c992ac910fd0a2dd789a1c08d3f9da2576..d516050385c212ae0c8eb7da4466778baf96a447 100644
--- a/src/error.cpp
+++ b/src/error.cpp
@@ -28,7 +28,7 @@ static const char *truncpath(const char *path)
    if (path) {
      int len = strlen(path);
      for (int i = len-4; i > 0; --i) {
-	if (strncmp("src/",path+i,4) == 0)
+        if (strncmp("src/",path+i,4) == 0)
           return path+i;
      }
    }
diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp
index 754c9ea4da41230b46836b28ca69a6e990c77b05..42cd1bd1992de2c9d474b018634bd33052e8b03f 100644
--- a/src/fix_adapt.cpp
+++ b/src/fix_adapt.cpp
@@ -118,7 +118,7 @@ nadapt(0), id_fix_diam(NULL), id_fix_chg(NULL), adapt(NULL)
       adapt[nadapt].bparam = new char[n];
       adapt[nadapt].bond = NULL;
       strcpy(adapt[nadapt].bparam,arg[iarg+2]);
-      force->bounds(FLERR,arg[iarg+3],atom->ntypes,
+      force->bounds(FLERR,arg[iarg+3],atom->nbondtypes,
                     adapt[nadapt].ilo,adapt[nadapt].ihi);
       if (strstr(arg[iarg+4],"v_") == arg[iarg+4]) {
         n = strlen(&arg[iarg+4][2]) + 1;
diff --git a/src/fix_dt_reset.cpp b/src/fix_dt_reset.cpp
index 006bf27e77729c1f45078642f80bec2014631f65..f2a50fd6d1c416ee9b8c1f551a1ef56305a3b945 100644
--- a/src/fix_dt_reset.cpp
+++ b/src/fix_dt_reset.cpp
@@ -69,6 +69,7 @@ FixDtReset::FixDtReset(LAMMPS *lmp, int narg, char **arg) :
 
   int scaleflag = 1;
 
+  emax = -1.0;
   int iarg = 7;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"units") == 0) {
@@ -77,6 +78,11 @@ FixDtReset::FixDtReset(LAMMPS *lmp, int narg, char **arg) :
       else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
       else error->all(FLERR,"Illegal fix dt/reset command");
       iarg += 2;
+    } else if (strcmp(arg[iarg],"emax") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix dt/reset command");
+      emax = force->numeric(FLERR,arg[iarg+1]);
+      if (emax <= 0.0) error->all(FLERR,"Illegal fix dt/reset command");
+      iarg += 2;
     } else error->all(FLERR,"Illegal fix dt/reset command");
   }
 
@@ -117,6 +123,7 @@ void FixDtReset::init()
                      "Dump dcd/xtc timestamp may be wrong with fix dt/reset");
 
   ftm2v = force->ftm2v;
+  mvv2e = force->mvv2e;
   dt = update->dt;
 }
 
@@ -131,12 +138,10 @@ void FixDtReset::setup(int vflag)
 
 void FixDtReset::end_of_step()
 {
-  double dtv,dtf,dtsq;
+  double dtv,dtf,dte,dtsq;
   double vsq,fsq,massinv;
   double delx,dely,delz,delr;
 
-  // compute vmax and amax of any atom in group
-
   double **v = atom->v;
   double **f = atom->f;
   double *mass = atom->mass;
@@ -153,10 +158,14 @@ void FixDtReset::end_of_step()
       else massinv = 1.0/mass[type[i]];
       vsq = v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2];
       fsq = f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
-      dtv = dtf = BIG;
+      dtv = dtf = dte = BIG;
       if (vsq > 0.0) dtv = xmax/sqrt(vsq);
       if (fsq > 0.0) dtf = sqrt(2.0*xmax/(ftm2v*sqrt(fsq)*massinv));
       dt = MIN(dtv,dtf);
+      if (emax > 0.0 && vsq > 0.0 && fsq > 0.0) {
+        dte = emax/sqrt(fsq*vsq)/sqrt(ftm2v*mvv2e);
+        dt = MIN(dt, dte);
+      }
       dtsq = dt*dt;
       delx = dt*v[i][0] + 0.5*dtsq*massinv*f[i][0] * ftm2v;
       dely = dt*v[i][1] + 0.5*dtsq*massinv*f[i][1] * ftm2v;
diff --git a/src/fix_dt_reset.h b/src/fix_dt_reset.h
index 6e08b14f634d638e42fb8212445be342bf002a28..6df69138952b79eef80a2b762c06435a24627564 100644
--- a/src/fix_dt_reset.h
+++ b/src/fix_dt_reset.h
@@ -37,8 +37,8 @@ class FixDtReset : public Fix {
  private:
   bigint laststep;
   int minbound,maxbound;
-  double tmin,tmax,xmax;
-  double ftm2v;
+  double tmin,tmax,xmax,emax;
+  double ftm2v,mvv2e;
   double dt,t_laststep;
   int respaflag;
 };
diff --git a/src/input.cpp b/src/input.cpp
index b83a3683e4a3eae3984e842f55f33e672471fd85..f41c8458eae18e2b3200decaa69b0ff5c8b4fe22 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -497,7 +497,7 @@ void Input::substitute(char *&str, char *&str2, int &max, int &max2, int flag)
         beyond = ptr + strlen(var) + 3;
         value = variable->retrieve(var);
 
-      // immediate variable between parenthesis, e.g. $(1/2)
+      // immediate variable between parenthesis, e.g. $(1/3) or $(1/3:%.6g)
 
       } else if (*(ptr+1) == '(') {
         var = ptr+2;
@@ -516,10 +516,20 @@ void Input::substitute(char *&str, char *&str2, int &max, int &max2, int flag)
         if (var[i] == '\0') error->one(FLERR,"Invalid immediate variable");
         var[i] = '\0';
         beyond = ptr + strlen(var) + 3;
-        sprintf(immediate,"%.20g",variable->compute_equal(var));
+
+        // check if an inline format specifier was appended with a colon
+
+        char fmtstr[64] = "%.20g";
+        char *fmtflag;
+        if ((fmtflag=strrchr(var, ':')) && (fmtflag[1]=='%')) {
+          strncpy(fmtstr,&fmtflag[1],sizeof(fmtstr)-1);
+          *fmtflag='\0';
+        }
+
+        sprintf(immediate,fmtstr,variable->compute_equal(var));
         value = immediate;
 
-        // single character variable name, e.g. $a
+      // single character variable name, e.g. $a
 
       } else {
         var = ptr;
diff --git a/src/molecule.cpp b/src/molecule.cpp
index 56e56dab2cd16ef45c9c6068161c29a4815d51ca..dfbe3e1e08e07527096f6db6d4de80557c370415 100644
--- a/src/molecule.cpp
+++ b/src/molecule.cpp
@@ -52,6 +52,9 @@ Molecule::Molecule(LAMMPS *lmp, int narg, char **arg, int &index) :
 
   if (index >= narg) error->all(FLERR,"Illegal molecule command");
 
+  if (domain->box_exist == 0)
+    error->all(FLERR,"Molecule command before simulation box is defined");
+
   int n = strlen(arg[0]) + 1;
   id = new char[n];
   strcpy(id,arg[0]);
@@ -684,7 +687,7 @@ void Molecule::types(char *line)
   }
 
   for (int i = 0; i < natoms; i++)
-    if (type[i] <= 0)
+    if (type[i] <= 0 || type[i] > atom->ntypes)
       error->all(FLERR,"Invalid atom type in molecule file");
 
   for (int i = 0; i < natoms; i++)
@@ -771,10 +774,10 @@ void Molecule::bonds(int flag, char *line)
       error->all(FLERR,"Invalid Bonds section in molecule file");
     itype += boffset;
 
-    if (atom1 <= 0 || atom1 > natoms ||
-        atom2 <= 0 || atom2 > natoms)
+    if ((atom1 <= 0) || (atom1 > natoms) ||
+        (atom2 <= 0) || (atom2 > natoms) || (atom1 == atom2))
       error->one(FLERR,"Invalid atom ID in Bonds section of molecule file");
-    if (itype <= 0)
+    if (itype <= 0 || itype > atom->nbondtypes)
       error->one(FLERR,"Invalid bond type in Bonds section of molecule file");
 
     if (flag) {
@@ -829,11 +832,12 @@ void Molecule::angles(int flag, char *line)
       error->all(FLERR,"Invalid Angles section in molecule file");
     itype += aoffset;
 
-    if (atom1 <= 0 || atom1 > natoms ||
-        atom2 <= 0 || atom2 > natoms ||
-        atom3 <= 0 || atom3 > natoms)
+    if ((atom1 <= 0) || (atom1 > natoms) ||
+        (atom2 <= 0) || (atom2 > natoms) ||
+        (atom3 <= 0) || (atom3 > natoms) ||
+        (atom1 == atom2) || (atom1 == atom3) || (atom2 == atom3))
       error->one(FLERR,"Invalid atom ID in Angles section of molecule file");
-    if (itype <= 0)
+    if (itype <= 0 || itype > atom->nangletypes)
       error->one(FLERR,"Invalid angle type in Angles section of molecule file");
 
     if (flag) {
@@ -902,13 +906,15 @@ void Molecule::dihedrals(int flag, char *line)
       error->all(FLERR,"Invalid Dihedrals section in molecule file");
     itype += doffset;
 
-    if (atom1 <= 0 || atom1 > natoms ||
-        atom2 <= 0 || atom2 > natoms ||
-        atom3 <= 0 || atom3 > natoms ||
-        atom4 <= 0 || atom4 > natoms)
+    if ((atom1 <= 0) || (atom1 > natoms) ||
+        (atom2 <= 0) || (atom2 > natoms) ||
+        (atom3 <= 0) || (atom3 > natoms) ||
+        (atom4 <= 0) || (atom4 > natoms) ||
+        (atom1 == atom2) || (atom1 == atom3) || (atom1 == atom4) ||
+        (atom2 == atom3) || (atom2 == atom4) || (atom3 == atom4))
       error->one(FLERR,
                  "Invalid atom ID in dihedrals section of molecule file");
-    if (itype <= 0)
+    if (itype <= 0 || itype > atom->ndihedraltypes)
       error->one(FLERR,
                  "Invalid dihedral type in dihedrals section of molecule file");
 
@@ -989,13 +995,15 @@ void Molecule::impropers(int flag, char *line)
       error->all(FLERR,"Invalid Impropers section in molecule file");
     itype += ioffset;
 
-    if (atom1 <= 0 || atom1 > natoms ||
-        atom2 <= 0 || atom2 > natoms ||
-        atom3 <= 0 || atom3 > natoms ||
-        atom4 <= 0 || atom4 > natoms)
+    if ((atom1 <= 0) || (atom1 > natoms) ||
+        (atom2 <= 0) || (atom2 > natoms) ||
+        (atom3 <= 0) || (atom3 > natoms) ||
+        (atom4 <= 0) || (atom4 > natoms) ||
+        (atom1 == atom2) || (atom1 == atom3) || (atom1 == atom4) ||
+        (atom2 == atom3) || (atom2 == atom4) || (atom3 == atom4))
       error->one(FLERR,
                  "Invalid atom ID in impropers section of molecule file");
-    if (itype <= 0)
+    if (itype <= 0 || itype > atom->nimpropertypes)
       error->one(FLERR,
                  "Invalid improper type in impropers section of molecule file");
 
diff --git a/src/pair.cpp b/src/pair.cpp
index 73dbb9428e37939a7a407ad70d8b9ea516a8a133..9bb1ad212f19de34473e5f115d22b3c8bebe2acb 100644
--- a/src/pair.cpp
+++ b/src/pair.cpp
@@ -695,7 +695,15 @@ void Pair::compute_dummy(int eflag, int vflag)
 /* ---------------------------------------------------------------------- */
 void Pair::read_restart(FILE *)
 {
-  error->all(FLERR,"BUG: restartinfo=1 but no restart support in pair style");
+  if (comm->me == 0)
+    error->warning(FLERR,"BUG: restartinfo=1 but no restart support in pair style");
+}
+
+/* ---------------------------------------------------------------------- */
+void Pair::write_restart(FILE *)
+{
+  if (comm->me == 0)
+    error->warning(FLERR,"BUG: restartinfo=1 but no restart support in pair style");
 }
 
 /* -------------------------------------------------------------------
diff --git a/src/pair.h b/src/pair.h
index 26488103a05e2044d537f31dcf8a4b8b2fa0073b..844bc0cdc75c12983d98da16273b70c1257044a8 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -157,7 +157,7 @@ class Pair : protected Pointers {
   virtual void free_tables();
   virtual void free_disp_tables();
 
-  virtual void write_restart(FILE *) {}
+  virtual void write_restart(FILE *);
   virtual void read_restart(FILE *);
   virtual void write_restart_settings(FILE *) {}
   virtual void read_restart_settings(FILE *) {}
@@ -303,7 +303,9 @@ No kspace style is defined.
 
 E: BUG: restartinfo=1 but no restart support in pair style
 
-UNDOCUMENTED
+The pair style has a bug, where it does not support reading
+and writing information to a restart file, but does not set
+the member variable restartinfo to 0 as required in that case.
 
 E: Cannot yet use compute tally with Kokkos
 
diff --git a/src/variable.cpp b/src/variable.cpp
index b5c19a48439cd2342273e7af4b7d21936ffe9bd2..86296b4b402e4c066d2692d5362d579be635c54d 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -284,12 +284,21 @@ void Variable::set(int narg, char **arg)
 
   } else if (strcmp(arg[1],"string") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
+
+    int maxcopy = strlen(arg[2]) + 1;
+    int maxwork = maxcopy;
+    char *scopy = new char[maxcopy];
+    char *work = new char[maxwork];
+    strcpy(scopy,arg[2]);
+    input->substitute(scopy,work,maxcopy,maxwork,1);
+    delete [] work;
+
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != STRING)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
-      copy(1,&arg[2],data[ivar]);
+      copy(1,&scopy,data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
@@ -298,8 +307,9 @@ void Variable::set(int narg, char **arg)
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
-      copy(1,&arg[2],data[nvar]);
+      copy(1,&scopy,data[nvar]);
     }
+    delete [] scopy;
 
   // GETENV
   // remove pre-existing var if also style GETENV (allows it to be reset)
diff --git a/src/variable.h b/src/variable.h
index a74cdba980f4cee3c75486a08706e3c3ba4f96ca..b20eb7e6b93e566f77d5421e820e59eb1a9b79b1 100644
--- a/src/variable.h
+++ b/src/variable.h
@@ -16,6 +16,7 @@
 
 #include <cstdlib>
 #include "pointers.h"
+#include "input.h"
 
 namespace LAMMPS_NS {
 
diff --git a/src/version.h b/src/version.h
index 981ec8eeb8c8e47627ab4d756be509e7b7fe2872..04baa25f126ef93cb3f47bcc2ddd066e2b08177b 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "11 May 2018"
+#define LAMMPS_VERSION "22 Jun 2018"