diff --git a/doc/src/Build_manual.txt b/doc/src/Build_manual.txt
new file mode 100644
index 0000000000000000000000000000000000000000..695ac21a13fb9c77f9bfd4d70edeadb8e3648fb0
--- /dev/null
+++ b/doc/src/Build_manual.txt
@@ -0,0 +1,125 @@
+"Previous Section"_Errors.html - "LAMMPS WWW Site"_lws -
+"LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next
+Section"_Manual.html :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Commands_all.html)
+
+:line
+
+Building the LAMMPS Manual :h2
+
+Depending on how you obtained LAMMPS, the doc directory has 
+2 or 3 sub-directories and optionally 2 PDF files and an ePUB file:
+
+src             # content files for LAMMPS documentation
+html            # HTML version of the LAMMPS manual (see html/Manual.html)
+tools           # tools and settings for building the documentation
+Manual.pdf      # large PDF version of entire manual
+Developer.pdf   # small PDF with info about how LAMMPS is structured
+LAMMPS.epub     # Manual in ePUB format :pre
+
+If you downloaded LAMMPS as a tarball from the web site, all these
+directories and files should be included.
+
+If you downloaded LAMMPS from the public SVN or Git repositories, then
+the HTML and PDF files are not included.  Instead you need to create
+them, in one of three ways:
+
+(a) You can "fetch" the current HTML and PDF files from the LAMMPS web
+site.  Just type "make fetch".  This should create a html_www dir and
+Manual_www.pdf/Developer_www.pdf files.  Note that if new LAMMPS
+features have been added more recently than the date of your version,
+the fetched documentation will include those changes (but your source
+code will not, unless you update your local repository).
+
+(b) You can build the HTML and PDF files yourself, by typing "make
+html" followed by "make pdf".  Note that the PDF make requires the
+HTML files already exist.  This requires various tools including
+Sphinx, which the build process will attempt to download and install
+on your system, if not already available.  See more details below.
+
+(c) You can genererate an older, simpler, less-fancy style of HTML
+documentation by typing "make old".  This will create an "old"
+directory.  This can be useful if (b) does not work on your box for
+some reason, or you want to quickly view the HTML version of a doc
+page you have created or edited yourself within the src directory.
+E.g. if you are planning to submit a new feature to LAMMPS.
+
+:line
+
+The generation of all documentation is managed by the Makefile in
+the doc dir.
+
+Documentation Build Options: :pre
+
+make html         # generate HTML in html dir using Sphinx
+make pdf          # generate 2 PDF files (Manual.pdf,Developer.pdf)
+                  #   in doc dir via htmldoc and pdflatex
+make old          # generate old-style HTML pages in old dir via txt2html
+make fetch        # fetch HTML doc pages and 2 PDF files from web site
+                  #   as a tarball and unpack into html dir and 2 PDFs
+make epub         # generate LAMMPS.epub in ePUB format using Sphinx 
+make clean        # remove intermediate RST files created by HTML build
+make clean-all    # remove entire build folder and any cached data :pre
+
+:line
+
+Installing prerequisites for HTML build :h3
+
+To run the HTML documention build toolchain, Python 3 and virtualenv
+have to be installed.  Here are instructions for common setups:
+
+Ubuntu :h4
+
+sudo apt-get install python-virtualenv :pre
+
+Fedora (up to version 21) and Red Hat Enterprise Linux or CentOS (up to version 7.x) :h4
+
+sudo yum install python3-virtualenv :pre
+
+Fedora (since version 22) :h4
+
+sudo dnf install python3-virtualenv pre
+
+MacOS X :h4
+
+Python 3 :h5
+
+Download the latest Python 3 MacOS X package from
+"https://www.python.org"_https://www.python.org
+and install it.  This will install both Python 3
+and pip3.
+
+virtualenv :h5
+
+Once Python 3 is installed, open a Terminal and type
+
+pip3 install virtualenv :pre
+
+This will install virtualenv from the Python Package Index.
+
+:line
+
+Installing prerequisites for PDF build
+
+[TBA]
+
+:line
+
+Installing prerequisites for epub build :h3
+
+ePUB :h4
+
+Same as for HTML. This uses the same tools and configuration
+files as the HTML tree.
+
+For converting the generated ePUB file to a mobi format file
+(for e-book readers like Kindle, that cannot read ePUB), you
+also need to have the 'ebook-convert' tool from the "calibre"
+software installed. "http://calibre-ebook.com/"_http://calibre-ebook.com/
+You first create the ePUB file with 'make epub' and then do:
+
+ebook-convert LAMMPS.epub LAMMPS.mobi :pre
+
diff --git a/doc/src/Commands_bond.txt b/doc/src/Commands_bond.txt
index 314260cb14bc582a5791912ac055ec50260878f1..48069d3120048e9d59ead6a3f2959be296d078de 100644
--- a/doc/src/Commands_bond.txt
+++ b/doc/src/Commands_bond.txt
@@ -9,7 +9,7 @@ Documentation"_ld - "LAMMPS Commands"_lc :c
 "Fix styles"_Commands_fix.html,
 "Compute styles"_Commands_compute.html,
 "Pair styles"_Commands_pair.html,
-"Bond styles"_Commands_bond.html,
+"Bond styles"_Commands_bond.html#bond,
 "Angle styles"_Commands_bond.html#angle,
 "Dihedral styles"_Commands_bond.html#dihedral,
 "Improper styles"_Commands_bond.html#improper,
diff --git a/doc/src/Errors_common.txt b/doc/src/Errors_common.txt
index 8f26b22b3a8a8f9b47ea2df5e6b650a318fc795f..d82114bd38c2331c1a35594f0f8159116047d14a 100644
--- a/doc/src/Errors_common.txt
+++ b/doc/src/Errors_common.txt
@@ -93,7 +93,7 @@ decide if the WARNING is important or not.  A WARNING message that is
 generated in the middle of a run is only printed to the screen, not to
 the logfile, to avoid cluttering up thermodynamic output.  If LAMMPS
 crashes or hangs without spitting out an error message first then it
-could be a bug (see "this section"_#err_2) or one of the following
+could be a bug (see "this section"_Errors_bugs.html) or one of the following
 cases:
 
 LAMMPS runs in the available memory a processor allows to be
diff --git a/doc/src/Howto_bioFF.txt b/doc/src/Howto_bioFF.txt
index afb8a84f2e32ac4397054054c4032ab339629290..deb5b31441918e8646b29c74740fc16ed0ed4b52 100644
--- a/doc/src/Howto_bioFF.txt
+++ b/doc/src/Howto_bioFF.txt
@@ -96,6 +96,10 @@ documentation for the formula it computes.
 [(MacKerell)] MacKerell, Bashford, Bellott, Dunbrack, Evanseck, Field,
 Fischer, Gao, Guo, Ha, et al, J Phys Chem, 102, 3586 (1998).
 
+:link(howto-Cornell)
+[(Cornell)] Cornell, Cieplak, Bayly, Gould, Merz, Ferguson,
+Spellmeyer, Fox, Caldwell, Kollman, JACS 117, 5179-5197 (1995).
+
 :link(howto-Mayo)
 [(Mayo)] Mayo, Olfason, Goddard III, J Phys Chem, 94, 8897-8909
 (1990).
diff --git a/doc/src/Howto_tip4p.txt b/doc/src/Howto_tip4p.txt
index f9e548e26896c1143f1cead59e3c0a1effb015c3..9f7f1413147e9f13e9ce21b44457502d1d95fa87 100644
--- a/doc/src/Howto_tip4p.txt
+++ b/doc/src/Howto_tip4p.txt
@@ -31,7 +31,7 @@ using the "fix shake"_fix_shake.html command.
 
 These are the additional parameters (in real units) to set for O and H
 atoms and the water molecule to run a rigid TIP4P model with a cutoff
-"(Jorgensen)"_#Jorgensen1.  Note that the OM distance is specified in
+"(Jorgensen)"_#Jorgensen5.  Note that the OM distance is specified in
 the "pair_style"_pair_style.html command, not as part of the pair
 coefficients.
 
@@ -107,6 +107,6 @@ models"_http://en.wikipedia.org/wiki/Water_model.
 
 :line
 
-:link(Jorgensen1)
+:link(Jorgensen5)
 [(Jorgensen)] Jorgensen, Chandrasekhar, Madura, Impey, Klein, J Chem
 Phys, 79, 926 (1983).
diff --git a/doc/src/Intro_features.txt b/doc/src/Intro_features.txt
index 2bb7e25683be9bd90610193a3b265729b1eb9afa..07c549c1566a73c09d66b92b590c87d5654773d5 100644
--- a/doc/src/Intro_features.txt
+++ b/doc/src/Intro_features.txt
@@ -20,7 +20,7 @@ classes of functionality:
 "Integrators"_#integrate
 "Diagnostics"_#diag
 "Output"_#output
-"Multi-replica models"_#replica
+"Multi-replica models"_#replica1
 "Pre- and post-processing"_#prepost
 "Specialized features (beyond MD itself)"_#special :ul
 
@@ -154,7 +154,7 @@ Output :h4,link(output)
   time averaging of system-wide quantities
   atom snapshots in native, XYZ, XTC, DCD, CFG formats :ul
 
-Multi-replica models :h4,link(replica)
+Multi-replica models :h4,link(replica1)
 
 "nudged elastic band"_neb.html
 "parallel replica dynamics"_prd.html
diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index a3747465cd6b391bb7fb996bd9393a71aa826b4a..ec85547decff917996fecdbf769455ec5911d05a 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -8,6 +8,8 @@
 
 <BODY>
 
+<H1></H1>
+
 <!-- END_HTML_ONLY -->
 
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
@@ -18,8 +20,6 @@
 
 :line
 
-<H1></H1>
-
 LAMMPS Documentation :c,h1
 2 Aug 2018 version :c,h2
 
@@ -71,6 +71,7 @@ every LAMMPS command.
    :name: userdoc
    :includehidden:
 
+   Manual_version
    Intro
    Install
    Build
@@ -84,6 +85,7 @@ every LAMMPS command.
    Modify
    Python
    Errors
+   Build_manual
 
 .. toctree::
    :caption: Index
diff --git a/doc/src/Packages_details.txt b/doc/src/Packages_details.txt
index 1dbde273b46bdc8ca8ba28a966af7c3a1613c612..033e70dbc19a21ad93a003bbfe9db777b1085e30 100644
--- a/doc/src/Packages_details.txt
+++ b/doc/src/Packages_details.txt
@@ -56,7 +56,7 @@ as contained in the file name.
 "PYTHON"_#PYTHON,
 "QEQ"_#QEQ,
 "REAX"_#REAX,
-"REPLICA"_#REPLICA,
+"REPLICA"_#REPLICA2,
 "RIGID"_#RIGID,
 "SHOCK"_#SHOCK,
 "SNAP"_#SNAP,
@@ -294,7 +294,7 @@ src/GPU: filenames -> commands
 src/GPU/README
 lib/gpu/README
 "Speed packages"_Speed_packages.html
-"Speed gpu"_Speed_gpu.html.html
+"Speed gpu"_Speed_gpu.html
 "Section 2.6 -sf gpu"_Run_options.html
 "Section 2.6 -pk gpu"_Run_options.html
 "package gpu"_package.html
@@ -831,7 +831,7 @@ examples/reax :ul
 
 :line
 
-REPLICA package :link(REPLICA),h4
+REPLICA package :link(REPLICA2),h4
 
 [Contents:]
 
@@ -937,7 +937,7 @@ the usual manner via MD.  Various pair, fix, and compute styles.
 [Supporting info:]
 
 src/SPIN: filenames -> commands
-"Howto spin"_Howto_spin.html
+"Howto spins"_Howto_spins.html
 "pair_style spin/dmi"_pair_spin_dmi.html
 "pair_style spin/exchange"_pair_spin_exchange.html
 "pair_style spin/magelec"_pair_spin_magelec.html
diff --git a/doc/src/Packages_standard.txt b/doc/src/Packages_standard.txt
index 7e6434560244abfce710c182af2c4fb6cad31da6..56ec693185ec715a40ac7b3b03085b000ab8cba8 100644
--- a/doc/src/Packages_standard.txt
+++ b/doc/src/Packages_standard.txt
@@ -57,9 +57,9 @@ Package, Description, Doc page, Example, Library
 "PYTHON"_Packages_details.html#PYTHON, embed Python code in an input script, "python"_python.html, python, sys
 "QEQ"_Packages_details.html#QEQ, QEq charge equilibration, "fix qeq"_fix_qeq.html, qeq, -
 "REAX"_Packages_details.html#REAX, ReaxFF potential (Fortran), "pair_style reax"_pair_reax.html, reax, int
-"REPLICA"_Packages_details.html#REPLICA, multi-replica methods, "Howto replica"_Howto_replica.html, tad, -
+"REPLICA"_Packages_details.html#REPLICA2, multi-replica methods, "Howto replica"_Howto_replica.html, tad, -
 "RIGID"_Packages_details.html#RIGID, rigid bodies and constraints, "fix rigid"_fix_rigid.html, rigid, -
 "SHOCK"_Packages_details.html#SHOCK, shock loading methods, "fix msst"_fix_msst.html, -, -
 "SNAP"_Packages_details.html#SNAP, quantum-fitted potential, "pair_style snap"_pair_snap.html, snap, -
-"SPIN"_#SPIN, magnetic atomic spin dynamics, "Howto spin"_Howto_spin.html, SPIN, -"SRD"_Packages_details.html#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, -
+"SPIN"_#SPIN, magnetic atomic spin dynamics, "Howto spins"_Howto_spins.html, SPIN, -"SRD"_Packages_details.html#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, -
 "VORONOI"_Packages_details.html#VORONOI, Voronoi tesselation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext :tb(ea=c,ca1=l)
diff --git a/doc/src/Python_install.txt b/doc/src/Python_install.txt
index 6591360ae2535935f0c9e88e8bf6bc2c830f6cb0..85cf267de06f543d092114e473eff44aa3149006 100644
--- a/doc/src/Python_install.txt
+++ b/doc/src/Python_install.txt
@@ -68,7 +68,7 @@ need to prefix this with "sudo".  In this mode you cannot control
 which Python is invoked by root.
 
 Note that if you want Python to be able to load different versions of
-the LAMMPS shared library (see "this section"_#py_5 below), you will
+the LAMMPS shared library (see "this section"_Python_shlib.html), you will
 need to manually copy files like liblammps_g++.so into the appropriate
 system directory.  This is not needed if you set the LD_LIBRARY_PATH
 environment variable as described above.
diff --git a/doc/src/Tools.txt b/doc/src/Tools.txt
index 85ee531cfd905371ebef6f17db26713c2a496d6c..aa4adb7dc1a89c909388b0b50879884f202f61c0 100644
--- a/doc/src/Tools.txt
+++ b/doc/src/Tools.txt
@@ -77,7 +77,7 @@ own sub-directories with their own Makefiles and/or README files.
 :line
 :line
 
-amber2lmp tool :h4,link(amber)
+amber2lmp tool :h3,link(amber)
 
 The amber2lmp sub-directory contains two Python scripts for converting
 files back-and-forth between the AMBER MD code and LAMMPS.  See the
@@ -92,7 +92,7 @@ necessary modifications yourself.
 
 :line
 
-binary2txt tool :h4,link(binary)
+binary2txt tool :h3,link(binary)
 
 The file binary2txt.cpp converts one or more binary LAMMPS dump file
 into ASCII text files.  The syntax for running the tool is
@@ -105,7 +105,7 @@ since binary files are not compatible across all platforms.
 
 :line
 
-ch2lmp tool :h4,link(charmm)
+ch2lmp tool :h3,link(charmm)
 
 The ch2lmp sub-directory contains tools for converting files
 back-and-forth between the CHARMM MD code and LAMMPS.
@@ -130,7 +130,7 @@ Chris Lorenz (chris.lorenz at kcl.ac.uk), King's College London.
 
 :line
 
-chain tool :h4,link(chain)
+chain tool :h3,link(chain)
 
 The file chain.f creates a LAMMPS data file containing bead-spring
 polymer chains and/or monomer solvent atoms.  It uses a text file
@@ -147,7 +147,7 @@ for the "chain benchmark"_Speed_bench.html.
 
 :line
 
-colvars tools :h4,link(colvars)
+colvars tools :h3,link(colvars)
 
 The colvars directory contains a collection of tools for postprocessing
 data produced by the colvars collective variable library.
@@ -169,7 +169,7 @@ gmail.com) at ICTP, Italy.
 
 :line
 
-createatoms tool :h4,link(createatoms)
+createatoms tool :h3,link(createatoms)
 
 The tools/createatoms directory contains a Fortran program called
 createAtoms.f which can generate a variety of interesting crystal
@@ -182,7 +182,7 @@ The tool is authored by Xiaowang Zhou (Sandia), xzhou at sandia.gov.
 
 :line
 
-doxygen tool :h4,link(doxygen)
+doxygen tool :h3,link(doxygen)
 
 The tools/doxygen directory contains a shell script called
 doxygen.sh which can generate a call graph and API lists using
@@ -194,7 +194,7 @@ The tool is authored by Nandor Tamaskovics, numericalfreedom at googlemail.com.
 
 :line
 
-drude tool :h4,link(drude)
+drude tool :h3,link(drude)
 
 The tools/drude directory contains a Python script called
 polarizer.py which can add Drude oscillators to a LAMMPS
@@ -207,7 +207,7 @@ at univ-bpclermont.fr, alain.dequidt at univ-bpclermont.fr
 
 :line
 
-eam database tool :h4,link(eamdb)
+eam database tool :h3,link(eamdb)
 
 The tools/eam_database directory contains a Fortran program that will
 generate EAM alloy setfl potential files for any combination of 16
@@ -223,7 +223,7 @@ X. W. Zhou, R. A. Johnson, and H. N. G. Wadley, Phys. Rev. B, 69,
 
 :line
 
-eam generate tool :h4,link(eamgn)
+eam generate tool :h3,link(eamgn)
 
 The tools/eam_generate directory contains several one-file C programs
 that convert an analytic formula into a tabulated "embedded atom
@@ -236,7 +236,7 @@ The source files and potentials were provided by Gerolf Ziegenhain
 
 :line
 
-eff tool :h4,link(eff)
+eff tool :h3,link(eff)
 
 The tools/eff directory contains various scripts for generating
 structures and post-processing output for simulations using the
@@ -247,7 +247,7 @@ These tools were provided by Andres Jaramillo-Botero at CalTech
 
 :line
 
-emacs tool :h4,link(emacs)
+emacs tool :h3,link(emacs)
 
 The tools/emacs directory contains an Emacs Lisp add-on file for GNU Emacs 
 that enables a lammps-mode for editing input scripts when using GNU Emacs,
@@ -258,7 +258,7 @@ These tools were provided by Aidan Thompson at Sandia
 
 :line
 
-fep tool :h4,link(fep)
+fep tool :h3,link(fep)
 
 The tools/fep directory contains Python scripts useful for
 post-processing results from performing free-energy perturbation
@@ -271,7 +271,7 @@ See README file in the tools/fep directory.
 
 :line
 
-i-pi tool :h4,link(ipi)
+i-pi tool :h3,link(ipi)
 
 The tools/i-pi directory contains a version of the i-PI package, with
 all the LAMMPS-unrelated files removed.  It is provided so that it can
@@ -288,7 +288,7 @@ calculations with LAMMPS.
 
 :line
 
-ipp tool :h4,link(ipp)
+ipp tool :h3,link(ipp)
 
 The tools/ipp directory contains a Perl script ipp which can be used
 to facilitate the creation of a complicated file (say, a lammps input
@@ -302,7 +302,7 @@ tools/createatoms tool's input file.
 
 :line
 
-kate tool :h4,link(kate)
+kate tool :h3,link(kate)
 
 The file in the tools/kate directory is an add-on to the Kate editor
 in the KDE suite that allow syntax highlighting of LAMMPS input
@@ -313,7 +313,7 @@ The file was provided by Alessandro Luigi Sellerio
 
 :line
 
-lmp2arc tool :h4,link(arc)
+lmp2arc tool :h3,link(arc)
 
 The lmp2arc sub-directory contains a tool for converting LAMMPS output
 files to the format for Accelrys' Insight MD code (formerly
@@ -329,7 +329,7 @@ Greathouse at Sandia (jagreat at sandia.gov).
 
 :line
 
-lmp2cfg tool :h4,link(cfg)
+lmp2cfg tool :h3,link(cfg)
 
 The lmp2cfg sub-directory contains a tool for converting LAMMPS output
 files into a series of *.cfg files which can be read into the
@@ -340,7 +340,7 @@ This tool was written by Ara Kooser at Sandia (askoose at sandia.gov).
 
 :line
 
-matlab tool :h4,link(matlab)
+matlab tool :h3,link(matlab)
 
 The matlab sub-directory contains several "MATLAB"_matlabhome scripts for
 post-processing LAMMPS output.  The scripts include readers for log
@@ -358,7 +358,7 @@ These scripts were written by Arun Subramaniyan at Purdue Univ
 
 :line
 
-micelle2d tool :h4,link(micelle)
+micelle2d tool :h3,link(micelle)
 
 The file micelle2d.f creates a LAMMPS data file containing short lipid
 chains in a monomer solution.  It uses a text file containing lipid
@@ -375,7 +375,7 @@ definition file.  This tool was used to create the system for the
 
 :line
 
-moltemplate tool :h4,link(moltemplate)
+moltemplate tool :h3,link(moltemplate)
 
 The moltemplate sub-directory contains a Python-based tool for
 building molecular systems based on a text-file description, and
@@ -389,7 +389,7 @@ supports it.  It has its own WWW page at
 
 :line
 
-msi2lmp tool :h4,link(msi)
+msi2lmp tool :h3,link(msi)
 
 The msi2lmp sub-directory contains a tool for creating LAMMPS template
 input and data files from BIOVIA's Materias Studio files (formerly Accelrys'
@@ -406,7 +406,7 @@ See the README file in the tools/msi2lmp folder for more information.
 
 :line
 
-phonon tool :h4,link(phonon)
+phonon tool :h3,link(phonon)
 
 The phonon sub-directory contains a post-processing tool useful for
 analyzing the output of the "fix phonon"_fix_phonon.html command in
@@ -421,7 +421,7 @@ University.
 
 :line
 
-polybond tool :h4,link(polybond)
+polybond tool :h3,link(polybond)
 
 The polybond sub-directory contains a Python-based tool useful for
 performing "programmable polymer bonding".  The Python file
@@ -435,7 +435,7 @@ This tool was written by Zachary Kraus at Georgia Tech.
 
 :line
 
-pymol_asphere tool :h4,link(pymol)
+pymol_asphere tool :h3,link(pymol)
 
 The pymol_asphere sub-directory contains a tool for converting a
 LAMMPS dump file that contains orientation info for ellipsoidal
@@ -453,7 +453,7 @@ This tool was written by Mike Brown at Sandia.
 
 :line
 
-python tool :h4,link(pythontools)
+python tool :h3,link(pythontools)
 
 The python sub-directory contains several Python scripts
 that perform common LAMMPS post-processing tasks, such as:
@@ -469,7 +469,7 @@ README for more info on Pizza.py and how to use these scripts.
 
 :line
 
-reax tool :h4,link(reax_tool)
+reax tool :h3,link(reax_tool)
 
 The reax sub-directory contains stand-alond codes that can
 post-process the output of the "fix reax/bonds"_fix_reax_bonds.html
@@ -480,7 +480,7 @@ These tools were written by Aidan Thompson at Sandia.
 
 :line
 
-smd tool :h4,link(smd)
+smd tool :h3,link(smd)
 
 The smd sub-directory contains a C++ file dump2vtk_tris.cpp and
 Makefile which can be compiled and used to convert triangle output
@@ -496,7 +496,7 @@ Ernst Mach Institute in Germany (georg.ganzenmueller at emi.fhg.de).
 
 :line
 
-vim tool :h4,link(vim)
+vim tool :h3,link(vim)
 
 The files in the tools/vim directory are add-ons to the VIM editor
 that allow easier editing of LAMMPS input scripts.  See the README.txt
@@ -507,7 +507,7 @@ ziegenhain.com)
 
 :line
 
-xmgrace tool :h4,link(xmgrace)
+xmgrace tool :h3,link(xmgrace)
 
 The files in the tools/xmgrace directory can be used to plot the
 thermodynamic data in LAMMPS log files via the xmgrace plotting
diff --git a/doc/src/fix_manifoldforce.txt b/doc/src/fix_manifoldforce.txt
index 8dd992170b1b73a77fe09d865bc83f844f302a85..a25b9f0b2e1c47d951d81906580dc6d011e32ea6 100644
--- a/doc/src/fix_manifoldforce.txt
+++ b/doc/src/fix_manifoldforce.txt
@@ -25,7 +25,7 @@ fix constrain all manifoldforce sphere 5.0
 [Description:]
 
 This fix subtracts each time step from the force the component along
-the normal of the specified "manifold"_manifolds.html.  This can be
+the normal of the specified "manifold"_Howto_manifold.html.  This can be
 used in combination with "minimize"_minimize.html to remove overlap
 between particles while keeping them (roughly) constrained to the
 given manifold, e.g. to set up a run with "fix
diff --git a/doc/src/fix_property_atom.txt b/doc/src/fix_property_atom.txt
index 136ed6c15e6ecb9de65599eb41a821d6487bbf69..8a70cd8213ccc223c279e6961bf31b059c2dbf1d 100644
--- a/doc/src/fix_property_atom.txt
+++ b/doc/src/fix_property_atom.txt
@@ -200,7 +200,8 @@ added classes.
 
 :line
 
-:link(isotopes) Example for using per-atom masses with TIP4P water to
+:link(isotopes)
+Example for using per-atom masses with TIP4P water to
 study isotope effects. When setting up simulations with the "TIP4P
 pair styles"_Howto_tip4p.html for water, you have to provide exactly
 one atom type each to identify the water oxygen and hydrogen
diff --git a/doc/src/fix_wall_reflect.txt b/doc/src/fix_wall_reflect.txt
index e4dd7e2fb9a5ab32c31d702023c43ee79efd88a9..d43cafbf0949f2706b05b3ea06f0e137ff211f15 100644
--- a/doc/src/fix_wall_reflect.txt
+++ b/doc/src/fix_wall_reflect.txt
@@ -51,7 +51,7 @@ corresponding component of its velocity is flipped.
 When used in conjunction with "fix nve"_fix_nve.html and "run_style
 verlet"_run_style.html, the resultant time-integration algorithm is
 equivalent to the primitive splitting algorithm (PSA) described by
-"Bond"_#Bond.  Because each reflection event divides
+"Bond"_#Bond1.  Because each reflection event divides
 the corresponding timestep asymmetrically, energy conservation is only
 satisfied to O(dt), rather than to O(dt^2) as it would be for
 velocity-Verlet integration without reflective walls.
@@ -179,5 +179,5 @@ error.
 
 :line
 
-:link(Bond)
+:link(Bond1)
 [(Bond)] Bond and Leimkuhler, SIAM J Sci Comput, 30, p 134 (2007).
diff --git a/doc/src/lammps.book b/doc/src/lammps.book
index 88ef6d77cb69255671b9b35017b82f8c7b8ce34b..c45a930bbc6d3d1373737ec8f0689f46fb1cf694 100644
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@@ -1,6 +1,7 @@
 #HTMLDOC 1.8.28
 -t pdf14 -f "../Manual.pdf" --book --toclevels 4 --no-numbered --toctitle "Table of Contents" --title --textcolor #000000 --linkcolor #0000ff --linkstyle plain --bodycolor #ffffff --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer ..1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=9 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Sans --bodyfont Serif --headfootsize 11.0 --headfootfont Sans-Bold --charset iso-8859-15 --links --embedfonts --pagemode document --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all  --owner-password ""  --user-password "" --browserwidth 680 --no-strict --no-overflow
 Manual.html
+Manual_version.html
 Intro.html
 Intro_overview.html
 Intro_features.html
@@ -126,6 +127,7 @@ Errors_common.html
 Errors_bugs.html
 Errors_messages.html
 Errors_warnings.html
+Build_manual.html
 
 lammps_commands.html
 atom_modify.html
diff --git a/lib/gpu/geryon/ocl_timer.h b/lib/gpu/geryon/ocl_timer.h
index 1f56aeb364ccc73c3b6ce22fa2644f0c9413f64b..bdfec64f542f3ab29835190e8408e4a788e6569b 100644
--- a/lib/gpu/geryon/ocl_timer.h
+++ b/lib/gpu/geryon/ocl_timer.h
@@ -38,8 +38,8 @@ namespace ucl_opencl {
 /// Class for timing OpenCL events
 class UCL_Timer {
  public:
-  inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
-  inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
+  inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { }
+  inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false)
     { init(dev); }
 
   inline ~UCL_Timer() { clear(); }
@@ -52,6 +52,7 @@ class UCL_Timer {
       _initialized=false;
       _total_time=0.0;
     }
+    has_measured_time = false;
   }
 
   /// Initialize default command queue for timing
@@ -64,25 +65,39 @@ class UCL_Timer {
     _cq=cq;
     clRetainCommandQueue(_cq);
     _initialized=true;
+    has_measured_time = false;
   }
 
   /// Start timing on default command queue
-  inline void start() { UCL_OCL_MARKER(_cq,&start_event); }
+  inline void start() {
+    UCL_OCL_MARKER(_cq,&start_event);
+    has_measured_time = false;
+  }
 
   /// Stop timing on default command queue
-  inline void stop() { UCL_OCL_MARKER(_cq,&stop_event); }
+  inline void stop() {
+    UCL_OCL_MARKER(_cq,&stop_event);
+    has_measured_time = true;
+  }
 
   /// Block until the start event has been reached on device
-  inline void sync_start()
-    { CL_SAFE_CALL(clWaitForEvents(1,&start_event)); }
+  inline void sync_start() {
+    CL_SAFE_CALL(clWaitForEvents(1,&start_event));
+    has_measured_time = false;
+  }
 
   /// Block until the stop event has been reached on device
-  inline void sync_stop()
-    { CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); }
+  inline void sync_stop() {
+    CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
+    has_measured_time = true;
+  }
 
   /// Set the time elapsed to zero (not the total_time)
-  inline void zero()
-    { UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&stop_event); }
+  inline void zero() {
+    has_measured_time = false;
+    UCL_OCL_MARKER(_cq,&start_event);
+    UCL_OCL_MARKER(_cq,&stop_event);
+  }
 
   /// Set the total time to zero
   inline void zero_total() { _total_time=0.0; }
@@ -97,6 +112,7 @@ class UCL_Timer {
 
   /// Return the time (ms) of last start to stop - Forces synchronization
   inline double time() {
+    if(!has_measured_time) return 0.0;
     cl_ulong tstart,tend;
     CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
     CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@@ -107,6 +123,7 @@ class UCL_Timer {
                                          sizeof(cl_ulong), &tstart, NULL));
     clReleaseEvent(start_event);
     clReleaseEvent(stop_event);
+    has_measured_time = false;
     return (tend-tstart)*t_factor;
   }
 
@@ -123,8 +140,9 @@ class UCL_Timer {
   cl_event start_event, stop_event;
   cl_command_queue _cq;
   double _total_time;
-  bool _initialized;
   double t_factor;
+  bool _initialized;
+  bool has_measured_time;
 };
 
 } // namespace
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 49b11f3ae0a4a0fcc9b462412ad2b0fa9baf16cf..11aa695f087255e7cfbc4bd166acab40ce2f73d7 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -292,7 +292,8 @@ public:
 
 #if ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
       if ( 0 == r ) {
-        Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
+        //Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
+        return handle_type();
       }
 #endif
 
diff --git a/src/.gitignore b/src/.gitignore
index 058833ffb38dfca1ea188bebfe5522f943a1c6df..df3a22a5cd08b1a924aadde018674d972f340a5f 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -729,6 +729,8 @@
 /pair_eam.h
 /pair_eam_alloy.cpp
 /pair_eam_alloy.h
+/pair_eam_cd.cpp
+/pair_eam_cd.h
 /pair_eam_fs.cpp
 /pair_eam_fs.h
 /pair_edip.cpp
diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp
index cd0b2ba2683742ab1140ce3a41e0e7c506a40a33..bfa45492864ff4708fb0e22167b04b69ca95cbc1 100644
--- a/src/USER-UEF/fix_nh_uef.cpp
+++ b/src/USER-UEF/fix_nh_uef.cpp
@@ -536,10 +536,26 @@ void FixNHUef::pre_exchange()
     rotate_x(rot);
     rotate_f(rot);
 
-    // put all atoms in the new box
-    double **x = atom->x;
+    // this is a generalization of what is done in domain->image_flip(...)
+    int ri[3][3];
+    uefbox->get_inverse_cob(ri);
     imageint *image = atom->image;
     int nlocal = atom->nlocal;
+    for (int i=0; i<nlocal; i++) {
+      int iold[3],inew[3];
+      iold[0] = (image[i] & IMGMASK) - IMGMAX;
+      iold[1] = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+      iold[2] = (image[i] >> IMG2BITS) - IMGMAX;
+      inew[0] = ri[0][0]*iold[0] + ri[0][1]*iold[1] + ri[0][2]*iold[2];
+      inew[1] = ri[1][0]*iold[0] + ri[1][1]*iold[1] + ri[1][2]*iold[2];
+      inew[2] = ri[2][0]*iold[0] + ri[2][1]*iold[1] + ri[2][2]*iold[2];
+      image[i] = ((imageint) (inew[0] + IMGMAX) & IMGMASK) |
+        (((imageint) (inew[1] + IMGMAX) & IMGMASK) << IMGBITS) |
+        (((imageint) (inew[2] + IMGMAX) & IMGMASK) << IMG2BITS);
+    }
+
+    // put all atoms in the new box
+    double **x = atom->x;
     for (int i=0; i<nlocal; i++) domain->remap(x[i],image[i]);
 
     // move atoms to the right processors
diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index a5498d605f3d28291d91dcc320a4cd41dd999ce2..a2e6cb291e0cb4c7b4ff48e9d1d181d0b7f65e4a 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -30,47 +30,54 @@ namespace LAMMPS_NS {
 
 UEFBox::UEFBox()
 {
+
   // initial box (also an inverse eigenvector matrix of automorphisms)
+
   double x = 0.327985277605681;
   double y = 0.591009048506103;
   double z = 0.736976229099578;
   l0[0][0]= z; l0[0][1]= y; l0[0][2]= x;
   l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y;
   l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z;
+
   // spectra of the two automorpisms (log of eigenvalues)
+
   w1[0]=-1.177725211523360;
   w1[1]=-0.441448620566067;
   w1[2]= 1.619173832089425;
   w2[0]= w1[1];
   w2[1]= w1[2];
   w2[2]= w1[0];
+
   // initialize theta
   // strain = w1 * theta1 + w2 * theta2
-  theta[0]=theta[1]=0;
 
+  theta[0]=theta[1]=0;
 
   //set up the initial box l and change of basis matrix r
+
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       l[k][j] = l0[k][j];
       r[j][k]=(j==k);
+      ri[j][k]=(j==k);
     }
 
   // get the initial rotation and upper triangular matrix
+
   rotation_matrix(rot, lrot ,l);
 
   // this is just a way to calculate the automorphisms
   // themselves, which play a minor role in the calculations
   // it's overkill, but only called once
+
   double t1[3][3];
   double t1i[3][3];
   double t2[3][3];
   double t2i[3][3];
   double l0t[3][3];
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       t1[k][j] = exp(w1[k])*l0[k][j];
       t1i[k][j] = exp(-w1[k])*l0[k][j];
       t2[k][j] = exp(w2[k])*l0[k][j];
@@ -82,8 +89,7 @@ UEFBox::UEFBox()
   mul_m2(l0t,t2);
   mul_m2(l0t,t2i);
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       a1[k][j] = round(t1[k][j]);
       a1i[k][j] = round(t1i[k][j]);
       a2[k][j] = round(t2[k][j]);
@@ -92,6 +98,7 @@ UEFBox::UEFBox()
 
   // winv used to transform between
   // strain increments and theta increments
+
   winv[0][0] = w2[1];
   winv[0][1] = -w2[0];
   winv[1][0] = -w1[1];
@@ -102,7 +109,9 @@ UEFBox::UEFBox()
       winv[k][j] /= d;
 }
 
-// get volume-correct r basis in: basis*cbrt(vol) = q*r
+/* ----------------------------------------------------------------------
+   get volume-correct r basis in: basis*cbrt(vol) = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_box(double x[3][3], double v)
 {
   v = cbrtf(v);
@@ -111,7 +120,9 @@ void UEFBox::get_box(double x[3][3], double v)
       x[k][j] = lrot[k][j]*v;
 }
 
-// get rotation matrix q in: basis = q*r
+/* ----------------------------------------------------------------------
+   get rotation matrix q in: basis = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_rot(double x[3][3])
 {
   for (int k=0;k<3;k++)
@@ -119,20 +130,32 @@ void UEFBox::get_rot(double x[3][3])
       x[k][j]=rot[k][j];
 }
 
-// diagonal, incompressible deformation
+/* ----------------------------------------------------------------------
+   get inverse change of basis matrix
+------------------------------------------------------------------------- */
+void UEFBox::get_inverse_cob(int x[3][3])
+{
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      x[k][j]=ri[k][j];
+}
+
+/* ----------------------------------------------------------------------
+   apply diagonal, incompressible deformation
+------------------------------------------------------------------------- */
 void UEFBox::step_deform(const double ex, const double ey)
 {
   // increment theta values used in the reduction
+
   theta[0] +=winv[0][0]*ex + winv[0][1]*ey;
   theta[1] +=winv[1][0]*ex + winv[1][1]*ey;
 
-  // deformation of the box. reduce() needs to
-  // be called regularly or calculation will become
-  // unstable
+  // deformation of the box. reduce() needs to be called regularly or 
+  // calculation will become unstable
+
   double eps[3];
   eps[0]=ex; eps[1] = ey; eps[2] = -ex-ey;
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     eps[k] = exp(eps[k]);
     l[k][0] = eps[k]*l[k][0];
     l[k][1] = eps[k]*l[k][1];
@@ -140,68 +163,84 @@ void UEFBox::step_deform(const double ex, const double ey)
   }
   rotation_matrix(rot,lrot, l);
 }
-// reuduce the current basis
+
+/* ----------------------------------------------------------------------
+   reduce the current basis
+------------------------------------------------------------------------- */
 bool UEFBox::reduce()
 {
-  // determine how many times to apply the automorphisms
-  // and find new theta values
+  // determine how many times to apply the automorphisms and find new theta 
+  // values
+
   int f1 = round(theta[0]);
   int f2 = round(theta[1]);
   theta[0] -= f1;
   theta[1] -= f2;
 
-  // store old change or basis matrix to determine if it
-  // changes
+  // store old change or basis matrix to determine if it changes
+
   int r0[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       r0[k][j]=r[k][j];
 
-  // this modifies the old change basis matrix to
-  // handle the case where the automorphism transforms
-  // the box but the reduced basis doesn't change
+  // this modifies the old change basis matrix to handle the case where the 
+  // automorphism transforms the box but the reduced basis doesn't change
   // (r0 should still equal r at the end)
+
   if (f1 > 0) for (int k=0;k<f1;k++) mul_m2 (a1,r0);
   if (f1 < 0) for (int k=0;k<-f1;k++) mul_m2 (a1i,r0);
   if (f2 > 0) for (int k=0;k<f2;k++) mul_m2 (a2,r0);
   if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0);
 
   // robust reduction to the box defined by Dobson
-  for (int k=0;k<3;k++)
-  {
+
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
+
   // further reduce the box using greedy reduction and check
   // if it changed from the last step using the change of basis
   // matrices r and r0
-  greedy(l,r);
+
+  greedy(l,r,ri);
+
+  // multiplying the inverse by the old change of basis matrix gives
+  // the inverse of the transformation itself (should be identity if
+  // no reduction takes place). This is used for image flags only.
+
+  mul_m1(ri,r0);
   rotation_matrix(rot,lrot, l);
   return !mat_same(r,r0);
 }
+
+/* ----------------------------------------------------------------------
+   set the strain to a specific value
+------------------------------------------------------------------------- */
 void UEFBox::set_strain(const double ex, const double ey)
 {
-  theta[0]  =winv[0][0]*ex + winv[0][1]*ey;
-  theta[1]  =winv[1][0]*ex + winv[1][1]*ey;
+  theta[0]  = winv[0][0]*ex + winv[0][1]*ey;
+  theta[1]  = winv[1][0]*ex + winv[1][1]*ey;
   theta[0] -= round(theta[0]);
   theta[1] -= round(theta[1]);
 
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
-  greedy(l,r);
+  greedy(l,r,ri);
   rotation_matrix(rot,lrot, l);
 }
 
-// this is just qr reduction using householder reflections
-// m is input matrix, q is a rotation, r is upper triangular
-// q*m = r
+/* ----------------------------------------------------------------------
+   qr reduction using householder reflections
+   q*m = r. q is orthogonal. m is input matrix. r is upper triangular
+------------------------------------------------------------------------- */
 void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
 {
   for (int k=0;k<3;k++)
@@ -217,8 +256,7 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
   v[0] /= a; v[1] /= a; v[2] /= a;
   double qt[3][3];
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       qt[k][j] = (k==j) - 2*v[k]*v[j];
       q[k][j]= qt[k][j];
     }
@@ -235,38 +273,42 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
       qt[k][j] = (k==j) - 2*v[k]*v[j];
   mul_m2(qt,r);
   mul_m2(qt,q);
+
   // this makes r have positive diagonals
   // q*m = r <==> (-q)*m = (-r) will hold row-wise
+
   if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); }
   if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); }
   if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); }
 }
 
-
-
-//sort columns in order of increasing length
-void col_sort(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   sort columns of b in order of increasing length
+   mimic column operations on ri and r
+------------------------------------------------------------------------- */
+void col_sort(double b[3][3],int r[3][3],int ri[3][3])
 {
-  if (col_prod(b,0,0)>col_prod(b,1,1))
-  {
+  if (col_prod(b,0,0)>col_prod(b,1,1)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
+    col_swap(ri,0,1);
   }
-  if (col_prod(b,0,0)>col_prod(b,2,2))
-  {
+  if (col_prod(b,0,0)>col_prod(b,2,2)) {
     col_swap(b,0,2);
     col_swap(r,0,2);
+    col_swap(ri,0,2);
   }
-  if (col_prod(b,1,1)>col_prod(b,2,2))
-  {
+  if (col_prod(b,1,1)>col_prod(b,2,2)) {
     col_swap(b,1,2);
     col_swap(r,1,2);
+    col_swap(ri,1,2);
   }
 }
 
-
-// 1-2 reduction (Graham-Schmidt)
-void red12(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   1-2 reduction (Graham-Schmidt)
+------------------------------------------------------------------------- */
+void red12(double b[3][3],int r[3][3],int ri[3][3])
 {
   int y = round(col_prod(b,0,1)/col_prod(b,0,0));
   b[0][1] -= y*b[0][0];
@@ -276,16 +318,23 @@ void red12(double b[3][3],int r[3][3])
   r[0][1] -= y*r[0][0];
   r[1][1] -= y*r[1][0];
   r[2][1] -= y*r[2][0];
-  if (col_prod(b,1,1) < col_prod(b,0,0))
-  {
+
+  ri[0][0] += y*ri[0][1];
+  ri[1][0] += y*ri[1][1];
+  ri[2][0] += y*ri[2][1];
+
+  if (col_prod(b,1,1) < col_prod(b,0,0)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
-    red12(b,r);
+    col_swap(ri,0,1);
+    red12(b,r,ri);
   }
 }
 
-// The Semaev condition for a 3-reduced basis
-void red3(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   Apply the Semaev condition for a 3-reduced basis
+------------------------------------------------------------------------- */
+void red3(double b[3][3], int r[3][3], int ri[3][3])
 {
   double b11 = col_prod(b,0,0);
   double b22 = col_prod(b,1,1);
@@ -304,63 +353,97 @@ void red3(double b[3][3], int r[3][3])
   x1v[0] = floor(y1); x1v[1] = x1v[0]+1;
   x2v[0] = floor(y2); x2v[1] = x2v[0]+1;
   for (int k=0;k<2;k++)
-    for (int j=0;j<2;j++)
-    {
+    for (int j=0;j<2;j++) {
       double a[3];
       a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1];
       a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1];
       a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1];
       double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2];
-      if (val<min)
-      {
+      if (val<min) {
         min = val;
         x1 = x1v[k];
         x2 = x2v[j];
       }
     }
-  if (x1 || x2)
-  {
+  if (x1 || x2) {
     b[0][2] += x1*b[0][0] + x2*b[0][1];
     b[1][2] += x1*b[1][0] + x2*b[1][1];
     b[2][2] += x1*b[2][0] + x2*b[2][1];
     r[0][2] += x1*r[0][0] + x2*r[0][1];
     r[1][2] += x1*r[1][0] + x2*r[1][1];
     r[2][2] += x1*r[2][0] + x2*r[2][1];
-    greedy_recurse(b,r); // note the recursion step is here
+    ri[0][0] += -x1*ri[0][2];
+    ri[1][0] += -x1*ri[1][2];
+    ri[2][0] += -x1*ri[2][2];
+    ri[0][1] += -x2*ri[0][2];
+    ri[1][1] += -x2*ri[1][2];
+    ri[2][1] += -x2*ri[2][2];
+    greedy_recurse(b,r,ri); // note the recursion step is here
   }
 }
 
-// the meat of the greedy reduction algorithm
-void greedy_recurse(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   the meat of the greedy reduction algorithm
+------------------------------------------------------------------------- */
+void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3])
 {
-  col_sort(b,r);
-  red12(b,r);
-  red3(b,r); // recursive caller
+  col_sort(b,r,ri);
+  red12(b,r,ri);
+  red3(b,r,ri); // recursive caller
 }
 
-// set r (change of basis) to be identity then reduce basis and make it unique
-void greedy(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   reduce the basis b. also output the change of basis matrix r and its
+   inverse ri
+------------------------------------------------------------------------- */
+void greedy(double b[3][3],int r[3][3],int ri[3][3])
 {
   r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0;
   r[0][0]=r[1][1]=r[2][2]=1;
-  greedy_recurse(b,r);
-  make_unique(b,r);
+  ri[0][1]=ri[0][2]=ri[1][0]=ri[1][2]=ri[2][0]=ri[2][1]=0;
+  ri[0][0]=ri[1][1]=ri[2][2]=1;
+  greedy_recurse(b,r,ri);
+  make_unique(b,r,ri);
+  transpose(ri);
 }
 
-// A reduced basis isn't unique. This procedure will make it
-// "more" unique. Degenerate cases are possible, but unlikely
-// with floating point math.
-void make_unique(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   A reduced basis isn't unique. This procedure will make it
+   "more" unique. Degenerate cases are possible, but unlikely
+   with floating point math.
+------------------------------------------------------------------------- */
+void make_unique(double b[3][3], int r[3][3], int ri[3][3])
 {
-  if (fabs(b[0][0]) < fabs(b[0][1]))
-  { col_swap(b,0,1); col_swap(r,0,1); }
-  if (fabs(b[0][0]) < fabs(b[0][2]))
-  { col_swap(b,0,2); col_swap(r,0,2); }
-  if (fabs(b[1][1]) < fabs(b[1][2]))
-  { col_swap(b,1,2); col_swap(r,1,2); }
-
-  if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); }
-  if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); }
-  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); }
+  if (fabs(b[0][0]) < fabs(b[0][1])) {
+    col_swap(b,0,1);
+    col_swap(r,0,1);
+    col_swap(ri,0,1); 
+  }
+  if (fabs(b[0][0]) < fabs(b[0][2])) {
+    col_swap(b,0,2);
+    col_swap(r,0,2);
+    col_swap(ri,0,2);
+  }
+  if (fabs(b[1][1]) < fabs(b[1][2])) {
+    col_swap(b,1,2);
+    col_swap(r,1,2);
+    col_swap(ri,1,2);
+  }
+
+  if (b[0][0] < 0) {
+    neg_col(b,0);
+    neg_col(r,0);
+    neg_col(ri,0); 
+  }
+  if (b[1][1] < 0) {
+    neg_col(b,1);
+    neg_col(r,1);
+    neg_col(ri,1);
+  }
+  if (det(b) < 0) {
+    neg_col(b,2);
+    neg_col(r,2); 
+    neg_col(ri,2); 
+  }
 }
 }}
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index a16f6fff1a70f1e5e15b3f993efc702deaaf034f..0a1cfcc9b2ca9f897d9bf4caed142644d24cf3fe 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -27,26 +27,27 @@ class UEFBox
     bool reduce();
     void get_box(double[3][3], double);
     void get_rot(double[3][3]);
+    void get_inverse_cob(int[3][3]);
   private:
     double l0[3][3]; // initial basis
-    double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms)
-    //double edot[3], delta[2];
+    double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms)
     double theta[2];
     double l[3][3], rot[3][3], lrot[3][3];
-    int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
+    int r[3][3],ri[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
 };
 
-
 // lattice reduction routines
-void greedy(double[3][3],int[3][3]);
-void col_sort(double[3][3],int[3][3]);
-void red12(double[3][3],int[3][3]);
-void greedy_recurse(double[3][3],int[3][3]);
-void red3(double [3][3],int r[3][3]);
-void make_unique(double[3][3],int[3][3]);
+
+void greedy(double[3][3],int[3][3],int[3][3]);
+void col_sort(double[3][3],int[3][3],int[3][3]);
+void red12(double[3][3],int[3][3],int[3][3]);
+void greedy_recurse(double[3][3],int[3][3],int[3][3]);
+void red3(double [3][3],int r[3][3],int[3][3]);
+void make_unique(double[3][3],int[3][3],int[3][3]);
 void rotation_matrix(double[3][3],double[3][3],const double [3][3]);
 
 // A few utility functions for 3x3 arrays
+
 template<typename T>
 T col_prod(T x[3][3], int c1, int c2)
 {
@@ -56,8 +57,7 @@ T col_prod(T x[3][3], int c1, int c2)
 template<typename T>
 void col_swap(T x[3][3], int c1, int c2)
 {
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     T t = x[k][c2];
     x[k][c2]=x[k][c1];
     x[k][c1]=t;
@@ -101,9 +101,21 @@ bool mat_same(T x1[3][3], T x2[3][3])
 }
 
 template<typename T>
-void mul_m1(T m1[3][3], const T m2[3][3])
+void transpose(T m[3][3])
 {
   T t[3][3];
+  for (int k=0;k<3;k++)
+    for (int j=k+1;j<3;j++) {
+      T x = m[k][j];
+      m[k][j] = m[j][k];
+      m[j][k] = x;
+    }
+}
+
+template<typename T1,typename T2>
+void mul_m1(T1 m1[3][3], const T2 m2[3][3])
+{
+  T1 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m1[k][j];
@@ -113,10 +125,10 @@ void mul_m1(T m1[3][3], const T m2[3][3])
       m1[k][j] = t[k][0]*m2[0][j] + t[k][1]*m2[1][j] + t[k][2]*m2[2][j];
 }
 
-template<typename T>
-void mul_m2(const T m1[3][3], T m2[3][3])
+template<typename T1, typename T2>
+void mul_m2(const T1 m1[3][3], T2 m2[3][3])
 {
-  T t[3][3];
+  T2 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m2[k][j];
diff --git a/src/compute_cluster_atom.cpp b/src/compute_cluster_atom.cpp
index 146f8fd1b3c7d64491e1213038b94c4ba9d0a32a..85934c5e6d20df3a6eaa915d8269d32988147120 100644
--- a/src/compute_cluster_atom.cpp
+++ b/src/compute_cluster_atom.cpp
@@ -31,6 +31,8 @@
 
 using namespace LAMMPS_NS;
 
+enum{CLUSTER,MASK,COORDS};
+
 /* ---------------------------------------------------------------------- */
 
 ComputeClusterAtom::ComputeClusterAtom(LAMMPS *lmp, int narg, char **arg) :
@@ -44,7 +46,7 @@ ComputeClusterAtom::ComputeClusterAtom(LAMMPS *lmp, int narg, char **arg) :
 
   peratom_flag = 1;
   size_peratom_cols = 0;
-  comm_forward = 1;
+  comm_forward = 3;
 
   nmax = 0;
 }
@@ -122,10 +124,19 @@ void ComputeClusterAtom::compute_peratom()
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
+  // if update->post_integrate set:
+  // a dynamic group in FixGroup is invoking a variable with this compute
+  // thus ghost atom coords need to be up-to-date after initial_integrate()
+
+  if (update->post_integrate) {
+    commflag = COORDS;
+    comm->forward_comm_compute(this);
+  }
+
   // if group is dynamic, insure ghost atom masks are current
 
   if (group->dynamic[igroup]) {
-    commflag = 0;
+    commflag = MASK;
     comm->forward_comm_compute(this);
   }
 
@@ -147,7 +158,7 @@ void ComputeClusterAtom::compute_peratom()
   // iterate until no changes in my atoms
   // then check if any proc made changes
 
-  commflag = 1;
+  commflag = CLUSTER;
   double **x = atom->x;
 
   int change,done,anychange;
@@ -203,17 +214,25 @@ int ComputeClusterAtom::pack_forward_comm(int n, int *list, double *buf,
   int i,j,m;
 
   m = 0;
-  if (commflag) {
+  if (commflag == CLUSTER) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = clusterID[j];
     }
-  } else {
+  } else if (commflag == MASK) {
     int *mask = atom->mask;
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = ubuf(mask[j]).d;
     }
+  } else if (commflag == COORDS) {
+    double **x = atom->x;
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = x[j][0];
+      buf[m++] = x[j][1];
+      buf[m++] = x[j][2];
+    }
   }
 
   return m;
@@ -227,11 +246,18 @@ void ComputeClusterAtom::unpack_forward_comm(int n, int first, double *buf)
 
   m = 0;
   last = first + n;
-  if (commflag)
+  if (commflag == CLUSTER) {
     for (i = first; i < last; i++) clusterID[i] = buf[m++];
-  else {
+  } else if (commflag == MASK) {
     int *mask = atom->mask;
     for (i = first; i < last; i++) mask[i] = (int) ubuf(buf[m++]).i;
+  } else if (commflag == COORDS) {
+    double **x = atom->x;
+    for (i = first; i < last; i++) {
+      x[i][0] = buf[m++];
+      x[i][1] = buf[m++];
+      x[i][2] = buf[m++];
+    }
   }
 }
 
diff --git a/src/fix_group.cpp b/src/fix_group.cpp
index 2447002bc5202350b779d390dc31124a7f5dd5a1..10736964e788d5b76d47ea392737c35f714fedb9 100644
--- a/src/fix_group.cpp
+++ b/src/fix_group.cpp
@@ -84,7 +84,7 @@ idregion(NULL), idvar(NULL), idprop(NULL)
       idprop = new char[n];
       strcpy(idprop,arg[iarg+1]);
       iarg += 2;
-        } else if (strcmp(arg[iarg],"every") == 0) {
+    } else if (strcmp(arg[iarg],"every") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal group command");
       nevery = force->inumeric(FLERR,arg[iarg+1]);
       if (nevery <= 0) error->all(FLERR,"Illegal group command");
@@ -204,17 +204,22 @@ void FixGroup::set_group()
   int nlocal = atom->nlocal;
 
   // invoke atom-style variable if defined
+  // set post_integrate flag to 1, then unset after
+  // this is for any compute to check if it needs to 
+  //   operate differently due to invocation this early in timestep
+  // e.g. perform ghost comm update due to atoms having just moved
 
   double *var = NULL;
   int *ivector = NULL;
   double *dvector = NULL;
 
-
   if (varflag) {
+    update->post_integrate = 1;
     modify->clearstep_compute();
     memory->create(var,nlocal,"fix/group:varvalue");
     input->variable->compute_atom(ivar,igroup,var,1,0);
     modify->addstep_compute(update->ntimestep + nevery);
+    update->post_integrate = 0;
   }
 
   // invoke per-atom property if defined
diff --git a/src/group.cpp b/src/group.cpp
index 9d33da9acb4e623ea80a0e1f5e9e5fe1928ada07..dd5e53bb3c58234655c8fcf76c171dc84cf386b1 100644
--- a/src/group.cpp
+++ b/src/group.cpp
@@ -754,6 +754,18 @@ void Group::read_restart(FILE *fp)
 // computations on a group of atoms
 // ----------------------------------------------------------------------
 
+/* ----------------------------------------------------------------------
+   count atoms in group all
+------------------------------------------------------------------------- */
+
+bigint Group::count_all()
+{
+  bigint nme = atom->nlocal;
+  bigint nall;
+  MPI_Allreduce(&nme,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world);
+  return nall;
+}
+
 /* ----------------------------------------------------------------------
    count atoms in group
 ------------------------------------------------------------------------- */
diff --git a/src/group.h b/src/group.h
index 6b6cbb1def7e4d2e5340fa6ce3f0b14664f642f3..962d37b32ad53538dba1fa36f31e1338d79f6b37 100644
--- a/src/group.h
+++ b/src/group.h
@@ -37,6 +37,7 @@ class Group : protected Pointers {
   void write_restart(FILE *);
   void read_restart(FILE *);
 
+  bigint count_all();                      // count atoms in group all
   bigint count(int);                       // count atoms in group
   bigint count(int,int);                   // count atoms in group & region
   double mass(int);                        // total mass of atoms in group
diff --git a/src/thermo.cpp b/src/thermo.cpp
index ade7a3c333ab893fdd14a17adaa94a966328b64a..ddbbd0f496eab5c87b59f028a5b4adaac8f1b764 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -1698,7 +1698,7 @@ void Thermo::compute_timeremain()
 
 void Thermo::compute_atoms()
 {
-  bivalue = atom->natoms;
+  bivalue = group->count_all();
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/update.cpp b/src/update.cpp
index aa152a850857e729f78da1db98e678e41140f1d7..6f9c2c9a076ff8b911431bf623efd4a47301f6fe 100644
--- a/src/update.cpp
+++ b/src/update.cpp
@@ -46,11 +46,11 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp)
   whichflag = 0;
   firststep = laststep = 0;
   beginstep = endstep = 0;
+  restrict_output = 0;
   setupflag = 0;
+  post_integrate = 0;
   multireplica = 0;
 
-  restrict_output = 0;
-
   eflag_global = vflag_global = -1;
 
   unit_style = NULL;
diff --git a/src/update.h b/src/update.h
index 79964403182db7add9289a3dc4945ef14eaf9386..d3602ef21e98b0dfcbc036e29625831d3a27be3a 100644
--- a/src/update.h
+++ b/src/update.h
@@ -35,6 +35,7 @@ class Update : protected Pointers {
   int max_eval;                   // max force evaluations for minimizer
   int restrict_output;            // 1 if output should not write dump/restart
   int setupflag;                  // set when setup() is computing forces
+  int post_integrate;             // 1 if now at post_integrate() in timestep
   int multireplica;               // 1 if min across replicas, else 0
 
   bigint eflag_global,eflag_atom;  // timestep global/peratom eng is tallied on